From 0e2b73a1361ee56e9b1fe9e1f3b681ff99a7d2ab Mon Sep 17 00:00:00 2001 From: jalil Date: Mon, 12 Aug 2024 10:21:52 +0200 Subject: [PATCH] reg2 modified --- .../celloracle/config.vsh.yaml | 0 .../{ => multi_omics}/celloracle/main.py | 0 .../{ => multi_omics}/celloracle/run.sh | 0 .../{ => multi_omics}/celloracle/script.py | 0 .../{ => multi_omics}/figr/config.novsh.yaml | 0 src/methods/{ => multi_omics}/figr/run.sh | 0 src/methods/{ => multi_omics}/figr/script.R | 0 src/methods/{ => multi_omics}/granie/script.R | 0 .../pycistopic/config.vsh.yaml | 0 .../{ => multi_omics}/pycistopic/script.py | 0 .../{ => multi_omics}/pycistopic/test.sh | 0 .../scenicplus/config.vsh.yaml | 0 .../{ => multi_omics}/scenicplus/script.py | 0 .../{ => multi_omics}/scenicplus/test.sh | 0 .../{ => multi_omics}/scglue/config.vsh.yaml | 0 src/methods/{ => multi_omics}/scglue/main.py | 0 src/methods/{ => multi_omics}/scglue/run.sh | 0 .../{ => multi_omics}/scglue/script.py | 0 .../{ => single_omics}/ennet/config.vsh.yaml | 0 src/methods/{ => single_omics}/ennet/script.R | 0 src/methods/{ => single_omics}/ennet/test.sh | 0 .../{ => single_omics}/genie3/config.vsh.yaml | 0 .../{ => single_omics}/genie3/script.py | 0 src/methods/{ => single_omics}/genie3/test.sh | 0 .../grnboost2/config.vsh.yaml | 0 .../{ => single_omics}/grnboost2/script.py | 0 .../{ => single_omics}/grnboost2/test.sh | 0 .../{ => single_omics}/pidc/config.vsh.yaml | 0 src/methods/{ => single_omics}/pidc/pidc.jl | 0 src/methods/{ => single_omics}/pidc/script.py | 0 src/methods/{ => single_omics}/pidc/test.sh | 0 .../{ => single_omics}/portia/config.vsh.yaml | 0 .../{ => single_omics}/portia/script.py | 0 src/methods/{ => single_omics}/portia/test.sh | 0 .../{ => single_omics}/ppcor/config.vsh.yaml | 0 src/methods/{ => single_omics}/ppcor/script.R | 0 src/methods/{ => single_omics}/ppcor/test.sh | 0 .../{ => single_omics}/scsgl/config.vsh.yaml | 0 .../{ => single_omics}/scsgl/script.py | 0 src/methods/{ => single_omics}/scsgl/test.sh | 0 .../tigress/config.vsh.yaml | 0 .../{ => single_omics}/tigress/script.R | 0 .../{ => single_omics}/tigress/test.sh | 0 src/metrics/regression_1/main.py | 4 ---- src/metrics/regression_1/script.py | 7 ------ src/metrics/regression_2/config.vsh.yaml | 3 +-- .../{config.vsh.yaml => config.novsh.yaml} | 0 .../consensus/create-consensus.sh | 5 ++++- src/metrics/regression_2/main.py | 20 ++++++++++++----- src/metrics/regression_2/script.py | 22 +++++++++++++++---- src/workflows/run_grn_evaluation/main.nf | 1 + 51 files changed, 38 insertions(+), 24 deletions(-) rename src/methods/{ => multi_omics}/celloracle/config.vsh.yaml (100%) rename src/methods/{ => multi_omics}/celloracle/main.py (100%) rename src/methods/{ => multi_omics}/celloracle/run.sh (100%) rename src/methods/{ => multi_omics}/celloracle/script.py (100%) rename src/methods/{ => multi_omics}/figr/config.novsh.yaml (100%) rename src/methods/{ => multi_omics}/figr/run.sh (100%) rename src/methods/{ => multi_omics}/figr/script.R (100%) rename src/methods/{ => multi_omics}/granie/script.R (100%) rename src/methods/{ => multi_omics}/pycistopic/config.vsh.yaml (100%) rename src/methods/{ => multi_omics}/pycistopic/script.py (100%) rename src/methods/{ => multi_omics}/pycistopic/test.sh (100%) rename src/methods/{ => multi_omics}/scenicplus/config.vsh.yaml (100%) rename src/methods/{ => multi_omics}/scenicplus/script.py (100%) rename src/methods/{ => multi_omics}/scenicplus/test.sh (100%) rename src/methods/{ => multi_omics}/scglue/config.vsh.yaml (100%) rename src/methods/{ => multi_omics}/scglue/main.py (100%) rename src/methods/{ => multi_omics}/scglue/run.sh (100%) rename src/methods/{ => multi_omics}/scglue/script.py (100%) rename src/methods/{ => single_omics}/ennet/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/ennet/script.R (100%) rename src/methods/{ => single_omics}/ennet/test.sh (100%) rename src/methods/{ => single_omics}/genie3/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/genie3/script.py (100%) rename src/methods/{ => single_omics}/genie3/test.sh (100%) rename src/methods/{ => single_omics}/grnboost2/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/grnboost2/script.py (100%) rename src/methods/{ => single_omics}/grnboost2/test.sh (100%) rename src/methods/{ => single_omics}/pidc/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/pidc/pidc.jl (100%) rename src/methods/{ => single_omics}/pidc/script.py (100%) rename src/methods/{ => single_omics}/pidc/test.sh (100%) rename src/methods/{ => single_omics}/portia/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/portia/script.py (100%) rename src/methods/{ => single_omics}/portia/test.sh (100%) rename src/methods/{ => single_omics}/ppcor/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/ppcor/script.R (100%) rename src/methods/{ => single_omics}/ppcor/test.sh (100%) rename src/methods/{ => single_omics}/scsgl/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/scsgl/script.py (100%) rename src/methods/{ => single_omics}/scsgl/test.sh (100%) rename src/methods/{ => single_omics}/tigress/config.vsh.yaml (100%) rename src/methods/{ => single_omics}/tigress/script.R (100%) rename src/methods/{ => single_omics}/tigress/test.sh (100%) rename src/metrics/regression_2/consensus/{config.vsh.yaml => config.novsh.yaml} (100%) diff --git a/src/methods/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.vsh.yaml similarity index 100% rename from src/methods/celloracle/config.vsh.yaml rename to src/methods/multi_omics/celloracle/config.vsh.yaml diff --git a/src/methods/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py similarity index 100% rename from src/methods/celloracle/main.py rename to src/methods/multi_omics/celloracle/main.py diff --git a/src/methods/celloracle/run.sh b/src/methods/multi_omics/celloracle/run.sh similarity index 100% rename from src/methods/celloracle/run.sh rename to src/methods/multi_omics/celloracle/run.sh diff --git a/src/methods/celloracle/script.py b/src/methods/multi_omics/celloracle/script.py similarity index 100% rename from src/methods/celloracle/script.py rename to src/methods/multi_omics/celloracle/script.py diff --git a/src/methods/figr/config.novsh.yaml b/src/methods/multi_omics/figr/config.novsh.yaml similarity index 100% rename from src/methods/figr/config.novsh.yaml rename to src/methods/multi_omics/figr/config.novsh.yaml diff --git a/src/methods/figr/run.sh b/src/methods/multi_omics/figr/run.sh similarity index 100% rename from src/methods/figr/run.sh rename to src/methods/multi_omics/figr/run.sh diff --git a/src/methods/figr/script.R b/src/methods/multi_omics/figr/script.R similarity index 100% rename from src/methods/figr/script.R rename to src/methods/multi_omics/figr/script.R diff --git a/src/methods/granie/script.R b/src/methods/multi_omics/granie/script.R similarity index 100% rename from src/methods/granie/script.R rename to src/methods/multi_omics/granie/script.R diff --git a/src/methods/pycistopic/config.vsh.yaml b/src/methods/multi_omics/pycistopic/config.vsh.yaml similarity index 100% rename from src/methods/pycistopic/config.vsh.yaml rename to src/methods/multi_omics/pycistopic/config.vsh.yaml diff --git a/src/methods/pycistopic/script.py b/src/methods/multi_omics/pycistopic/script.py similarity index 100% rename from src/methods/pycistopic/script.py rename to src/methods/multi_omics/pycistopic/script.py diff --git a/src/methods/pycistopic/test.sh b/src/methods/multi_omics/pycistopic/test.sh similarity index 100% rename from src/methods/pycistopic/test.sh rename to src/methods/multi_omics/pycistopic/test.sh diff --git a/src/methods/scenicplus/config.vsh.yaml b/src/methods/multi_omics/scenicplus/config.vsh.yaml similarity index 100% rename from src/methods/scenicplus/config.vsh.yaml rename to src/methods/multi_omics/scenicplus/config.vsh.yaml diff --git a/src/methods/scenicplus/script.py b/src/methods/multi_omics/scenicplus/script.py similarity index 100% rename from src/methods/scenicplus/script.py rename to src/methods/multi_omics/scenicplus/script.py diff --git a/src/methods/scenicplus/test.sh b/src/methods/multi_omics/scenicplus/test.sh similarity index 100% rename from src/methods/scenicplus/test.sh rename to src/methods/multi_omics/scenicplus/test.sh diff --git a/src/methods/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.vsh.yaml similarity index 100% rename from src/methods/scglue/config.vsh.yaml rename to src/methods/multi_omics/scglue/config.vsh.yaml diff --git a/src/methods/scglue/main.py b/src/methods/multi_omics/scglue/main.py similarity index 100% rename from src/methods/scglue/main.py rename to src/methods/multi_omics/scglue/main.py diff --git a/src/methods/scglue/run.sh b/src/methods/multi_omics/scglue/run.sh similarity index 100% rename from src/methods/scglue/run.sh rename to src/methods/multi_omics/scglue/run.sh diff --git a/src/methods/scglue/script.py b/src/methods/multi_omics/scglue/script.py similarity index 100% rename from src/methods/scglue/script.py rename to src/methods/multi_omics/scglue/script.py diff --git a/src/methods/ennet/config.vsh.yaml b/src/methods/single_omics/ennet/config.vsh.yaml similarity index 100% rename from src/methods/ennet/config.vsh.yaml rename to src/methods/single_omics/ennet/config.vsh.yaml diff --git a/src/methods/ennet/script.R b/src/methods/single_omics/ennet/script.R similarity index 100% rename from src/methods/ennet/script.R rename to src/methods/single_omics/ennet/script.R diff --git a/src/methods/ennet/test.sh b/src/methods/single_omics/ennet/test.sh similarity index 100% rename from src/methods/ennet/test.sh rename to src/methods/single_omics/ennet/test.sh diff --git a/src/methods/genie3/config.vsh.yaml b/src/methods/single_omics/genie3/config.vsh.yaml similarity index 100% rename from src/methods/genie3/config.vsh.yaml rename to src/methods/single_omics/genie3/config.vsh.yaml diff --git a/src/methods/genie3/script.py b/src/methods/single_omics/genie3/script.py similarity index 100% rename from src/methods/genie3/script.py rename to src/methods/single_omics/genie3/script.py diff --git a/src/methods/genie3/test.sh b/src/methods/single_omics/genie3/test.sh similarity index 100% rename from src/methods/genie3/test.sh rename to src/methods/single_omics/genie3/test.sh diff --git a/src/methods/grnboost2/config.vsh.yaml b/src/methods/single_omics/grnboost2/config.vsh.yaml similarity index 100% rename from src/methods/grnboost2/config.vsh.yaml rename to src/methods/single_omics/grnboost2/config.vsh.yaml diff --git a/src/methods/grnboost2/script.py b/src/methods/single_omics/grnboost2/script.py similarity index 100% rename from src/methods/grnboost2/script.py rename to src/methods/single_omics/grnboost2/script.py diff --git a/src/methods/grnboost2/test.sh b/src/methods/single_omics/grnboost2/test.sh similarity index 100% rename from src/methods/grnboost2/test.sh rename to src/methods/single_omics/grnboost2/test.sh diff --git a/src/methods/pidc/config.vsh.yaml b/src/methods/single_omics/pidc/config.vsh.yaml similarity index 100% rename from src/methods/pidc/config.vsh.yaml rename to src/methods/single_omics/pidc/config.vsh.yaml diff --git a/src/methods/pidc/pidc.jl b/src/methods/single_omics/pidc/pidc.jl similarity index 100% rename from src/methods/pidc/pidc.jl rename to src/methods/single_omics/pidc/pidc.jl diff --git a/src/methods/pidc/script.py b/src/methods/single_omics/pidc/script.py similarity index 100% rename from src/methods/pidc/script.py rename to src/methods/single_omics/pidc/script.py diff --git a/src/methods/pidc/test.sh b/src/methods/single_omics/pidc/test.sh similarity index 100% rename from src/methods/pidc/test.sh rename to src/methods/single_omics/pidc/test.sh diff --git a/src/methods/portia/config.vsh.yaml b/src/methods/single_omics/portia/config.vsh.yaml similarity index 100% rename from src/methods/portia/config.vsh.yaml rename to src/methods/single_omics/portia/config.vsh.yaml diff --git a/src/methods/portia/script.py b/src/methods/single_omics/portia/script.py similarity index 100% rename from src/methods/portia/script.py rename to src/methods/single_omics/portia/script.py diff --git a/src/methods/portia/test.sh b/src/methods/single_omics/portia/test.sh similarity index 100% rename from src/methods/portia/test.sh rename to src/methods/single_omics/portia/test.sh diff --git a/src/methods/ppcor/config.vsh.yaml b/src/methods/single_omics/ppcor/config.vsh.yaml similarity index 100% rename from src/methods/ppcor/config.vsh.yaml rename to src/methods/single_omics/ppcor/config.vsh.yaml diff --git a/src/methods/ppcor/script.R b/src/methods/single_omics/ppcor/script.R similarity index 100% rename from src/methods/ppcor/script.R rename to src/methods/single_omics/ppcor/script.R diff --git a/src/methods/ppcor/test.sh b/src/methods/single_omics/ppcor/test.sh similarity index 100% rename from src/methods/ppcor/test.sh rename to src/methods/single_omics/ppcor/test.sh diff --git a/src/methods/scsgl/config.vsh.yaml b/src/methods/single_omics/scsgl/config.vsh.yaml similarity index 100% rename from src/methods/scsgl/config.vsh.yaml rename to src/methods/single_omics/scsgl/config.vsh.yaml diff --git a/src/methods/scsgl/script.py b/src/methods/single_omics/scsgl/script.py similarity index 100% rename from src/methods/scsgl/script.py rename to src/methods/single_omics/scsgl/script.py diff --git a/src/methods/scsgl/test.sh b/src/methods/single_omics/scsgl/test.sh similarity index 100% rename from src/methods/scsgl/test.sh rename to src/methods/single_omics/scsgl/test.sh diff --git a/src/methods/tigress/config.vsh.yaml b/src/methods/single_omics/tigress/config.vsh.yaml similarity index 100% rename from src/methods/tigress/config.vsh.yaml rename to src/methods/single_omics/tigress/config.vsh.yaml diff --git a/src/methods/tigress/script.R b/src/methods/single_omics/tigress/script.R similarity index 100% rename from src/methods/tigress/script.R rename to src/methods/single_omics/tigress/script.R diff --git a/src/methods/tigress/test.sh b/src/methods/single_omics/tigress/test.sh similarity index 100% rename from src/methods/tigress/test.sh rename to src/methods/single_omics/tigress/test.sh diff --git a/src/metrics/regression_1/main.py b/src/metrics/regression_1/main.py index d188db3ef..378b41adc 100644 --- a/src/metrics/regression_1/main.py +++ b/src/metrics/regression_1/main.py @@ -136,10 +136,6 @@ def set_global_seed(seed): lightgbm.LGBMRegressor().set_params(random_state=seed) -def format_folder(work_dir, exclude_missing_genes, reg_type, theta, tf_n, norm_method, subsample=None): - return f'{work_dir}/benchmark/scores/subsample_{subsample}/exclude_missing_genes_{exclude_missing_genes}/{reg_type}/theta_{theta}_tf_n_{tf_n}/{norm_method}' - - def pivot_grn(net): ''' make net to have gene*tf format''' df_tmp = net.pivot(index='target', columns='source', values='weight') diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py index 5fc3ad38d..1e387904f 100644 --- a/src/metrics/regression_1/script.py +++ b/src/metrics/regression_1/script.py @@ -29,10 +29,6 @@ metric_ids = output.columns.to_numpy() metric_values = output.values[0] -# if metric_ids.ndim == 1: -# metric_ids = metric_ids.reshape(1, -1) -# if metric_values.ndim == 1: -# metric_values = metric_values.reshape(1, -1) print(metric_ids.shape, metric_values.shape) output = ad.AnnData( @@ -45,7 +41,4 @@ } ) - -# print(output.uns) - output.write_h5ad(par["score"], compression="gzip") \ No newline at end of file diff --git a/src/metrics/regression_2/config.vsh.yaml b/src/metrics/regression_2/config.vsh.yaml index 4471c056e..62bae04ec 100644 --- a/src/metrics/regression_2/config.vsh.yaml +++ b/src/metrics/regression_2/config.vsh.yaml @@ -11,12 +11,11 @@ functionality: path: script.py - path: main.py arguments: - - name: --layer - type: string - name: --consensus type: file direction: input must_exist: true + default: 'resources/grn-benchmark/consensus-num-regulators.json' platforms: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.4 diff --git a/src/metrics/regression_2/consensus/config.vsh.yaml b/src/metrics/regression_2/consensus/config.novsh.yaml similarity index 100% rename from src/metrics/regression_2/consensus/config.vsh.yaml rename to src/metrics/regression_2/consensus/config.novsh.yaml diff --git a/src/metrics/regression_2/consensus/create-consensus.sh b/src/metrics/regression_2/consensus/create-consensus.sh index ef4e3c54c..0d3803a2e 100644 --- a/src/metrics/regression_2/consensus/create-consensus.sh +++ b/src/metrics/regression_2/consensus/create-consensus.sh @@ -1 +1,4 @@ -viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus && bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv \ No newline at end of file +viash run src/metrics/regression_2/consensus/config.novsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ + --output resources/grn-benchmark/consensus-num-regulators.json \ + --grn_folder resources/grn_models/ \ + --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv \ No newline at end of file diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py index 92b41385b..c569eafb1 100644 --- a/src/metrics/regression_2/main.py +++ b/src/metrics/regression_2/main.py @@ -70,7 +70,7 @@ def cross_validate_gene( if estimator_t == 'ridge': model = Ridge(random_state=random_state) elif estimator_t == 'GB': - model = lightgbm.LGBMRegressor(verbosity=-1, n_estimators=100, n_jobs=4, random_state=random_state) + model = lightgbm.LGBMRegressor(verbosity=-1, n_estimators=100, n_jobs=par["max_workers"], random_state=random_state) else: raise NotImplementedError(f'Unknown model "{estimator_t}"') @@ -209,6 +209,9 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: # Load perturbation data perturbation_data = ad.read_h5ad(par['perturbation_data']) + subsample = par['subsample'] + if subsample != -1: + perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :] gene_names = perturbation_data.var.index.to_numpy() n_genes = len(gene_names) groups = LabelEncoder().fit_transform(perturbation_data.obs.plate_name) @@ -216,10 +219,14 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: # Load inferred GRN print(f'Loading GRN', flush=True) grn = load_grn(par['prediction'], gene_names) + + # Load and standardize perturbation data layer = par['layer'] X = perturbation_data.layers[layer] + print(X.shape) + X = RobustScaler().fit_transform(X) # Load consensus numbers of putative regulators @@ -232,26 +239,27 @@ def main(par: Dict[str, Any]) -> pd.DataFrame: # Evaluate GRN print(f'Compute metrics for layer: {layer}', flush=True) print(f'Dynamic approach:', flush=True) - score_dynamic = dynamic_approach(grn, X, groups, gene_names, par['reg_type']) + # score_dynamic = dynamic_approach(grn, X, groups, gene_names, par['reg_type']) print(f'Static approach (theta=0):', flush=True) score_static_min = static_approach(grn, n_features_theta_min, X, groups, gene_names, par['reg_type']) print(f'Static approach (theta=0.5):', flush=True) score_static_median = static_approach(grn, n_features_theta_median, X, groups, gene_names, par['reg_type']) print(f'Static approach (theta=1):', flush=True) score_static_max = static_approach(grn, n_features_theta_max, X, groups, gene_names, par['reg_type']) - score_overall = score_dynamic + score_static_min + score_static_median + score_static_max + # score_overall = score_dynamic + score_static_min + score_static_median + score_static_max # TODO: find a mathematically sound way to combine Z-scores and r2 scores - print(f'Score on {layer}: {score_overall}') + # print(f'Score on {layer}: {score_overall}') results = { 'static-theta-0.0': [score_static_min], 'static-theta-0.5': [score_static_median], 'static-theta-1.0': [score_static_max], - 'dynamic': [score_dynamic], - 'Overall': [score_overall], + # 'dynamic': [score_dynamic], + # 'Overall': [score_overall], } # Convert results to DataFrame df_results = pd.DataFrame(results) + df_results['Mean'] = df_results.mean(axis=1) return df_results diff --git a/src/metrics/regression_2/script.py b/src/metrics/regression_2/script.py index 7e9e36d43..a51bc059b 100644 --- a/src/metrics/regression_2/script.py +++ b/src/metrics/regression_2/script.py @@ -8,10 +8,11 @@ par = { 'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad', 'layer': 'scgen_pearson', - 'prediction': 'resources/grn-benchmark/negative_control.csv', + 'prediction': 'resources/grn_models/collectri.csv', 'consensus': 'resources/grn-benchmark/consensus-num-regulators.json', 'score': 'output/score_regression2.csv', - 'reg_type': 'ridge' + 'reg_type': 'ridge', + } ## VIASH END @@ -27,6 +28,19 @@ output = main(par) print('Write output to file', flush=True) -print(output) -output.to_csv(par['score']) +metric_ids = output.columns.to_numpy() +metric_values = output.values[0] + +print(metric_ids.shape, metric_values.shape) +output = ad.AnnData( + X=np.empty((0, 0)), + uns={ + "dataset_id": par["layer"], + "method_id": f"reg2-{par['method_id']}", + "metric_ids": metric_ids, + "metric_values": metric_values + } +) + +output.write_h5ad(par["score"], compression="gzip") print('Completed', flush=True) diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf index 75c0142c2..b062fb784 100644 --- a/src/workflows/run_grn_evaluation/main.nf +++ b/src/workflows/run_grn_evaluation/main.nf @@ -15,6 +15,7 @@ workflow run_wf { // construct list of metrics metrics = [ regression_1 + regression_2 ] /***************************