diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh index d245580b2..c65bb8908 100644 --- a/scripts/run_grn_evaluation_tw.sh +++ b/scripts/run_grn_evaluation_tw.sh @@ -2,13 +2,13 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -RUN_ID="scgen_pearson_gb_sub549" +RUN_ID="pearson_gb" resources_dir="s3://openproblems-data/resources/grn" publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}" # grn_models_folder="${resources_dir}/supplementary/grn_models_noised" grn_models_folder="${resources_dir}/grn_models" reg_type=GB -subsample=-1 +subsample=-2 max_workers=20 param_file="./params/${RUN_ID}.yaml" @@ -24,7 +24,7 @@ grn_names=( ) # layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm") -layers=( "scgen_pearson" ) +layers=( "pearson" ) # Start writing to the YAML file cat > $param_file << HERE @@ -52,17 +52,17 @@ for grn_name in "${grn_names[@]}"; do done done -# # Append negative control -# grn_name="negative_control" -# for layer in "${layers[@]}"; do -# append_entry "$grn_name" "" "true" -# done +# Append negative control +grn_name="negative_control" +for layer in "${layers[@]}"; do + append_entry "$grn_name" "" "true" +done -# # Append positive controls -# grn_name="positive_control" -# for layer in "${layers[@]}"; do -# append_entry "$grn_name" "true" -# done +# Append positive controls +grn_name="positive_control" +for layer in "${layers[@]}"; do + append_entry "$grn_name" "true" +done # Append the remaining output_state and publish_dir to the YAML file cat >> $param_file << HERE @@ -84,7 +84,7 @@ HERE --main-script target/nextflow/workflows/run_grn_evaluation/main.nf ` --workspace 53907369739130 ` --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` - --params-file ./params/scgen_pearson_gb_sub549.yaml ` + --params-file ./params/scgen_pearson_gb_pcs.yaml ` --config src/common/nextflow_helpers/labels_tw.config diff --git a/src/metrics/regression_1/main.py b/src/metrics/regression_1/main.py index 1bd719062..0f2d17fc8 100644 --- a/src/metrics/regression_1/main.py +++ b/src/metrics/regression_1/main.py @@ -191,6 +191,8 @@ def main(par): perturbation_data = perturbation_data[mask,:] else: perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :] + + print(perturbation_data.shape) pert_df = pert_df.T # make it gene*sample diff --git a/src/metrics/regression_1/script_all.py b/src/metrics/regression_1/script_all.py index deb74a8a6..9af3cfe67 100644 --- a/src/metrics/regression_1/script_all.py +++ b/src/metrics/regression_1/script_all.py @@ -52,41 +52,42 @@ def create_negative_control(gene_names) -> np.ndarray: for ii, layer in enumerate(layers): par["layer"] = layer if grn_model=='positive_control': - # print('Inferring GRN') - # net = create_positive_control(perturbation_data.layers[par["layer"]], groups) + print('Inferring positive control') + net = create_positive_control(perturbation_data.layers[par["layer"]], groups) - # net = pd.DataFrame(net, index=gene_names, columns=gene_names) - # net = net.loc[:, net.columns.isin(tf_all)] + net = pd.DataFrame(net, index=gene_names, columns=gene_names) + net = net.loc[:, net.columns.isin(tf_all)] - # pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight') + pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight') - # pivoted_net = pivoted_net.rename(columns={'index': 'target'}) - # pivoted_net = pivoted_net[pivoted_net['weight'] != 0] + pivoted_net = pivoted_net.rename(columns={'index': 'target'}) + pivoted_net = pivoted_net[pivoted_net['weight'] != 0] par['prediction'] = f"{par['temp_dir']}/{layer}_positive_control.csv" - # pivoted_net.to_csv(par['prediction']) + print(par['prediction']) + pivoted_net.to_csv(par['prediction']) elif grn_model=='negative_control': - # print('Inferring GRN') - # net = create_negative_control(gene_names) + print('Inferring negative control') + net = create_negative_control(gene_names) - # pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight') + pivoted_net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight') - # pivoted_net = pivoted_net.rename(columns={'index': 'target'}) - # pivoted_net = pivoted_net[pivoted_net['weight'] != 0] + pivoted_net = pivoted_net.rename(columns={'index': 'target'}) + pivoted_net = pivoted_net[pivoted_net['weight'] != 0] par['prediction'] = f"{par['temp_dir']}/negative_control.csv" - # pivoted_net.to_csv(par['prediction']) + pivoted_net.to_csv(par['prediction']) else: par['prediction'] = f"resources/grn_models/{grn_model}.csv" - output = main(par) - output.index = [layer] + # output = main(par) + # output.index = [layer] - if ii == 0: - score = output - else: - score = pd.concat([score, output], axis=0) + # if ii == 0: + # score = output + # else: + # score = pd.concat([score, output], axis=0) - print("Write output to file", flush=True) - print(grn_model, layer, score) + # print("Write output to file", flush=True) + # print(grn_model, layer, score) - print("Write output to file", flush=True) - score.to_csv(par['score']) + # print("Write output to file", flush=True) + # score.to_csv(par['score']) diff --git a/src/metrics/regression_2/main.py b/src/metrics/regression_2/main.py index 520c29731..2707cfb33 100644 --- a/src/metrics/regression_2/main.py +++ b/src/metrics/regression_2/main.py @@ -198,7 +198,7 @@ def static_approach(grn: np.ndarray, n_features: np.ndarray, X: np.ndarray, grou # Cross-validate each gene using the inferred GRN to define select input features res = cross_validate(reg_type, gene_names, X, groups, grn, n_features, n_jobs=n_jobs) mean_r2_scores = np.asarray([res['scores'][j]['avg-r2'] for j in range(len(res['scores']))]) - mean_r2_scores = mean_r2_scores[mean_r2_scores>-1] + mean_r2_scores = mean_r2_scores[mean_r2_scores>-10] return np.mean(mean_r2_scores)