@@ -16,16 +16,16 @@ save_dir="output/tf_binding"
1616mkdir -p " $save_dir "
1717
1818# datasets to process
19- datasets=(' replogle' ' norman' ' adamson' ) # "300BCG" "ibd" 'parsebioscience''op' "300BCG" 'parsebioscience'
19+ datasets=(' replogle' ' norman' ' adamson' ) # "300BCG" "ibd" 'parsebioscience''op' "300BCG" 'parsebioscience'
2020# methods to process
2121methods=(" negative_control" " pearson_corr" " positive_control" " ppcor" " portia" " scenic" " grnboost" " scprint" " scenicplus" " celloracle" " scglue" " figr" " granie" )
2222
23- # temporary file to collect CSV rows
24- combined_csv=" ${save_dir} /tf_binding_scores.csv"
25- echo " dataset,method,metric,value" > " $combined_csv "
26-
2723for dataset in " ${datasets[@]} " ; do
2824 echo -e " \n\nProcessing dataset: $dataset \n"
25+
26+ # Create separate CSV file for each dataset
27+ dataset_csv=" ${save_dir} /tf_binding_scores_${dataset} .csv"
28+ echo " dataset,method,metric,value" > " $dataset_csv "
2929
3030 evaluation_data=" resources/grn_benchmark/evaluation_data/${dataset} _bulk.h5ad"
3131
@@ -39,13 +39,30 @@ for dataset in "${datasets[@]}"; do
3939 fi
4040
4141 echo -e " \nProcessing method: $method \n"
42- python src/metrics/tf_binding/script.py \
43- --prediction " $prediction " \
44- --evaluation_data " $evaluation_data " \
45- --ground_truth " resources/grn_benchmark/ground_truth/K562.csv" \
46- --score " $score "
42+ # python src/metrics/tf_binding/script.py \
43+ # --prediction "$prediction" \
44+ # --evaluation_data "$evaluation_data" \
45+ # --ground_truth "resources/grn_benchmark/ground_truth/K562.csv" \
46+ # --score "$score"
47+
48+ # Extract metrics from the .h5ad and append to CSV
49+ python -u - << EOF
50+ import anndata as ad
51+ import pandas as pd
52+
53+ adata = ad.read_h5ad("${score} ")
54+ if "metric_values" in adata.uns:
55+ metric_names = adata.uns["metric_ids"]
56+ metric_values = adata.uns["metric_values"]
57+ df = pd.DataFrame({"metric": metric_names, "value": metric_values})
58+ df["dataset"] = "${dataset} "
59+ df["method"] = "${method} "
60+ df = df[["dataset", "method", "metric", "value"]] # Reorder columns to match header
61+ df.to_csv("${dataset_csv} ", mode="a", header=False, index=False)
62+ EOF
4763
4864 done
65+ echo -e " \nResults for dataset $dataset collected in: $dataset_csv "
4966done
5067
51- echo -e " \nAll results collected in: $combined_csv "
68+ echo -e " \nAll dataset results saved in separate CSV files in : $save_dir "
0 commit comments