1313
def main(par):
    """Score a predicted GRN against a ground-truth network.

    For each transcription factor (TF) in the ground truth, computes the
    average precision (AP) of the predicted target ranking, an analytic
    random-ranking baseline, and summary precision/recall statistics.

    Parameters
    ----------
    par : dict
        Required keys: 'evaluation_data' (path to an .h5ad file whose
        var_names define the evaluated gene universe), 'tf_all' (CSV of
        known TF names, one header row), 'ground_truth' (CSV with
        'source' and 'target' columns), plus whatever `read_prediction`
        needs to load the predicted network ('source', 'target',
        'weight' columns).

    Returns
    -------
    pandas.DataFrame
        One row with columns 'precision', 'precision_random', 'recall',
        'weighted_precision'.
    """
    prediction = read_prediction(par)

    # Restrict evaluation to genes actually measured in the evaluation data.
    test_data = ad.read_h5ad(par['evaluation_data'], backed='r')
    evaluation_genes = test_data.var_names.tolist()
    n_targets_total = len(evaluation_genes)
    # Hoisted out of the per-TF loop: O(1) position lookups instead of
    # repeated O(n) list.index() calls per ground-truth edge.
    gene_pos = {gene: i for i, gene in enumerate(evaluation_genes)}

    tf_all = np.loadtxt(par['tf_all'], dtype=str, delimiter=',', skiprows=1)
    true_graph = pd.read_csv(par['ground_truth'])
    # Keep only edges whose source is a known TF and whose target is an
    # evaluated gene.
    true_graph = true_graph[
        (true_graph['source'].isin(tf_all))
        & (true_graph['target'].isin(evaluation_genes))
    ]
    assert prediction.shape[0] > 0, 'No links found in the network'
    assert true_graph.shape[0] > 0, 'No links found in the ground truth'

    # NOTE(review): the next four lines were elided by the diff hunk
    # (@@ -26,22 +30,40); reconstructed from the surrounding context —
    # confirm against the full file.
    scores_model = []
    for tf in true_graph['source'].unique():
        true_edges = true_graph[true_graph['source'] == tf]
        if tf in prediction['source'].values:
            pred_edges = prediction[prediction['source'] == tf]
            true_labels = true_edges['target'].isin(pred_edges['target']).astype(int)
            # Align predicted weights to the ground-truth targets; targets
            # absent from the prediction score 0.
            pred_scores = pred_edges.set_index('target').reindex(true_edges['target'])['weight'].fillna(0)
            if true_labels.sum() == 0:  # no positives: AP undefined, score as 0
                ap = 0.0
            else:
                ap = average_precision_score(true_labels, pred_scores)
        else:
            ap = float('nan')  # TF missing from the prediction entirely
        n_targets = len(true_edges)

        # ----- Analytic random baseline -----
        # AP of a random ranking equals the prevalence of positives among
        # all evaluation genes.
        true_labels_random = np.zeros(n_targets_total)
        idx = [gene_pos[t] for t in true_edges['target']]
        true_labels_random[idx] = 1
        ap_random = true_labels_random.sum() / len(true_labels_random)

        scores_model.append({'source': tf, 'ap': ap, 'n_targets': n_targets, 'ap_random': ap_random})

    scores_df = pd.DataFrame(scores_model)
    print('Number of TFs in GRN:', len(scores_df[scores_df['ap'].notna()]))

    # Weighted mean over TFs present in the prediction (ignoring NaNs).
    # Guard the degenerate case where no TF has a defined AP — np.average
    # would otherwise raise ZeroDivisionError on an all-zero weight sum.
    valid = scores_df.dropna(subset=['ap'])
    if valid.empty:
        weighted_precision = float('nan')
    else:
        weighted_precision = np.average(valid['ap'], weights=valid['n_targets'])

    # Unweighted means (for reference).
    precision = scores_df['ap'].mean(skipna=True)
    precision_random = scores_df['ap_random'].mean(skipna=True)
    recall = scores_df['ap'].fillna(0).mean()  # absent TFs punished as 0

    summary_df = pd.DataFrame([{
        'precision': precision,
        'precision_random': precision_random,
        'recall': recall,
        'weighted_precision': weighted_precision
    }])

    return summary_df
0 commit comments