|
279 | 279 | " \"lt_iqr_desc\"\n",
|
280 | 280 | "]\n",
|
281 | 281 | "\n",
|
282 |
| - "# Initialize list to store results\n", |
283 | 282 | "all_results = []\n",
|
284 | 283 | "\n",
|
285 |
| - "# Loop through architectures and collect metrics\n", |
286 | 284 | "for arch in [\"rn-20\", \"wrn28-2\", \"wrn40-4\"]:\n",
|
287 | 285 | " exp_id = f\"{arch}_CIFAR10\"\n",
|
288 | 286 | " if exp_id not in final_model_metrics:\n",
|
289 | 287 | " continue\n",
|
290 | 288 | " \n",
|
291 |
| - " # Get precision/recall metrics for this architecture\n", |
292 | 289 | " results = make_precision_recall_at_k_df_single_threshold(\n",
|
293 | 290 | " scores_df=final_model_metrics[exp_id],\n",
|
294 | 291 | " ground_truth_df=final_model_metrics[exp_id], \n",
|
295 | 292 | " fpr_threshold=0.001,\n",
|
296 | 293 | " k_frac=0.01\n",
|
297 | 294 | " )\n",
|
298 | 295 | " \n",
|
299 |
| - " # Add architecture column\n", |
300 | 296 | " results['architecture'] = arch\n",
|
301 | 297 | " \n",
|
302 |
| - " # Append to list\n", |
303 | 298 | " all_results.append(results)\n",
|
304 | 299 | "\n",
|
305 |
| - "# Create column names for precision and recall\n", |
306 | 300 | "precision_columns = [\"precision_\" + m for m in metrics]\n",
|
307 | 301 | "recall_columns = [\"recall_\" + m for m in metrics]\n",
|
308 | 302 | "\n",
|
309 |
| - "# Create dataframe with both precision and recall columns\n", |
310 | 303 | "df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['architecture']]\n",
|
311 | 304 | "\n",
|
312 |
| - "# Pivot table to make architectures as columns\n", |
313 | 305 | "df_pivot = df.melt(id_vars=['architecture'], var_name='metric', value_name='value')\n",
|
314 | 306 | "\n",
|
315 |
| - "# Create MultiIndex before pivoting\n", |
316 | 307 | "df_pivot['type'] = df_pivot['metric'].str.split('_').str[0]\n",
|
317 | 308 | "df_pivot['metric'] = df_pivot['metric'].str.split('_').str[1:].str.join('_')\n",
|
318 | 309 | "df_pivot.set_index(['type', 'metric'], inplace=True)\n",
|
319 | 310 | "\n",
|
320 |
| - "# Now pivot with the MultiIndex\n", |
321 | 311 | "results_df = df_pivot.pivot(columns='architecture', values='value')\n",
|
322 | 312 | "\n",
|
323 |
| - "# Sort index to group precision and recall metrics together\n", |
324 | 313 | "idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])\n",
|
325 | 314 | "results_df = results_df.reindex(idx)\n",
|
326 | 315 | "\n",
|
|
484 | 473 | " \"lt_iqr_desc\"\n",
|
485 | 474 | "]\n",
|
486 | 475 | "\n",
|
487 |
| - "# Initialize list to store results\n", |
488 | 476 | "all_results = []\n",
|
489 | 477 | "\n",
|
490 |
| - "# Loop through datasets and collect metrics\n", |
491 | 478 | "for dataset in [\"CIFAR10\", \"CIFAR100\", \"CINIC10\"]:\n",
|
492 | 479 | " exp_id = f\"wrn28-2_{dataset}\" # Use WRN-28-2 for all datasets\n",
|
493 | 480 | " if exp_id not in final_model_metrics:\n",
|
494 | 481 | " continue\n",
|
495 | 482 | " \n",
|
496 |
| - " # Get precision/recall metrics for this dataset\n", |
497 | 483 | " results = make_precision_recall_at_k_df_single_threshold(\n",
|
498 | 484 | " scores_df=final_model_metrics[exp_id],\n",
|
499 | 485 | " ground_truth_df=final_model_metrics[exp_id], \n",
|
500 | 486 | " fpr_threshold=0.001,\n",
|
501 | 487 | " k_frac=0.01\n",
|
502 | 488 | " )\n",
|
503 | 489 | " \n",
|
504 |
| - " # Add dataset column\n", |
505 | 490 | " results['dataset'] = dataset\n",
|
506 | 491 | " \n",
|
507 |
| - " # Append to list\n", |
508 | 492 | " all_results.append(results)\n",
|
509 | 493 | "\n",
|
510 |
| - "# Create column names for precision and recall\n", |
511 | 494 | "precision_columns = [\"precision_\" + m for m in metrics]\n",
|
512 | 495 | "recall_columns = [\"recall_\" + m for m in metrics]\n",
|
513 | 496 | "\n",
|
514 |
| - "# Create dataframe with both precision and recall columns\n", |
515 | 497 | "df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['dataset']]\n",
|
516 | 498 | "\n",
|
517 |
| - "# Pivot table to make datasets as columns\n", |
518 | 499 | "df_pivot = df.melt(id_vars=['dataset'], var_name='metric', value_name='value')\n",
|
519 | 500 | "\n",
|
520 |
| - "# Create MultiIndex before pivoting\n", |
521 | 501 | "df_pivot['type'] = df_pivot['metric'].str.split('_').str[0]\n",
|
522 | 502 | "df_pivot['metric'] = df_pivot['metric'].str.split('_').str[1:].str.join('_')\n",
|
523 | 503 | "df_pivot.set_index(['type', 'metric'], inplace=True)\n",
|
524 | 504 | "\n",
|
525 |
| - "# Now pivot with the MultiIndex\n", |
526 | 505 | "results_df = df_pivot.pivot(columns='dataset', values='value')\n",
|
527 | 506 | "\n",
|
528 |
| - "# Sort index to group precision and recall metrics together\n", |
529 | 507 | "idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])\n",
|
530 | 508 | "results_df = results_df.reindex(idx)\n",
|
531 | 509 | "\n",
|
|
628 | 606 | }
|
629 | 607 | ],
|
630 | 608 | "source": [
|
631 |
| - "# Initialize results storage\n", |
632 | 609 | "results = []\n",
|
633 | 610 | "\n",
|
634 |
| - "# Loop through all experiments\n", |
635 | 611 | "for exp_id in final_model_metrics.keys():\n",
|
636 | 612 | " scores = final_model_metrics[exp_id][\"lira_score\"]\n",
|
637 | 613 | " labels = final_model_metrics[exp_id][\"target_trained_on\"]\n",
|
638 | 614 | "\n",
|
639 |
| - " # Calculate LiRA AUC\n", |
640 | 615 | " lira_auc = roc_auc_score(labels, scores)\n",
|
641 | 616 | "\n",
|
642 |
| - " # Calculate TPR at FPR=0.001 \n", |
643 | 617 | " fpr, tpr, thresholds = roc_curve(labels, scores)\n",
|
644 | 618 | " idx = np.argmin(np.abs(fpr - 0.001))\n",
|
645 | 619 | " tpr_at_fpr = tpr[idx]\n",
|
|
650 | 624 | " 'tpr_at_fpr': tpr_at_fpr\n",
|
651 | 625 | " })\n",
|
652 | 626 | "\n",
|
653 |
| - "# Create dataframe\n", |
654 | 627 | "df = pd.DataFrame(results).set_index('exp_id')\n",
|
655 | 628 | "print(\"\\nLiRA Metrics across all experiments:\")\n",
|
656 | 629 | "df\n"
|
|
813 | 786 | " \"lt_iqr_desc\"\n",
|
814 | 787 | "]\n",
|
815 | 788 | "\n",
|
816 |
| - "# Initialize list to store results\n", |
817 | 789 | "all_results = []\n",
|
818 | 790 | "\n",
|
819 |
| - "# Loop through datasets and collect metrics\n", |
820 | 791 | "for dataset in [\"CIFAR10\", \"CIFAR100\", \"CINIC10\"]:\n",
|
821 | 792 | " exp_id = f\"wrn28-2_{dataset}\" # Use WRN-28-2 for all datasets\n",
|
822 | 793 | " if exp_id not in final_model_metrics:\n",
|
823 | 794 | " continue\n",
|
824 | 795 | " \n",
|
825 |
| - " # Get precision/recall metrics for this dataset\n", |
826 | 796 | " results = make_precision_recall_at_k_df_single_threshold(\n",
|
827 | 797 | " scores_df=final_model_metrics[exp_id],\n",
|
828 | 798 | " ground_truth_df=final_model_metrics[exp_id], \n",
|
829 | 799 | " fpr_threshold=0.001,\n",
|
830 | 800 | " k_frac=0.01\n",
|
831 | 801 | " )\n",
|
832 | 802 | " \n",
|
833 |
| - " # Add dataset column\n", |
834 | 803 | " results['dataset'] = dataset\n",
|
835 | 804 | " \n",
|
836 |
| - " # Append to list\n", |
837 | 805 | " all_results.append(results)\n",
|
838 | 806 | "\n",
|
839 |
| - "# Create column names for precision and recall\n", |
840 | 807 | "precision_columns = [\"precision_\" + m for m in metrics]\n",
|
841 | 808 | "recall_columns = [\"recall_\" + m for m in metrics]\n",
|
842 | 809 | "\n",
|
843 |
| - "# Create dataframe with both precision and recall columns\n", |
844 | 810 | "df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['dataset']]\n",
|
845 | 811 | "\n",
|
846 |
| - "# Pivot table to make datasets as columns\n", |
847 | 812 | "df_pivot = df.melt(id_vars=['dataset'], var_name='metric', value_name='value')\n",
|
848 | 813 | "\n",
|
849 |
| - "# Create MultiIndex before pivoting\n", |
850 | 814 | "df_pivot['type'] = df_pivot['metric'].str.split('_').str[0]\n",
|
851 | 815 | "df_pivot['metric'] = df_pivot['metric'].str.split('_').str[1:].str.join('_')\n",
|
852 | 816 | "df_pivot.set_index(['type', 'metric'], inplace=True)\n",
|
853 | 817 | "\n",
|
854 |
| - "# Now pivot with the MultiIndex\n", |
855 | 818 | "results_df = df_pivot.pivot(columns='dataset', values='value')\n",
|
856 | 819 | "\n",
|
857 |
| - "# Sort index to group precision and recall metrics together\n", |
858 | 820 | "idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])\n",
|
859 | 821 | "results_df = results_df.reindex(idx)\n",
|
860 | 822 | "\n",
|
|
1065 | 1027 | "for exp_id in final_model_metrics:\n",
|
1066 | 1028 | " df = final_model_metrics[exp_id]\n",
|
1067 | 1029 | "\n",
|
1068 |
| - " # Create empty results dictionary with metrics as rows\n", |
1069 | 1030 | " k_fracs = [0.01, 0.03, 0.05, 0.10, 0.20, 0.50]\n",
|
1070 | 1031 | "\n",
|
1071 | 1032 | " # Populate results\n",
|
|
1080 | 1041 | " results[exp_id][f'k={k_frac*100}% Precision'] = stats['precision_lt_iqr_desc']\n",
|
1081 | 1042 | " results[exp_id][f'k={k_frac*100}% Recall'] = stats['recall_lt_iqr_desc']\n",
|
1082 | 1043 | "\n",
|
1083 |
| - " # Convert to DataFrame with metrics as index\n", |
1084 | 1044 | "results_df = pd.DataFrame.from_dict(results, orient='index')\n",
|
1085 | 1045 | "\n",
|
1086 |
| - "# Sort columns to group by k%\n", |
1087 | 1046 | "column_order = []\n",
|
1088 | 1047 | "for k_frac in k_fracs:\n",
|
1089 | 1048 | " column_order.extend([f'k={k_frac*100}% Precision', f'k={k_frac*100}% Recall'])\n",
|
|
1246 | 1205 | "source": [
|
1247 | 1206 | "from collections import defaultdict\n",
|
1248 | 1207 | "\n",
|
1249 |
| - "# Create empty lists/dicts to store results\n", |
1250 | 1208 | "exp_ids = []\n",
|
1251 | 1209 | "total_pos = []\n",
|
1252 | 1210 | "pos_at_thresh = []\n",
|
1253 | 1211 | "max_recalls = defaultdict(list)\n",
|
1254 | 1212 | "\n",
|
1255 |
| - "# Calculate metrics for each experiment\n", |
1256 | 1213 | "for exp_id in final_model_metrics.keys():\n",
|
1257 | 1214 | " results = final_model_metrics[exp_id]\n",
|
1258 | 1215 | " \n",
|
|
1263 | 1220 | " total_pos.append(total_positives)\n",
|
1264 | 1221 | " pos_at_thresh.append(num_positives_at_threshold)\n",
|
1265 | 1222 | " \n",
|
1266 |
| - " # Calculate max recalls for different k%\n", |
1267 | 1223 | " for k in (1, 3, 5, 10, 20, 50):\n",
|
1268 | 1224 | " predicted_samples = k * total_positives // 100\n",
|
1269 | 1225 | " max_recall = min(1, predicted_samples / num_positives_at_threshold)\n",
|
1270 | 1226 | " max_recalls[k].append(max_recall)\n",
|
1271 | 1227 | "\n",
|
1272 |
| - "# Create and display results DataFrame\n", |
1273 | 1228 | "results_dict = {\n",
|
1274 | 1229 | " 'Total Positives': total_pos,\n",
|
1275 | 1230 | " 'Positives at Threshold': pos_at_thresh,\n",
|
1276 | 1231 | "}\n",
|
1277 |
| - "# Add max recalls to results dictionary\n", |
1278 | 1232 | "for k in max_recalls:\n",
|
1279 | 1233 | " results_dict[f'Max Recall {k}%'] = max_recalls[k]\n",
|
1280 | 1234 | "\n",
|
|
1407 | 1361 | " \"spearman_lt_iqr\",\n",
|
1408 | 1362 | "]\n",
|
1409 | 1363 | "\n",
|
1410 |
| - "# Create DataFrame with metrics for each experiment\n", |
1411 | 1364 | "metrics_by_exp = {}\n",
|
1412 | 1365 | "\n",
|
1413 | 1366 | "for exp_id in final_model_metrics:\n",
|
|
1422 | 1375 | " \n",
|
1423 | 1376 | " metrics_by_exp[exp_id] = metrics_dict\n",
|
1424 | 1377 | "\n",
|
1425 |
| - "# Create DataFrame with experiments as columns and metrics as rows\n", |
1426 | 1378 | "results_df = pd.DataFrame(metrics_by_exp).loc[metrics]\n",
|
1427 | 1379 | "\n",
|
1428 | 1380 | "display(results_df)\n",
|
|
0 commit comments