-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathavg_results.py
More file actions
117 lines (97 loc) · 4.21 KB
/
avg_results.py
File metadata and controls
117 lines (97 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pandas as pd
import os
import glob
import argparse
def calculate_solver_performance(csv_directory: str):
    """
    Read all result CSVs from a directory and report the solver's overall
    performance, including the average number of tasks solved per seed run.

    A task counts as "solved" when its ``test_fitness`` is exactly 1.0.
    Filenames are expected to end in ``_<seed>.csv`` (e.g. ``taskid_seed_1.csv``);
    empty or malformed files are skipped with a warning.

    Args:
        csv_directory (str): Path to the directory containing the result CSVs.

    Returns:
        None. Results are printed to stdout.
    """
    # --- Load and combine data ---
    all_files = glob.glob(os.path.join(csv_directory, "*.csv"))
    if not all_files:
        print(f"Error: No CSV files found in directory '{csv_directory}'.")
        return

    df_list = []
    for filename in all_files:
        try:
            # Extract the seed from the filename (format '..._<seed>.csv')
            basename = os.path.basename(filename)
            seed = int(basename.rsplit("_", 1)[1].split(".")[0])
            if os.path.getsize(filename) > 0:
                df = pd.read_csv(filename)
                df["seed"] = seed  # seed column lets us group results per run
                df_list.append(df)
            else:
                # Fixed: previously printed the literal text "(unknown)"
                # instead of the actual file path.
                print(f"Warning: Skipping empty file: {filename}")
        except (pd.errors.EmptyDataError, IndexError, ValueError):
            print(f"Warning: Skipping malformed or empty file: {filename}")
            continue

    if not df_list:
        print("Error: All CSV files were empty or could not be read.")
        return

    results_df = pd.concat(df_list, ignore_index=True)

    # Coerce fitness to numeric (bad cells become NaN), then clip negatives to 0.
    results_df["test_fitness"] = pd.to_numeric(
        results_df["test_fitness"], errors="coerce"
    )
    results_df["test_fitness"] = results_df["test_fitness"].clip(lower=0)

    # A task is "solved" iff its test fitness is exactly 1.0.
    results_df["is_solved"] = results_df["test_fitness"] == 1.0
    # Number of solved tasks in each seed run.
    solved_counts_per_seed = results_df.groupby("seed")["is_solved"].sum()

    if solved_counts_per_seed.empty:
        print("No valid data to calculate solved task statistics.")
        avg_solved = 0
        std_dev_solved = 0
    else:
        avg_solved = solved_counts_per_seed.mean()
        std_dev_solved = solved_counts_per_seed.std()
        # Fixed: std() is NaN for a single seed run; report 0 instead of "nan".
        if pd.isna(std_dev_solved):
            std_dev_solved = 0.0

    # Columns required for the overall averages. Missing columns are now
    # genuinely ignored: previously a missing column was filled with pd.NA and
    # dropna() then silently discarded EVERY row, contradicting the warning.
    required_columns = ["test_fitness", "time_taken", "solution_size", "evaluations"]
    present_columns = []
    for col in required_columns:
        if col in results_df.columns:
            results_df[col] = pd.to_numeric(results_df[col], errors="coerce")
            present_columns.append(col)
        else:
            print(f"Warning: Column '{col}' not found. It will be ignored.")

    # Drop rows with invalid values in any of the available metric columns.
    results_df.dropna(subset=present_columns, inplace=True)
    if results_df.empty:
        print("No valid data available to calculate overall performance averages.")
        return

    # --- Print results ---
    print("\n" + "=" * 55)
    print(" Overall Performance Summary")
    print("=" * 55)
    print(f"Number of seed runs analyzed: {len(solved_counts_per_seed)}")
    print(
        f"Tasks Solved (per run): {avg_solved:.2f} ± {std_dev_solved:.2f} (mean ± std)"
    )
    print("-" * 55)
    # Label and format per metric; absent columns report "N/A" instead of
    # wiping out the whole summary.
    metric_display = {
        "test_fitness": ("Average Test Fitness", "{:.4f}"),
        "time_taken": ("Average Time Taken per Task", "{:.2f} seconds"),
        "solution_size": ("Average Solution Size (Nodes)", "{:.2f}"),
        "evaluations": ("Average Evaluations per Task", "{:,.0f}"),
    }
    for col, (label, fmt) in metric_display.items():
        if col in present_columns:
            print(f"{label}: {fmt.format(results_df[col].mean())}")
        else:
            print(f"{label}: N/A")
    print("=" * 55)
if __name__ == "__main__":
    # Command-line entry point: point the script at a directory of result CSVs.
    arg_parser = argparse.ArgumentParser(
        description="Calculate average performance from ARC solver results."
    )
    arg_parser.add_argument(
        "--csv_dir",
        default="lexicase_pixel_new_grammar",
        type=str,
        help="Directory containing the individual CSV result files.",
    )
    cli_args = arg_parser.parse_args()
    calculate_solver_performance(cli_args.csv_dir)