Add evaluation result aggregation and rendering
IlyaMuravjov committed Mar 5, 2024
1 parent 916dadb commit a56189e
Showing 2 changed files with 137 additions and 2 deletions.
138 changes: 136 additions & 2 deletions cli/eval_all_pairs_cflr.py
@@ -3,17 +3,23 @@
import os
import subprocess
import sys
import warnings
from math import floor, log10
from pathlib import Path
from typing import Optional, List

import pandas as pd

from cli.runners.all_pairs_cflr_tool_runner import IncompatibleCflrToolError
from cli.runners.all_pairs_cflr_tool_runner_facade import run_appropriate_all_pairs_cflr_tool

DISPLAY_STD_THRESHOLD = 0.1

# see `man timeout`
TIMEOUT_EXIT_CODE = 124
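# Illustrative note, not part of the commit: GNU coreutils `timeout` exits
# with status 124 when the wrapped command exceeds its time limit, so a
# runner that launches e.g. `timeout <sec> <cmd>` via subprocess can compare
# the process's returncode against TIMEOUT_EXIT_CODE to detect a timeout.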


- def is_enough_data_collected(result_file_path, rounds):
+ def is_enough_data_collected(result_file_path: Path, rounds: int):
try:
with open(result_file_path, 'r') as file:
reader = list(csv.reader(file))
@@ -78,7 +84,7 @@ def run_experiment(
print(f" {s_edges} {ram_kb} {time_sec}")
writer = csv.writer(csvfile)
writer.writerow([
- {algo_name},
+ algo_name,
os.path.basename(graph_base_name),
os.path.basename(grammar_base_name),
s_edges,
@@ -87,13 +93,139 @@
])
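# Illustrative note, not part of the commit: the one-character fix above is
# significant because `{algo_name}` is a Python set literal, so csv.writer
# previously serialized the repr of a one-element set (e.g. "{'pocr'}")
# into the 'algo' column instead of the bare algorithm name.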


def round_to_significant_digits(x: float, digits: int = 2) -> float:
if x == 0:
return x
return round(x, max(0, -int(floor(log10(abs(x)))) + digits - 1))
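# Illustrative note, not part of the commit: worked values for the default
# digits=2:
#   round_to_significant_digits(0.012345)  # -> 0.012
#   round_to_significant_digits(3.456)     # -> 3.5
#   round_to_significant_digits(12.34)     # -> 12.0
# The max(0, ...) clamp never rounds to the left of the decimal point, so
# values >= 100 keep all integer digits (123.45 -> 123.0, not 120.0).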


def reduce_result_file_to_one_row(result_file_path: Path) -> pd.DataFrame:
df = pd.read_csv(result_file_path)

if len(df) == 0:
return df

df['ram_gb'] = df['ram_kb'].apply(
lambda x: x / 10**6 if isinstance(x, int) or isinstance(x, float) else x
)
assert df['algo'].nunique() <= 1
assert df['graph'].nunique() <= 1
assert df['grammar'].nunique() <= 1
if df['s_edges'].isin(['OOM', 'OOT', '-']).any():
# leave only one entry
df = df[df['s_edges'].isin(['OOM', 'OOT', '-'])].head(1)
else:
unique_s_edges = df['s_edges'].unique()
if len(unique_s_edges) > 1:
warnings.warn(f"Inconsistent 's_edges' values {unique_s_edges} found in {result_file_path}. "
f"Using first 's_edges' value.")

ram_gb_mean = df['ram_gb'].mean()
time_sec_mean = df['time_sec'].mean()

# sample standard deviation
ram_gb_std = df['ram_gb'].std(ddof=1) if len(df) > 1 else -1
time_sec_std = df['time_sec'].std(ddof=1) if len(df) > 1 else -1

df = pd.DataFrame({
'algo': [df['algo'].iloc[0]],
'graph': [df['graph'].iloc[0]],
'grammar': [df['grammar'].iloc[0]],
's_edges': [df['s_edges'].iloc[0]],
'ram_gb': [
round_to_significant_digits(ram_gb_mean)
if ram_gb_std < DISPLAY_STD_THRESHOLD * ram_gb_mean
else f"{round_to_significant_digits(ram_gb_mean)} ± {round_to_significant_digits(ram_gb_std)}"
],
'time_sec': [
# Graspan reports analysis time in whole seconds, so it may report 0
(round_to_significant_digits(time_sec_mean) if time_sec_mean != 0 else "< 1")
if time_sec_std < DISPLAY_STD_THRESHOLD * time_sec_mean
else f"{round_to_significant_digits(time_sec_mean)} ± {round_to_significant_digits(time_sec_std)}"
]
})
return df
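# Illustrative note, not part of the commit: with a hypothetical result file
# holding three measurement rounds
#   algo,graph,grammar,s_edges,ram_kb,time_sec
#   pocr,g1,java_pt,1000,2000000,10.0
#   pocr,g1,java_pt,1000,2100000,11.0
#   pocr,g1,java_pt,1000,1900000,9.0
# ram_gb has mean 2.0 and sample std 0.1 (under 10% of the mean), so RAM is
# rendered as plain "2.0", while time_sec has mean 10.0 and std 1.0 (not
# under 10% of the mean), so time is rendered as "10.0 ± 1.0".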


def pprint_df(df: pd.DataFrame, title: str):
df_string = df.to_markdown(maxheadercolwidths=12, maxcolwidths=12)
width = max(len(line) for line in df_string.splitlines())
print(title.center(width, "="))
print(df_string)
print("=" * width)


def min_numeric(series: pd.Series) -> float:
numeric_series = pd.to_numeric(series, errors='coerce').dropna()
return float('inf') if numeric_series.empty else numeric_series.min()
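# Illustrative note, not part of the commit: pd.to_numeric(errors='coerce')
# turns markers like 'OOM', 'OOT', or '-' into NaN, which dropna() removes,
# so e.g. min_numeric(pd.Series(['3.2', 'OOM', '1.5'])) == 1.5, and a series
# with no numeric entries yields float('inf'), sorting such graphs last.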


def display_results_for_grammar(df: pd.DataFrame, grammar: str):
df = df[df['grammar'] == grammar].copy()
df['algo'] = df['algo'].apply(lambda algo: algo.lower())
df.drop(columns=['grammar'], inplace=True)

df['graph'] = pd.Categorical(df['graph'], sorted(
df['graph'].unique(),
key=lambda graph: min_numeric(df[df['graph'] == graph]['time_sec'])
))

s_edges_df = df.pivot(index='graph', columns='algo', values='s_edges').sort_index()
s_edges_df.columns = [
f'{col} (HAS KNOWN BUGS)'
if "pocr" in col.lower()
else col
for col in s_edges_df.columns
]
pprint_df(
s_edges_df,
title=f" #ANSWER (grammar '{grammar}') ",
)

print()
ram_df = df.pivot(index='graph', columns='algo', values='ram_gb').sort_index()
pprint_df(
ram_df,
title=f" RAM, GB (grammar '{grammar}') "
)
print()
time_df = df.pivot(index='graph', columns='algo', values='time_sec').sort_index()
pprint_df(
time_df,
title=f" TIME, SEC (grammar '{grammar}') "
)
print()
print()
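# Illustrative note, not part of the commit: each pivot produces one table
# per metric with graphs as rows and algorithms as columns; because 'graph'
# was made a Categorical ordered by best observed time above, sort_index()
# lists graphs fastest-first rather than alphabetically. Any algorithm
# column containing "pocr" is suffixed "(HAS KNOWN BUGS)" in the #ANSWER
# table only.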


def display_results(result_files_paths: List[Path]) -> None:
print()
print("RESULTS:")
print(f"Sample std is shown when it's over {DISPLAY_STD_THRESHOLD * 100}% of the mean.")
print()

df = pd.concat(
[reduce_result_file_to_one_row(result_file_path) for result_file_path in result_files_paths],
ignore_index=True
)
df['algo'] = pd.Categorical(df['algo'], categories=df['algo'].unique())
with pd.option_context(
'display.max_rows', None,
'display.max_columns', None
):
for grammar in df['grammar'].unique():
display_results_for_grammar(df, grammar)
print(f"Sample std is shown when it's over {DISPLAY_STD_THRESHOLD * 100}% of the mean.")


def eval_all_pairs_cflr(
algo_config: Path,
data_config: Path,
result_path: Path,
rounds: Optional[int],
timeout_sec: Optional[int],
):
result_files_paths = []
with open(algo_config, mode='r') as algo_file:
algo_reader = csv.DictReader(algo_file)
for algo_row in algo_reader:
@@ -122,6 +254,8 @@ def eval_all_pairs_cflr(
timeout_sec=timeout_sec,
result_file_path=result_file_path
)
result_files_paths.append(result_file_path)
display_results(result_files_paths)
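# Illustrative note, not part of the commit: each (algo, graph, grammar)
# combination appends its per-pair CSV path to result_files_paths, so the
# aggregated tables are rendered once, after every experiment has finished.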


def main(raw_args: List[str]):
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,3 +8,4 @@ python-graphblas==2023.7.0
pandas==2.0.3
numpy==1.23.5
psutil==5.9.8
tabulate==0.9.0
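Note: `tabulate` is the backend pandas uses for `DataFrame.to_markdown`, which the new `pprint_df` relies on; without it, rendering the result tables raises an ImportError.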
