From 8724ed4e36d8aaf66da6d0fd19e7051e0705bc55 Mon Sep 17 00:00:00 2001 From: "George, Asha" Date: Thu, 6 Mar 2025 15:37:45 -0800 Subject: [PATCH 1/3] Changes for Unresolved comment '# TODO: remove placeholding zeroed y' and Unresolved comment '# TODO: add float values'. lines of code = 1 --- sklbench/datasets/loaders.py | 3 ++- sklbench/utils/special_params.py | 13 ++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sklbench/datasets/loaders.py b/sklbench/datasets/loaders.py index 20df75b2..7aedf328 100644 --- a/sklbench/datasets/loaders.py +++ b/sklbench/datasets/loaders.py @@ -802,7 +802,8 @@ def load_ann_dataset_template(url, raw_data_cache): } del x_train, x_test # TODO: remove placeholding zeroed y - y = np.zeros((x.shape[0],)) + #y = np.zeros((x.shape[0],)) + y = np.zeros(x.shape[0]) return {"x": x, "y": y}, data_desc diff --git a/sklbench/utils/special_params.py b/sklbench/utils/special_params.py index 49191023..e9ec921c 100644 --- a/sklbench/utils/special_params.py +++ b/sklbench/utils/special_params.py @@ -36,6 +36,10 @@ def is_special_value(value) -> bool: return isinstance(value, str) and value.startswith(SP_VALUE_STR) +def float_range(start,stop,step): + while start < stop: + yield start + start += step def explain_range(range_str: str) -> List: def check_range_values_size(range_values: List[int], size: int): @@ -47,13 +51,15 @@ def check_range_values_size(range_values: List[int], size: int): range_values = range_str.replace("[RANGE]", "").split(":") # TODO: add float values range_type = range_values[0] - range_values = list(map(int, range_values[1:])) + #range_values = list(map(int, range_values[1:])) + range_values = list(map(float, range_values[1:])) # - add:start{int}:end{int}:step{int} - Arithmetic progression # Sequence: start + step * i <= end if range_type == "add": check_range_values_size(range_values, 3) start, end, step = range_values - return list(range(start, end + step, step)) + #return list(range(start, end + 
step, step)) + return list(float_range(start, end + step, step)) # - mul:current{int}:end{int}:step{int} - Geometric progression # Sequence: current * step <= end elif range_type == "mul": @@ -71,7 +77,8 @@ def check_range_values_size(range_values: List[int], size: int): range_values.append(1) check_range_values_size(range_values, 4) base, start, end, step = range_values - return [base**i for i in range(start, end + step, step)] + #return [base**i for i in range(start, end + step, step)] + return [base**i for i in float_range(start, end + step, step)] else: raise ValueError(f'Unknown "{range_type}" range type') From 758a760dd34fec538c7488f6db5b4f67e4723f81 Mon Sep 17 00:00:00 2001 From: "George, Asha" Date: Fri, 7 Mar 2025 11:00:04 -0800 Subject: [PATCH 2/3] Changes for datetime representation Unresolved comment '# TODO: replace unix time in ms with datetime'. --- sklbench/runner/commands_helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklbench/runner/commands_helper.py b/sklbench/runner/commands_helper.py index 09e61369..963a8939 100644 --- a/sklbench/runner/commands_helper.py +++ b/sklbench/runner/commands_helper.py @@ -18,6 +18,7 @@ import os import sys from time import time +from datetime import datetime from typing import Dict, List, Tuple from ..utils.bench_case import get_bench_case_name, get_bench_case_value @@ -61,7 +62,7 @@ def generate_benchmark_command( get_bench_case_name(bench_case, shortened=True, separator="_"), hash_from_json_repr(bench_case), # TODO: replace unix time in ms with datetime - str(int(time() * 1000)), + datetime.now().strftime("%Y-%m-%d %H:%M"), ] ), ) From 6bad102d9e7dfb60730fe7b83c0f8d86ae64f901 Mon Sep 17 00:00:00 2001 From: Marcia Louis Date: Fri, 7 Mar 2025 16:35:15 -0800 Subject: [PATCH 3/3] feat: Add additional data descriptors to benchmark results - Updated `sklbench/benchmarks/custom_function.py` to include additional information (`memory_usage`, `feature_names`, and `class_distribution`) in the benchmark results. 
- Ensured that the `result` dictionary is updated with the new fields and metrics. - Modified `sklbench/report/implementation.py` to handle and process the new data descriptors in the benchmark results. These changes enhance the benchmark results by providing more detailed information about the dataset and its characteristics. --- sklbench/benchmarks/custom_function.py | 9 ++++++--- sklbench/report/implementation.py | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sklbench/benchmarks/custom_function.py b/sklbench/benchmarks/custom_function.py index 25abb900..d2816229 100644 --- a/sklbench/benchmarks/custom_function.py +++ b/sklbench/benchmarks/custom_function.py @@ -104,11 +104,14 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): "function": function_name, } result = enrich_result(result, bench_case) - # TODO: replace `x_train` data_desc with more informative values - result.update(data_description["x_train"]) + # Replace `x_train` data_desc with more informative values + result.update({ + "memory_usage": x_train.nbytes, + "feature_names": list(x_train.columns) if isinstance(x_train, pd.DataFrame) else None, + "class_distribution": dict(pd.Series(y_train).value_counts()) if y_train is not None else None + }) result.update(metrics) return [result] - if __name__ == "__main__": main_template(main) diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index 28fa2bb0..cb2caebc 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -89,12 +89,15 @@ "dataset", "samples", "features", + "feature_names", "format", "dtype", "order", "n_classes", + "class_distribution", "n_clusters", "batch_size", + "memory_usage", ] DIFFBY_COLUMNS = ["environment_name", "library", "format", "device"]