-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathcustom_function.py
117 lines (100 loc) · 4.22 KB
/
custom_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# ===============================================================================
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================
from typing import Dict, List, Tuple
from ..datasets import load_data
from ..datasets.transformer import split_and_transform_data
from ..utils.bench_case import get_bench_case_value
from ..utils.common import get_module_members
from ..utils.config import bench_case_filter
from ..utils.custom_types import BenchCase
from ..utils.logger import logger
from ..utils.measurement import measure_case
from ..utils.special_params import assign_case_special_values_on_run
from .common import enrich_result, main_template
def get_function_instance(library_name: str, function_name: str):
    """Look up a function by name among the members of a library module.

    Parameters
    ----------
    library_name : str
        Dotted module path; it is split on ``"."`` before being passed to
        ``get_module_members``.
    function_name : str
        Key to look up in the functions map returned by
        ``get_module_members``.

    Returns
    -------
    The first candidate registered under ``function_name``.

    Raises
    ------
    ValueError
        If ``function_name`` is missing from the functions map or has no
        candidates registered under it.
    """
    _, functions_map = get_module_members(library_name.split("."))
    candidates = functions_map[function_name] if function_name in functions_map else ()
    # An empty candidate list is treated like a missing name instead of
    # failing later with an IndexError on `candidates[0]`.
    if len(candidates) == 0:
        raise ValueError(
            f"Unable to find {function_name} function in {library_name} module."
        )
    if len(candidates) != 1:
        # Several objects share the name: report all of them at debug level
        # and make the silent "first one wins" choice visible as a warning.
        logger.debug(f'List of functions with name "{function_name}": {candidates}')
        logger.warning(
            f'Found {len(candidates)} functions with name "{function_name}". '
            f"Using first {candidates[0]}."
        )
    return candidates[0]
def get_function_args(bench_case: BenchCase, x_train, y_train, x_test, y_test) -> Tuple:
    """Select the positional arguments for the benchmarked function.

    The order is read from the ``algorithm:args_order`` bench case value, a
    ``"|"``-separated list of names taken from ``x_train``, ``y_train``,
    ``x_test`` and ``y_test``. ``"x_train|y_train"`` is passed to
    ``get_bench_case_value`` as the third argument (presumably the fallback
    when the key is not set -- confirm against ``get_bench_case_value``).

    Returns
    -------
    tuple
        The selected data objects, in the requested order.

    Raises
    ------
    KeyError
        If ``args_order`` contains a name other than the four listed above.
    """
    args_map = {
        "x_train": x_train,
        "y_train": y_train,
        "x_test": x_test,
        "y_test": y_test,
    }
    # order format: "arg1|arg2|...|argN"
    args_order = get_bench_case_value(
        bench_case, "algorithm:args_order", "x_train|y_train"
    )
    # Build a real tuple (as the annotation promises) rather than a generator:
    # a generator can be consumed only once and would postpone the KeyError
    # for an unknown name until the arguments are unpacked.
    return tuple(args_map[arg] for arg in args_order.split("|"))
def measure_function_instance(bench_case, function_instance, args: Tuple, kwargs: Dict):
    """Run ``measure_case`` on the function and package its timing values.

    ``args`` and ``kwargs`` are unpacked and forwarded to ``measure_case``
    after ``bench_case`` and ``function_instance``.

    Returns
    -------
    dict
        ``"time[ms]"`` and ``"time std[ms]"`` holding the first and second
        values returned by ``measure_case``; the third value is discarded.
    """
    time_ms, time_std_ms, _ = measure_case(
        bench_case, function_instance, *args, **kwargs
    )
    return {"time[ms]": time_ms, "time std[ms]": time_std_ms}
def _describe_training_data(x_train, y_train) -> Dict:
    """Build the training-data fields attached to the benchmark result."""
    # Imported here because this module has no file-level pandas import.
    import pandas as pd

    if isinstance(x_train, pd.DataFrame):
        # DataFrame has no `nbytes` attribute; summing the per-column usage
        # (index excluded) gives the comparable figure.
        memory_usage = int(x_train.memory_usage(index=False).sum())
        feature_names = list(x_train.columns)
    else:
        # Objects without `nbytes` report None instead of raising.
        nbytes = getattr(x_train, "nbytes", None)
        memory_usage = None if nbytes is None else int(nbytes)
        feature_names = None

    if y_train is None:
        class_distribution = None
    else:
        counts = pd.Series(y_train).value_counts()
        # Cast counts to built-in int so the result holds no NumPy scalars.
        class_distribution = {label: int(count) for label, count in counts.items()}

    return {
        "memory_usage": memory_usage,
        "feature_names": feature_names,
        "class_distribution": class_distribution,
    }


def main(bench_case: BenchCase, filters: List[BenchCase]):
    """Benchmark one custom function described by ``bench_case``.

    Resolves the function from ``algorithm:library`` / ``algorithm:function``,
    loads and splits the data, assigns run-time special values, and measures
    the function unless the case is rejected by ``bench_case_filter``.

    Returns
    -------
    list
        A single-element list with the result dict, or an empty list when
        the case was filtered out.
    """
    library_name = get_bench_case_value(bench_case, "algorithm:library")
    function_name = get_bench_case_value(bench_case, "algorithm:function")
    function_instance = get_function_instance(library_name, function_name)

    # load and transform data
    data, data_description = load_data(bench_case)
    (x_train, x_test, y_train, y_test), data_description = split_and_transform_data(
        bench_case, data, data_description
    )

    # assign special values
    assign_case_special_values_on_run(
        bench_case, (x_train, y_train, x_test, y_test), data_description
    )

    function_args = get_function_args(bench_case, x_train, y_train, x_test, y_test)

    # The filter is evaluated only after the special values were assigned.
    if not bench_case_filter(bench_case, filters):
        logger.warning("Benchmarking case was filtered.")
        return []

    metrics = measure_function_instance(
        bench_case,
        function_instance,
        function_args,
        get_bench_case_value(bench_case, "algorithm:kwargs", dict()),
    )

    result = {
        "task": "utility",
        "function": function_name,
    }
    result = enrich_result(result, bench_case)
    # Replace `x_train` data_desc with more informative values
    result.update(_describe_training_data(x_train, y_train))
    result.update(metrics)
    return [result]
if __name__ == "__main__":
    # Script entry point: pass `main` to the shared template imported from `.common`.
    # NOTE(review): presumably `main_template` prepares the bench case(s) and
    # calls `main` with them -- confirm in `.common`.
    main_template(main)