def get_from_config():
    """Get benchmarks configuration from the config.json file.

    The file ``config.json`` is read from the directory containing this
    module. Every line containing ``//`` is dropped before the remaining text
    is parsed as JSON. Each setting can be overridden by an environment
    variable; overrides are returned as the raw strings found in the
    environment (except ``SKLBENCH_NJOBS``, which is parsed as a literal).

    As a side effect, the ``cache``, ``cache/estimators`` and ``cache/tmp``
    directories are created next to this module, plus
    ``cache/estimators/<save_dir>`` when ``save_estimators`` is truthy.

    Returns
    -------
    tuple
        ``(profile, n_jobs_vals, save_estimators, save_dir, base_commit,
        bench_predict, bench_transform)``.

    Raises
    ------
    ValueError
        If ``SKLBENCH_NJOBS`` is set but is not a valid Python literal.
    """
    # Local import so this function does not rely on a module-level
    # ``import ast`` being present.
    import ast

    current_path = Path(__file__).resolve().parent

    config_path = current_path / "config.json"
    with open(config_path, "r") as config_file:
        # Lines containing "//" are treated as comments and removed so that
        # the remaining text is valid JSON.
        config_text = "".join(line for line in config_file if "//" not in line)
    config = json.loads(config_text)

    profile = os.getenv("SKLBENCH_PROFILE", config["profile"])

    n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
    if n_jobs_vals_env:
        # NOTE(review): this used to be ``eval(n_jobs_vals_env)``, which runs
        # arbitrary code taken from the environment. ``ast.literal_eval`` only
        # accepts Python literals such as "[1, 2, 4]"; expressions such as
        # "range(1, 5)" are now rejected.
        try:
            n_jobs_vals = ast.literal_eval(n_jobs_vals_env)
        except (ValueError, TypeError, SyntaxError) as exc:
            raise ValueError(
                "SKLBENCH_NJOBS must be a Python literal such as '[1, 2, 4]', "
                f"got {n_jobs_vals_env!r}"
            ) from exc
    else:
        n_jobs_vals = config["n_jobs_vals"]
    if not n_jobs_vals:
        # An empty value means: every n_jobs from 1 up to the CPU count.
        n_jobs_vals = list(range(1, 1 + cpu_count()))

    cache_path = current_path / "cache"
    cache_path.mkdir(exist_ok=True)
    (cache_path / "estimators").mkdir(exist_ok=True)
    (cache_path / "tmp").mkdir(exist_ok=True)

    # NOTE(review): when the value comes from the environment it is a string,
    # so any non-empty value (including "false") is truthy below -- confirm
    # this is the intended contract.
    save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
    # Only the first 8 characters of ASV_COMMIT are used as directory name.
    save_dir = os.getenv("ASV_COMMIT", "new")[:8]

    if save_estimators:
        (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)

    base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])

    bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
    bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])

    return (
        profile,
        n_jobs_vals,
        save_estimators,
        save_dir,
        base_commit,
        bench_predict,
        bench_transform,
    )
53
57
54
58
55
59
def get_estimator_path(benchmark, directory, params, save=False):
    """Get path of pickled fitted estimator.

    Parameters
    ----------
    benchmark : object
        Benchmark instance; its class name is the prefix of the file name.
    directory : str or path-like
        Sub-directory of ``cache/estimators`` to use when ``save`` is true.
    params : iterable
        Benchmark parameters; their ``str`` forms are joined with ``_`` to
        build the file name.
    save : bool, default=False
        If true, the path points into ``cache/estimators/<directory>``,
        otherwise into ``cache/tmp``.

    Returns
    -------
    pathlib.Path
        ``<module dir>/cache/.../<ClassName>_estimator_<params>.pkl``. Nothing
        is created on disk by this function.
    """
    cache_dir = Path(__file__).resolve().parent / "cache"
    if save:
        target_dir = cache_dir / "estimators" / directory
    else:
        target_dir = cache_dir / "tmp"

    param_tag = "_".join(map(str, params))
    filename = f"{benchmark.__class__.__name__}_estimator_{param_tag}.pkl"

    return target_dir / filename
64
72
65
73
66
74
def clear_tmp():
    """Clean the tmp directory.

    Unlinks every entry found directly inside ``cache/tmp`` next to this
    module. The directory itself is kept.
    """
    tmp_dir = Path(__file__).resolve().parent / "cache" / "tmp"
    for entry in tmp_dir.iterdir():
        entry.unlink()
71
79
72
80
73
81
class Benchmark (ABC ):
74
82
"""Abstract base class for all the benchmarks"""
83
+
75
84
timer = timeit .default_timer # wall time
76
85
processes = 1
77
86
timeout = 500
78
87
79
- (profile , n_jobs_vals , save_estimators , save_dir , base_commit ,
80
- bench_predict , bench_transform ) = get_from_config ()
81
-
82
- if profile == 'fast' :
88
+ (
89
+ profile ,
90
+ n_jobs_vals ,
91
+ save_estimators ,
92
+ save_dir ,
93
+ base_commit ,
94
+ bench_predict ,
95
+ bench_transform ,
96
+ ) = get_from_config ()
97
+
98
+ if profile == "fast" :
83
99
warmup_time = 0
84
100
repeat = 1
85
101
number = 1
86
102
min_run_count = 1
87
- data_size = ' small'
88
- elif profile == ' regular' :
103
+ data_size = " small"
104
+ elif profile == " regular" :
89
105
warmup_time = 1
90
106
repeat = (3 , 100 , 30 )
91
- data_size = ' small'
92
- elif profile == ' large_scale' :
107
+ data_size = " small"
108
+ elif profile == " large_scale" :
93
109
warmup_time = 1
94
110
repeat = 3
95
111
number = 1
96
- data_size = ' large'
112
+ data_size = " large"
97
113
98
114
@property
99
115
@abstractmethod
@@ -103,6 +119,7 @@ def params(self):
103
119
104
120
class Estimator (ABC ):
105
121
"""Abstract base class for all benchmarks of estimators"""
122
+
106
123
@abstractmethod
107
124
def make_data (self , params ):
108
125
"""Return the dataset for a combination of parameters"""
@@ -112,8 +129,7 @@ def make_data(self, params):
112
129
113
130
@abstractmethod
def make_estimator(self, params):
    """Return an instance of the estimator for a combination of parameters.

    Abstract hook: this base implementation does nothing and returns None.
    """
    return None
118
134
119
135
def skip (self , params ):
@@ -137,9 +153,10 @@ def setup_cache(self):
137
153
138
154
estimator .fit (X , y )
139
155
140
- est_path = get_estimator_path (self , Benchmark .save_dir ,
141
- params , Benchmark .save_estimators )
142
- with est_path .open (mode = 'wb' ) as f :
156
+ est_path = get_estimator_path (
157
+ self , Benchmark .save_dir , params , Benchmark .save_estimators
158
+ )
159
+ with est_path .open (mode = "wb" ) as f :
143
160
pickle .dump (estimator , f )
144
161
145
162
def setup (self , * params ):
@@ -152,9 +169,10 @@ def setup(self, *params):
152
169
153
170
self .X , self .X_val , self .y , self .y_val = self .make_data (params )
154
171
155
- est_path = get_estimator_path (self , Benchmark .save_dir ,
156
- params , Benchmark .save_estimators )
157
- with est_path .open (mode = 'rb' ) as f :
172
+ est_path = get_estimator_path (
173
+ self , Benchmark .save_dir , params , Benchmark .save_estimators
174
+ )
175
+ with est_path .open (mode = "rb" ) as f :
158
176
self .estimator = pickle .load (f )
159
177
160
178
self .make_scorers ()
@@ -166,14 +184,14 @@ def peakmem_fit(self, *args):
166
184
self .estimator .fit (self .X , self .y )
167
185
168
186
def track_train_score(self, *args):
    """Return the score of the estimator on the training data as a float.

    ``self.train_scorer`` is called with ``self.y`` and the predictions of
    ``self.estimator`` on ``self.X``. When the estimator has no ``predict``
    attribute, ``None`` is passed in place of the predictions.
    ``*args`` is accepted but unused.
    """
    estimator = self.estimator
    y_pred = estimator.predict(self.X) if hasattr(estimator, "predict") else None
    return float(self.train_scorer(self.y, y_pred))
174
192
175
193
def track_test_score (self , * args ):
176
- if hasattr (self .estimator , ' predict' ):
194
+ if hasattr (self .estimator , " predict" ):
177
195
y_val_pred = self .estimator .predict (self .X_val )
178
196
else :
179
197
y_val_pred = None
@@ -182,18 +200,20 @@ def track_test_score(self, *args):
182
200
183
201
class Predictor (ABC ):
184
202
"""Abstract base class for benchmarks of estimators implementing predict"""
203
+
185
204
if Benchmark .bench_predict :
205
+
186
206
def time_predict(self, *args):
    """Call ``predict`` of ``self.estimator`` on ``self.X``; the result is discarded."""
    estimator = self.estimator
    estimator.predict(self.X)
188
208
189
209
def peakmem_predict(self, *args):
    """Call ``predict`` of ``self.estimator`` on ``self.X``; the result is discarded."""
    estimator = self.estimator
    estimator.predict(self.X)
191
211
192
212
if Benchmark .base_commit is not None :
213
+
193
214
def track_same_prediction (self , * args ):
194
- est_path = get_estimator_path (self , Benchmark .base_commit ,
195
- args , True )
196
- with est_path .open (mode = 'rb' ) as f :
215
+ est_path = get_estimator_path (self , Benchmark .base_commit , args , True )
216
+ with est_path .open (mode = "rb" ) as f :
197
217
estimator_base = pickle .load (f )
198
218
199
219
y_val_pred_base = estimator_base .predict (self .X_val )
@@ -208,20 +228,21 @@ def params(self):
208
228
209
229
210
230
class Transformer (ABC ):
211
- """Abstract base class for benchmarks of estimators implementing transform
212
- """
231
+ """Abstract base class for benchmarks of estimators implementing transform"""
232
+
213
233
if Benchmark .bench_transform :
234
+
214
235
def time_transform(self, *args):
    """Call ``transform`` of ``self.estimator`` on ``self.X``; the result is discarded."""
    estimator = self.estimator
    estimator.transform(self.X)
216
237
217
238
def peakmem_transform(self, *args):
    """Call ``transform`` of ``self.estimator`` on ``self.X``; the result is discarded."""
    estimator = self.estimator
    estimator.transform(self.X)
219
240
220
241
if Benchmark .base_commit is not None :
242
+
221
243
def track_same_transform (self , * args ):
222
- est_path = get_estimator_path (self , Benchmark .base_commit ,
223
- args , True )
224
- with est_path .open (mode = 'rb' ) as f :
244
+ est_path = get_estimator_path (self , Benchmark .base_commit , args , True )
245
+ with est_path .open (mode = "rb" ) as f :
225
246
estimator_base = pickle .load (f )
226
247
227
248
X_val_t_base = estimator_base .transform (self .X_val )
0 commit comments