Commit 82df489

MNT Applies black formatting to most of the code base (scikit-learn#18948)
1 parent 0e7761c commit 82df489

File tree: 513 files changed, +59810 -42580 lines changed

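For context, a minimal, hypothetical sketch (not part of this commit) of how a reformat like this is produced with black's Python API; the mode settings below are assumptions, not taken from the repository's configuration. The CLI equivalent is typically just running black on the source tree.

# Hypothetical illustration, not from the commit: feed one of the touched
# snippets through black and print the reformatted result.
# Requires `pip install black`.
import black

src = (
    "regex_to_labels = [\n"
    "    (r'\\bDOC\\b', 'Documentation'),\n"
    "    (r'\\bCI\\b', 'Build / CI')\n"
    "]\n"
)

# black.FileMode() uses the default 88-character line length; the exact
# settings used in scikit-learn's CI are an assumption here.
print(black.format_str(src, mode=black.FileMode()))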

.github/scripts/label_title_regex.py

+2 -8

@@ -15,15 +15,9 @@
 title = issue.title
 
 
-regex_to_labels = [
-    (r"\bDOC\b", "Documentation"),
-    (r"\bCI\b", "Build / CI")
-]
+regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
 
-labels_to_add = [
-    label for regex, label in regex_to_labels
-    if re.search(regex, title)
-]
+labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
 
 if labels_to_add:
     issue.add_to_labels(*labels_to_add)
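A small standalone check (hypothetical, not part of .github/scripts/label_title_regex.py) showing that the condensed one-line comprehension selects the same labels as the old multi-line layout:

# Hypothetical sanity check: the compacted comprehension is behaviorally
# identical to the previous multi-line version. The example title is made up.
import re

regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]

title = "DOC Fix a typo in the user guide"
labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]

assert labels_to_add == ["Documentation"]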

asv_benchmarks/benchmarks/cluster.py

+43 -39

@@ -10,16 +10,16 @@ class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     Benchmarks for KMeans.
     """
 
-    param_names = ['representation', 'algorithm', 'init']
-    params = (['dense', 'sparse'], ['full', 'elkan'], ['random', 'k-means++'])
+    param_names = ["representation", "algorithm", "init"]
+    params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, algorithm, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset(n_samples=8000)
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -29,44 +29,46 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, algorithm, init = params
 
-        max_iter = 30 if representation == 'sparse' else 100
+        max_iter = 30 if representation == "sparse" else 100
 
-        estimator = KMeans(n_clusters=20,
-                           algorithm=algorithm,
-                           init=init,
-                           n_init=1,
-                           max_iter=max_iter,
-                           tol=-1,
-                           random_state=0)
+        estimator = KMeans(
+            n_clusters=20,
+            algorithm=algorithm,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            tol=-1,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
 
 
 class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     """
     Benchmarks for MiniBatchKMeans.
     """
 
-    param_names = ['representation', 'init']
-    params = (['dense', 'sparse'], ['random', 'k-means++'])
+    param_names = ["representation", "init"]
+    params = (["dense", "sparse"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset()
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -76,25 +78,27 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, init = params
 
-        max_iter = 5 if representation == 'sparse' else 2
+        max_iter = 5 if representation == "sparse" else 2
 
-        estimator = MiniBatchKMeans(n_clusters=20,
-                                    init=init,
-                                    n_init=1,
-                                    max_iter=max_iter,
-                                    batch_size=1000,
-                                    max_no_improvement=None,
-                                    compute_labels=False,
-                                    random_state=0)
+        estimator = MiniBatchKMeans(
+            n_clusters=20,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            batch_size=1000,
+            max_no_improvement=None,
+            compute_labels=False,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
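For readers skimming the benchmark, a hedged standalone sketch of what the reformatted constructor builds; the parameter values mirror the diff, and it assumes a scikit-learn version contemporary with this commit (one that still accepts algorithm="full"/"elkan"):

# Hypothetical sketch, not the benchmark itself: the exploded call with a
# trailing comma (black's style) constructs the same estimator as the old
# single-call layout did.
from sklearn.cluster import KMeans

estimator = KMeans(
    n_clusters=20,
    algorithm="elkan",  # the benchmark parametrizes over "full" and "elkan"
    init="k-means++",  # and over "random" / "k-means++"
    n_init=1,
    max_iter=100,
    tol=-1,  # a negative tol disables tolerance-based early stopping
    random_state=0,
)
print(estimator)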

asv_benchmarks/benchmarks/common.py

+73 -52

@@ -14,86 +14,102 @@ def get_from_config():
     """Get benchmarks configuration from the config.json file"""
     current_path = Path(__file__).resolve().parent
 
-    config_path = current_path / 'config.json'
-    with open(config_path, 'r') as config_file:
-        config_file = ''.join(line for line in config_file
-                              if line and '//' not in line)
+    config_path = current_path / "config.json"
+    with open(config_path, "r") as config_file:
+        config_file = "".join(line for line in config_file if line and "//" not in line)
         config = json.loads(config_file)
 
-    profile = os.getenv('SKLBENCH_PROFILE', config['profile'])
+    profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
 
-    n_jobs_vals_env = os.getenv('SKLBENCH_NJOBS')
+    n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
     if n_jobs_vals_env:
         n_jobs_vals = eval(n_jobs_vals_env)
     else:
-        n_jobs_vals = config['n_jobs_vals']
+        n_jobs_vals = config["n_jobs_vals"]
     if not n_jobs_vals:
         n_jobs_vals = list(range(1, 1 + cpu_count()))
 
-    cache_path = current_path / 'cache'
+    cache_path = current_path / "cache"
     cache_path.mkdir(exist_ok=True)
-    (cache_path / 'estimators').mkdir(exist_ok=True)
-    (cache_path / 'tmp').mkdir(exist_ok=True)
+    (cache_path / "estimators").mkdir(exist_ok=True)
+    (cache_path / "tmp").mkdir(exist_ok=True)
 
-    save_estimators = os.getenv('SKLBENCH_SAVE_ESTIMATORS',
-                                config['save_estimators'])
-    save_dir = os.getenv('ASV_COMMIT', 'new')[:8]
+    save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
+    save_dir = os.getenv("ASV_COMMIT", "new")[:8]
 
     if save_estimators:
-        (cache_path / 'estimators' / save_dir).mkdir(exist_ok=True)
+        (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)
 
-    base_commit = os.getenv('SKLBENCH_BASE_COMMIT', config['base_commit'])
+    base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])
 
-    bench_predict = os.getenv('SKLBENCH_PREDICT', config['bench_predict'])
-    bench_transform = os.getenv('SKLBENCH_TRANSFORM',
-                                config['bench_transform'])
+    bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
+    bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])
 
-    return (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-            bench_predict, bench_transform)
+    return (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    )
 
 
 def get_estimator_path(benchmark, directory, params, save=False):
     """Get path of pickled fitted estimator"""
-    path = Path(__file__).resolve().parent / 'cache'
-    path = (path / 'estimators' / directory) if save else (path / 'tmp')
+    path = Path(__file__).resolve().parent / "cache"
+    path = (path / "estimators" / directory) if save else (path / "tmp")
 
-    filename = (benchmark.__class__.__name__
-                + '_estimator_' + '_'.join(list(map(str, params))) + '.pkl')
+    filename = (
+        benchmark.__class__.__name__
+        + "_estimator_"
+        + "_".join(list(map(str, params)))
+        + ".pkl"
+    )
 
     return path / filename
 
 
 def clear_tmp():
     """Clean the tmp directory"""
-    path = Path(__file__).resolve().parent / 'cache' / 'tmp'
+    path = Path(__file__).resolve().parent / "cache" / "tmp"
    for child in path.iterdir():
         child.unlink()
 
 
 class Benchmark(ABC):
     """Abstract base class for all the benchmarks"""
+
     timer = timeit.default_timer  # wall time
     processes = 1
     timeout = 500
 
-    (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-     bench_predict, bench_transform) = get_from_config()
-
-    if profile == 'fast':
+    (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    ) = get_from_config()
+
+    if profile == "fast":
         warmup_time = 0
         repeat = 1
         number = 1
         min_run_count = 1
-        data_size = 'small'
-    elif profile == 'regular':
+        data_size = "small"
+    elif profile == "regular":
         warmup_time = 1
         repeat = (3, 100, 30)
-        data_size = 'small'
-    elif profile == 'large_scale':
+        data_size = "small"
+    elif profile == "large_scale":
         warmup_time = 1
         repeat = 3
         number = 1
-        data_size = 'large'
+        data_size = "large"
 
     @property
     @abstractmethod
@@ -103,6 +119,7 @@ def params(self):
 
 class Estimator(ABC):
     """Abstract base class for all benchmarks of estimators"""
+
     @abstractmethod
     def make_data(self, params):
         """Return the dataset for a combination of parameters"""
@@ -112,8 +129,7 @@ def make_data(self, params):
 
     @abstractmethod
     def make_estimator(self, params):
-        """Return an instance of the estimator for a combination of parameters
-        """
+        """Return an instance of the estimator for a combination of parameters"""
         pass
 
     def skip(self, params):
@@ -137,9 +153,10 @@ def setup_cache(self):
 
             estimator.fit(X, y)
 
-            est_path = get_estimator_path(self, Benchmark.save_dir,
-                                          params, Benchmark.save_estimators)
-            with est_path.open(mode='wb') as f:
+            est_path = get_estimator_path(
+                self, Benchmark.save_dir, params, Benchmark.save_estimators
+            )
+            with est_path.open(mode="wb") as f:
                 pickle.dump(estimator, f)
 
     def setup(self, *params):
@@ -152,9 +169,10 @@ def setup(self, *params):
 
         self.X, self.X_val, self.y, self.y_val = self.make_data(params)
 
-        est_path = get_estimator_path(self, Benchmark.save_dir,
-                                      params, Benchmark.save_estimators)
-        with est_path.open(mode='rb') as f:
+        est_path = get_estimator_path(
+            self, Benchmark.save_dir, params, Benchmark.save_estimators
+        )
+        with est_path.open(mode="rb") as f:
             self.estimator = pickle.load(f)
 
         self.make_scorers()
@@ -166,14 +184,14 @@ def peakmem_fit(self, *args):
         self.estimator.fit(self.X, self.y)
 
     def track_train_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
             y_pred = self.estimator.predict(self.X)
         else:
             y_pred = None
         return float(self.train_scorer(self.y, y_pred))
 
     def track_test_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
             y_val_pred = self.estimator.predict(self.X_val)
         else:
             y_val_pred = None
@@ -182,18 +200,20 @@ def track_test_score(self, *args):
 
 class Predictor(ABC):
     """Abstract base class for benchmarks of estimators implementing predict"""
+
     if Benchmark.bench_predict:
+
         def time_predict(self, *args):
             self.estimator.predict(self.X)
 
         def peakmem_predict(self, *args):
             self.estimator.predict(self.X)
 
         if Benchmark.base_commit is not None:
+
             def track_same_prediction(self, *args):
-                est_path = get_estimator_path(self, Benchmark.base_commit,
-                                              args, True)
-                with est_path.open(mode='rb') as f:
+                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+                with est_path.open(mode="rb") as f:
                     estimator_base = pickle.load(f)
 
                 y_val_pred_base = estimator_base.predict(self.X_val)
@@ -208,20 +228,21 @@ def params(self):
 
 
 class Transformer(ABC):
-    """Abstract base class for benchmarks of estimators implementing transform
-    """
+    """Abstract base class for benchmarks of estimators implementing transform"""
+
     if Benchmark.bench_transform:
+
         def time_transform(self, *args):
             self.estimator.transform(self.X)
 
         def peakmem_transform(self, *args):
             self.estimator.transform(self.X)
 
         if Benchmark.base_commit is not None:
+
             def track_same_transform(self, *args):
-                est_path = get_estimator_path(self, Benchmark.base_commit,
-                                              args, True)
-                with est_path.open(mode='rb') as f:
+                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+                with est_path.open(mode="rb") as f:
                     estimator_base = pickle.load(f)
 
                 X_val_t_base = estimator_base.transform(self.X_val)
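As a side note on the pattern visible in get_from_config above, a hedged, self-contained sketch of the environment-variable override it performs; the JSON content and values here are illustrative, not read from the repository's config.json:

# Hypothetical standalone sketch of the override pattern in get_from_config():
# an SKLBENCH_* environment variable, when set, takes precedence over the
# value stored in the JSON configuration.
import json
import os

config = json.loads('{"profile": "regular", "n_jobs_vals": [1, 4]}')

profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
n_jobs_vals = eval(n_jobs_vals_env) if n_jobs_vals_env else config["n_jobs_vals"]

print(profile, n_jobs_vals)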
