Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Adding Auto-Sklearn #1060

Closed
wants to merge 38 commits into from
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
319b977
add automl
herilalaina Feb 5, 2021
22701aa
final evaluation only on test set
Feb 23, 2021
ad7365a
organize imports
Feb 23, 2021
33ab16d
Merge branch 'master' into add_auto_sklearn
Apr 22, 2021
fbf1be1
upgrade autosklearn version
herilalaina Apr 23, 2021
719aa14
remove configspace.json
herilalaina Apr 23, 2021
5d88551
remove auto-sklearn from requirements.txt
herilalaina Apr 23, 2021
82c09ed
improve get config space
herilalaina Apr 26, 2021
6248658
fix autosklearn version
herilalaina Apr 26, 2021
aa39791
merge
herilalaina Apr 26, 2021
38fa3c8
format code
herilalaina Apr 26, 2021
a820364
still fixing mpy error
herilalaina Apr 26, 2021
41b2ab5
ignore mypy error in experiments
herilalaina Apr 26, 2021
19b27c3
ignore mypy error in experiments
herilalaina Apr 26, 2021
97cf5df
ignore mypy error in experiments
herilalaina Apr 26, 2021
da4059d
tmp commit
herilalaina May 6, 2021
2efba5f
Update nevergrad/functions/automl/core.py
herilalaina Aug 14, 2021
a1acc8f
Update nevergrad/functions/automl/core.py
herilalaina Aug 14, 2021
436e9fd
Update nevergrad/functions/automl/core.py
herilalaina Aug 14, 2021
59319ca
Update nevergrad/functions/automl/ngautosklearn.py
herilalaina Aug 14, 2021
931ce2f
remove auto-sklearn requirements
herilalaina Aug 15, 2021
2ca2593
fix mypy
herilalaina Aug 17, 2021
d99c54b
add submitit
herilalaina Aug 17, 2021
7ade65b
merge
herilalaina Sep 15, 2021
3a9cf12
Merge branch 'main' into add_auto_sklearn
herilalaina Sep 15, 2021
7ac0f34
upgrade auto-sklearn
herilalaina Sep 15, 2021
a754345
fix gym version for the moment.
herilalaina Sep 16, 2021
1c23e9b
add more tests
herilalaina Sep 16, 2021
064cca0
improve tests
herilalaina Sep 16, 2021
861a134
improve tests
herilalaina Sep 16, 2021
a4d2f66
relax constraint on configspace
herilalaina Sep 22, 2021
47b0ec4
Merge branch 'main' into add_auto_sklearn
herilalaina Sep 22, 2021
e7d6a39
Merge branch 'main' into add_auto_sklearn
herilalaina Nov 18, 2021
e4ec0e5
fix requirements
herilalaina Nov 18, 2021
db234f1
test ?
herilalaina Nov 18, 2021
277cba4
test ?
herilalaina Nov 18, 2021
0761d03
merge with main
herilalaina Dec 17, 2021
6089f68
register + fix mypy errors
herilalaina Dec 17, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions nevergrad/benchmark/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from nevergrad.functions import control
from nevergrad.functions import rl
from nevergrad.functions.games import game
from nevergrad.functions.automl import AutoSKlearnBenchmark
from nevergrad.functions.causaldiscovery import CausalDiscovery
from nevergrad.functions import iohprofiler
from nevergrad.functions import helpers
Expand Down Expand Up @@ -138,6 +139,75 @@ def naivemltuning(seed: tp.Optional[int] = None) -> tp.Iterator[Experiment]:
return mltuning(seed, overfitter=True)


def autosklearntuning(seed: tp.Optional[int] = None):
    """Yield auto-sklearn pipeline tuning experiments on a subset of OpenML-CC18.

    One experiment is generated for every combination of budget (10, 50, 100),
    OpenML task id, optimizer and repetition (10 repetitions per combination).
    Each experiment gets its own ``AutoSKlearnBenchmark`` instance and its own
    seed, both drawn from the seed generator built from ``seed``.

    Parameters
    ----------
    seed: optional int
        seed handed to ``create_seed_generator``.

    Yields
    ------
    Experiment
        every generated experiment whose ``is_incoherent`` flag is falsy.
    """
    seedg = create_seed_generator(seed)

    # Only a small subset of the OpenML-CC18 tasks is considered
    list_tasks = [
        3, 11, 15, 18, 23, 29, 31, 37, 45, 49, 53,
        2079, 3022, 3549, 3560, 3902, 3903, 3913, 3917, 3918,
        9946, 9957, 9964, 9971, 9978, 9981,
        10093, 10101, 14954,
        125920, 146800, 146817, 146819, 146821, 146822,
    ]  # fmt: skip
    optims = ["HyperOpt", "RandomSearch", "CMA", "DE", "BO"]
    optims += get_optimizers("splitters", seed=next(seedg))  # type: ignore

    # Settings shared by every benchmark instance.
    shared_settings = dict(
        cv=3,
        overfitter=False,
        time_budget_per_run=300,
        memory_limit=1024 * 10,
        scoring_func="balanced_accuracy",
    )

    for budget in [10, 50, 100]:
        for task_id in list_tasks:
            for algo in optims:
                for _ in range(10):  # 10 repetitions, each with fresh seeds
                    # random_state is drawn before the experiment seed
                    func = AutoSKlearnBenchmark(
                        openml_task_id=task_id,
                        random_state=next(seedg),
                        **shared_settings,  # type: ignore
                    )
                    xp = Experiment(func, algo, budget, num_workers=1, seed=next(seedg))  # type: ignore
                    # NOTE(review): called only after the experiment has been built;
                    # presumably aborts the generator when running in CI - confirm.
                    skip_ci(reason="Too slow")
                    if xp.is_incoherent:
                        continue
                    yield xp


# We register only the sequential counterparts for the moment.
@registry.register
def seq_keras_tuning(seed: tp.Optional[int] = None) -> tp.Iterator[Experiment]:
Expand Down
6 changes: 6 additions & 0 deletions nevergrad/functions/automl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from .core import AutoSKlearnBenchmark as AutoSKlearnBenchmark
98 changes: 98 additions & 0 deletions nevergrad/functions/automl/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Trained policies were extracted from https://github.com/modestyachts/ARS
# under their own license. See ARS_LICENSE file in this file's directory
import nevergrad.common.typing as tp
import numpy as np
import openml
import pynisher # type: ignore

from .ngautosklearn import get_parametrization, get_configuration, _eval_function, get_config_space
from .. import base


class AutoSKlearnBenchmark(base.ExperimentFunction):
    """Experiment function scoring auto-sklearn classification pipelines on an OpenML task.

    The parametrization is derived from the auto-sklearn configuration space
    built for the training split of the task. During optimization a
    configuration is scored on the training split with cross-validation; in
    ``evaluation_function`` it is scored on the test split instead, unless
    ``overfitter`` is True.

    Parameters
    ----------
    openml_task_id: int
        id of the OpenML task providing the data and the train/test split
    cv: int
        number of cross-validation folds used on the training split
    time_budget_per_run: int
        wall time limit (seconds) given to pynisher for one evaluation
    memory_limit: int
        memory limit (MB) given to pynisher for one evaluation
    scoring_func: str
        scorer name forwarded to the evaluation function
    error_penalty: float
        loss returned whenever the evaluation does not produce a float
    overfitter: bool
        if True, ``evaluation_function`` keeps scoring on the training split
    random_state: optional int
        random state forwarded to the evaluation function
    """

    def __init__(
        self,
        openml_task_id: int,
        cv: int,
        time_budget_per_run: int,
        memory_limit: int,
        scoring_func: str = "balanced_accuracy",
        error_penalty: float = 1.0,
        overfitter: bool = False,
        random_state: tp.Optional[int] = None,
    ) -> None:

        self.openml_task_id = openml_task_id
        self.random_state = random_state
        self.cv = cv
        self.scoring_func = scoring_func
        self.memory_limit = memory_limit
        self.time_budget_per_run = time_budget_per_run
        self.error_penalty = error_penalty
        self.overfitter = overfitter
        # Toggled temporarily by evaluation_function; False during optimization.
        self.evaluate_on_test = False
        # Wrap the evaluation so that each call is run under memory/time limits.
        self.eval_func = pynisher.enforce_limits(
            mem_in_mb=memory_limit, wall_time_in_s=self.time_budget_per_run
        )(_eval_function)

        # Fetch the task data and apply the train/test split provided by the task.
        openml_task = openml.tasks.get_task(openml_task_id)
        self.dataset_name = openml_task.get_dataset().name
        X, y = openml_task.get_X_and_y()
        split = openml_task.get_train_test_split_indices()
        self.X_train, self.y_train = X[split[0]], y[split[0]]
        self.X_test, self.y_test = X[split[1]], y[split[1]]

        self.config_space = get_config_space(X=self.X_train, y=self.y_train)
        parametrization = get_parametrization(self.config_space)
        parametrization = parametrization.set_name(f"time={time_budget_per_run}")

        self.add_descriptors(
            openml_task_id=openml_task_id,
            cv=cv,
            scoring_func=scoring_func,
            memory_limit=memory_limit,
            time_budget_per_run=time_budget_per_run,
            error_penalty=error_penalty,
            overfitter=overfitter,
            dataset_name=self.dataset_name,
        )
        self._descriptors.pop("random_state", None)  # remove it from automatically added descriptors
        self.best_loss = np.inf
        self.best_config = None
        super().__init__(self._simulate, parametrization)

    def _simulate(self, **x) -> float:
        """Score one configuration and return its loss.

        The model is always fitted on the training split; the test split is
        only handed to the evaluation function when ``evaluate_on_test`` is set.
        Any result that is not a float is replaced by ``error_penalty``
        (presumably this covers evaluations stopped by the pynisher limits -
        TODO confirm what the wrapper returns in that case).
        """
        config = get_configuration(x, self.config_space)
        test_data = (self.X_test, self.y_test) if self.evaluate_on_test else None
        loss = self.eval_func(
            config=config,
            X=self.X_train,
            y=self.y_train,
            test_data=test_data,
            scoring_func=self.scoring_func,
            cv=self.cv,
            random_state=self.random_state,
        )
        return loss if isinstance(loss, float) else self.error_penalty

    def print_configuration(self, config):
        """Print the ConfigSpace configuration matching ``config.kwargs``."""
        print(get_configuration(config.kwargs, self.config_space))

    def evaluation_function(self, *args) -> float:
        """Final evaluation: scored on the test split unless ``overfitter`` is True.

        The ``evaluate_on_test`` flag is restored afterwards, so that later
        optimization-time calls on the same instance are not silently scored
        on the test split.
        """
        previous = self.evaluate_on_test
        self.evaluate_on_test = not self.overfitter
        try:
            return super().evaluation_function(*args)
        finally:
            self.evaluate_on_test = previous
167 changes: 167 additions & 0 deletions nevergrad/functions/automl/ngautosklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import warnings

import ConfigSpace as cs # type: ignore
import nevergrad as ng
import numpy as np
import scipy
from sklearn.metrics import get_scorer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

try:
from autosklearn.constants import BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION # type: ignore
from autosklearn.util.pipeline import get_configuration_space # type: ignore
from autosklearn.pipeline.classification import SimpleClassificationPipeline # type: ignore
except ImportError:
raise ImportError("Auto-Sklearn not installed. Run: python -m pip install auto-sklearn==0.11.0")


def _eval_function(
    config: cs.Configuration, X, y, scoring_func: str, cv: int, random_state: int, test_data: tuple = None
):
    """Return the loss (1 - score) of the pipeline described by ``config``.

    Without ``test_data`` the score is the mean of a shuffled, stratified
    ``cv``-fold cross-validation on ``(X, y)``. With ``test_data`` the pipeline
    is fitted on ``(X, y)`` and scored on ``(test_data[0], test_data[1])``.

    The integer 1 is returned when an SVM classifier is requested on a big
    dataset (more than 1500 rows or more than 1000 columns), and whenever any
    exception is raised during the evaluation.
    """
    try:
        # SVM training cannot be interrupted by pynisher, so it is naively
        # skipped for big datasets.
        is_svm = config["classifier:__choice__"] in ["liblinear_svc", "libsvm_svc"]
        if is_svm and (X.shape[0] > 1500 or X.shape[1] > 1000):
            return 1

        classifier = SimpleClassificationPipeline(config=config, random_state=random_state)
        scorer = get_scorer(scoring_func)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            if test_data is not None:
                # Hold-out evaluation: fit on the given data, score on the test data.
                classifier.fit(X, y)
                return 1 - scorer(classifier, test_data[0], test_data[1])
            folds = StratifiedKFold(n_splits=cv, random_state=random_state, shuffle=True)
            scores = cross_val_score(
                estimator=classifier,
                X=X,
                y=y,
                cv=folds,
                scoring=scorer,
                n_jobs=1,
            )
            return 1 - np.mean(scores)
    except Exception:
        # Best effort: any failure counts as the worst loss.
        return 1


def check_configuration(config_space, values):
    """Tell whether ``values`` describes a valid configuration of ``config_space``.

    ``values[1]`` is flattened with ``to_dict`` and used to build a
    ``cs.Configuration`` which rejects inactive hyperparameters with values.
    Returns False if that construction raises, True otherwise.
    """
    candidate = to_dict(values[1])
    try:
        cs.Configuration(
            configuration_space=config_space, values=candidate, allow_inactive_with_values=False
        )
    except Exception:
        return False
    else:
        return True


def get_config_space(X, y):
    """Build the auto-sklearn configuration space for a classification dataset.

    The task is binary classification when ``y`` holds exactly two distinct
    values and multiclass classification otherwise; sparsity is detected
    from ``X``.
    """
    n_classes = len(np.unique(y))
    task = BINARY_CLASSIFICATION if n_classes == 2 else MULTICLASS_CLASSIFICATION
    dataset_properties = {"task": task, "is_sparse": scipy.sparse.issparse(X)}
    return get_configuration_space(dataset_properties)


def get_instrumention(param):
    """Convert a ConfigSpace hyperparameter into a nevergrad parameter.

    - categorical -> ``ng.p.Choice`` over its choices
    - uniform integer / float -> ``ng.p.Scalar`` (or ``ng.p.Log`` when the
      hyperparameter is on a log scale), bounded by its lower/upper values and
      initialized at its default value; integers also get integer casting
    - constant -> ``ng.p.Constant``

    Raises
    ------
    TypeError
        if the hyperparameter is of none of the supported types.
    """
    hp_types = cs.hyperparameters
    if isinstance(param, hp_types.CategoricalHyperparameter):
        return ng.p.Choice(param.choices)

    is_integer = isinstance(param, hp_types.UniformIntegerHyperparameter)
    if is_integer or isinstance(param, hp_types.UniformFloatHyperparameter):
        scalar_cls = ng.p.Log if param.log else ng.p.Scalar
        scalar = scalar_cls(lower=param.lower, upper=param.upper, init=param.default_value)
        return scalar.set_integer_casting() if is_integer else scalar

    if isinstance(param, hp_types.Constant):
        return ng.p.Constant(param.value)

    # The message used to be a raw string, so the parameter was never interpolated.
    raise TypeError(f"{param} type not known")


def get_parametrization(config_space: cs.ConfigurationSpace):
    """Build the nevergrad instrumentation matching an auto-sklearn config space.

    Three top-level hyperparameters (balancing strategy, categorical encoding,
    numerical imputation strategy) are converted directly with
    ``get_instrumention``. The four component choices (classifier, feature
    preprocessor, numerical rescaling, category coalescence) each become a
    ``ng.p.Choice`` over ``(choice name, dict of that component's
    hyperparameters)`` tuples. Other hyperparameters of the space are ignored.

    ``check_configuration`` is registered as a cheap constraint on the
    returned instrumentation.
    """
    from functools import partial

    choice_suffix = "__choice__"
    # Choices whose selected component carries its own hyperparameters.
    nested_choices = {
        "classifier:__choice__",
        "feature_preprocessor:__choice__",
        "data_preprocessing:numerical_transformer:rescaling:__choice__",
        "data_preprocessing:categorical_transformer:category_coalescence:__choice__",
    }
    # Hyperparameters converted as they are.
    flat_params = {
        "balancing:strategy",
        "data_preprocessing:categorical_transformer:categorical_encoding:__choice__",
        "data_preprocessing:numerical_transformer:imputation:strategy",
    }

    hyperparameters = config_space.get_hyperparameters()
    params = {}
    for param in hyperparameters:
        if param.name in nested_choices:
            step_prefix = param.name[: -len(choice_suffix)]  # e.g. "classifier:"
            options = []
            for param_choice in param.choices:
                # Match on the exact "<step>:<choice>:" prefix. A plain substring
                # test would also pick up hyperparameters of other components
                # whose name merely contains the choice name (e.g. "pca" inside
                # "kernel_pca"), including components of other pipeline steps.
                # NOTE(review): assumes component hyperparameters are named
                # "<step>:<choice>:<hyperparameter>" - confirm against auto-sklearn.
                component_prefix = f"{step_prefix}{param_choice}:"
                component_params = {
                    hp.name: get_instrumention(hp)
                    for hp in hyperparameters
                    if hp.name.startswith(component_prefix)
                }
                options.append(ng.p.Tuple(ng.p.Constant(param_choice), ng.p.Dict(**component_params)))
            params[param.name] = ng.p.Choice(options)
        elif param.name in flat_params:
            params[param.name] = get_instrumention(param)

    inst = ng.p.Instrumentation(**params)
    inst.register_cheap_constraint(partial(check_configuration, config_space))
    return inst


def get_configuration(values, config_space):
    """Build the ``cs.Configuration`` of ``config_space`` described by ``values``.

    ``values`` is first flattened with ``to_dict``; inactive hyperparameters
    carrying a value are not allowed.
    """
    return cs.Configuration(
        configuration_space=config_space,
        values=to_dict(values),
        allow_inactive_with_values=False,
    )


def to_dict(values):
    """Flatten nested component choices into a plain hyperparameter dict.

    For the four component choices (classifier, feature preprocessor,
    category coalescence, numerical rescaling), ``values`` maps the choice
    name to a pair ``(selected choice, dict of its hyperparameters)``. The
    result maps the choice name to the selected choice only, and holds the
    component hyperparameters as top-level entries. All other entries are
    copied unchanged.

    A new dict is returned; ``values`` itself is left untouched.

    Raises
    ------
    KeyError
        if one of the four component choices is missing from ``values``.
    """
    nested_keys = (
        "classifier:__choice__",
        "feature_preprocessor:__choice__",
        "data_preprocessing:categorical_transformer:category_coalescence:__choice__",
        "data_preprocessing:numerical_transformer:rescaling:__choice__",
    )
    # Start from the entries which need no flattening.
    flat = {name: value for name, value in values.items() if name not in nested_keys}
    for key in nested_keys:
        nested = values[key]
        flat[key] = nested[0]
        flat.update(nested[1])
    return flat
Loading