diff --git a/CHANGELOG.md b/CHANGELOG.md
index 501919b07..000dcd804 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@
 # 2.3.0
 
+## Features
+- Improved batch sampling: Fantasize points in batch/parallel mode (#1154).
+
 ## Documentation
 - Update windows install guide (#952)
 - Correct intensifier for Algorithm Configuration Facade (#1162, #1165)
diff --git a/examples/1_basics/7_0_parallelization_fantasize.py b/examples/1_basics/7_0_parallelization_fantasize.py
new file mode 100644
index 000000000..0c2a4dec1
--- /dev/null
+++ b/examples/1_basics/7_0_parallelization_fantasize.py
@@ -0,0 +1,112 @@
+"""Example of using SMAC with parallelization, comparing fantasization of pending evaluations against no estimation.
+
+This example will take some time because the target function is artificially slowed down to demonstrate the effect
+of fantasization. The example plots the incumbents found by SMAC with and without fantasization.
+"""
+from __future__ import annotations
+
+import time
+
+import numpy as np
+from ConfigSpace import Configuration, ConfigurationSpace, Float
+from matplotlib import pyplot as plt
+
+from smac import BlackBoxFacade, Scenario
+from smac.facade import AbstractFacade
+
+
+def plot_trajectory(facades: list[AbstractFacade], names: list[str]) -> None:
+    # Plot the incumbent trajectory over walltime and number of trials
+    cmap = plt.get_cmap("tab10")
+
+    fig = plt.figure()
+    axes = fig.subplots(1, 2)
+
+    for ax_i, x_axis in zip(axes, ["walltime", "trial"]):
+        for i, facade in enumerate(facades):
+            X, Y = [], []
+            for item in facade.intensifier.trajectory:
+                # Single-objective optimization
+                assert len(item.config_ids) == 1
+                assert len(item.costs) == 1
+
+                y = item.costs[0]
+                x = getattr(item, x_axis)
+
+                X.append(x)
+                Y.append(y)
+
+            ax_i.plot(X, Y, label=names[i], color=cmap(i))
+            ax_i.scatter(X, Y, marker="x", color=cmap(i))
+            ax_i.set_xlabel(x_axis)
+            ax_i.set_ylabel(facades[0].scenario.objectives)
+            ax_i.set_yscale("log")
+            ax_i.legend()
+
+    plt.show()
+
+
+class Branin:
+    @property
+    def configspace(self) -> ConfigurationSpace:
+        # Build the configuration space, which defines all parameters and their ranges
+        cs = ConfigurationSpace(seed=0)
+
+        # First we create our hyperparameters
+        x1 = Float("x1", (-5, 10), default=0)
+        x2 = Float("x2", (0, 15), default=0)
+
+        # Add hyperparameters and conditions to our configspace
+        cs.add([x1, x2])
+
+        return cs
+
+    def train(self, config: Configuration, seed: int) -> float:
+        x1 = config["x1"]
+        x2 = config["x2"]
+        a = 1.0
+        b = 5.1 / (4.0 * np.pi**2)
+        c = 5.0 / np.pi
+        r = 6.0
+        s = 10.0
+        t = 1.0 / (8.0 * np.pi)
+
+        cost = a * (x2 - b * x1**2 + c * x1 - r) ** 2 + s * (1 - t) * np.cos(x1) + s
+        regret = cost - 0.397887
+
+        # Artificially slow down the target function so that configurations are still running
+        # while new ones are sampled -- this is the situation fantasization exploits
+        time.sleep(10)
+
+        return regret
+
+
+if __name__ == "__main__":
+    seed = 345455
+    scenario = Scenario(n_trials=100, configspace=Branin().configspace, n_workers=4, seed=seed)
+    facade = BlackBoxFacade
+
+    smac_noestimation = facade(
+        scenario=scenario,
+        target_function=Branin().train,
+        overwrite=True,
+    )
+    smac_fantasize = facade(
+        scenario=scenario,
+        target_function=Branin().train,
+        config_selector=facade.get_config_selector(
+            scenario=scenario,
+            batch_sampling_estimation_strategy="kriging_believer",
+        ),
+        overwrite=True,
+        logging_level=0,
+    )
+
+    incumbent_noestimation = smac_noestimation.optimize()
+    incumbent_fantasize = smac_fantasize.optimize()
+
+    plot_trajectory(
+        facades=[smac_noestimation, smac_fantasize],
+        names=["No Estimation", "Fantasize"],
+    )
+
+    del smac_noestimation
+    del smac_fantasize
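Before the library diffs below, a condensed view of the single new knob the example opts into. This is a minimal sketch using only names introduced in this patch; the accepted strategy strings are documented in the facade docstrings further down.

```python
from smac import BlackBoxFacade, Scenario

# Accepted values for batch_sampling_estimation_strategy (see the facade docstrings below):
#   "no_estimate"                   - default; pending configurations are ignored
#   "CL_min" / "CL_mean" / "CL_max" - constant liar: min / mean / max of the evaluated costs
#   "kriging_believer"              - predicted mean of the surrogate model
#   "sample"                        - one joint sample from the surrogate (GP only)


def make_fantasizing_selector(scenario: Scenario):
    """Sketch: build a config selector that fantasizes pending configurations."""
    return BlackBoxFacade.get_config_selector(
        scenario,
        batch_sampling_estimation_strategy="kriging_believer",
    )
```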
diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py
index 048378802..69c75cb72 100644
--- a/smac/facade/abstract_facade.py
+++ b/smac/facade/abstract_facade.py
@@ -423,9 +423,47 @@ def get_config_selector(
         *,
         retrain_after: int = 8,
         retries: int = 16,
+        min_trials: int = 1,
+        batch_sampling_estimation_strategy: str = "no_estimate",
     ) -> ConfigSelector:
-        """Returns the default configuration selector."""
-        return ConfigSelector(scenario, retrain_after=retrain_after, max_new_config_tries=retries)
+        """Returns the default configuration selector.
+
+        Parameters
+        ----------
+        retrain_after : int, defaults to 8
+            How many configurations should be returned before the surrogate model is retrained.
+        retries : int, defaults to 16
+            How often to retry receiving a new configuration before giving up.
+        min_trials : int, defaults to 1
+            How many samples are required to train the surrogate model. If budgets are involved,
+            the highest budgets are checked first. For example, if min_trials is three, but we find only
+            two trials in the runhistory for the highest budget, we will use trials of a lower budget
+            instead.
+        batch_sampling_estimation_strategy : str, defaults to "no_estimate"
+
+            Warning: This is intended for the black-box optimization setting with a Gaussian process and
+            only works sensibly without multi-fidelity.
+
+            Strategy for estimating the cost of pending configurations when sampling in batch/parallel mode.
+            During batch sampling, the ConfigSelector might need to suggest new configurations while some
+            configurations are still running. This argument determines whether we use that information and
+            fantasize their outcomes. With "no_estimate", running configurations are ignored. With
+            "kriging_believer", the predicted mean of the surrogate model is used as the estimate for the
+            pending configurations. With "CL_min"/"CL_mean"/"CL_max" (constant liar), the min/mean/max of the
+            already evaluated costs is used. With "sample", the estimates are sampled from the surrogate model
+            (only a Gaussian process is allowed in this case).
+
+        Returns
+        -------
+        ConfigSelector
+            The instantiated configuration selector proposing new configurations (by optimizing the acquisition
+            function).
+        """
+        return ConfigSelector(
+            scenario,
+            retrain_after=retrain_after,
+            max_new_config_tries=retries,
+            min_trials=min_trials,
+            batch_sampling_estimation_strategy=batch_sampling_estimation_strategy,
+        )
 
     def _get_optimizer(self) -> SMBO:
         """Fills the SMBO with all the pre-initialized components."""
diff --git a/smac/facade/blackbox_facade.py b/smac/facade/blackbox_facade.py
index ac4936f78..95e96ba96 100644
--- a/smac/facade/blackbox_facade.py
+++ b/smac/facade/blackbox_facade.py
@@ -318,9 +318,41 @@ def get_config_selector(
         scenario: Scenario,
         *,
         retrain_after: int = 1,
+        min_trials: int = 1,
         retries: int = 16,
+        batch_sampling_estimation_strategy: str = "no_estimate",
     ) -> ConfigSelector:
-        """Returns the default configuration selector."""
+        """Returns the default configuration selector.
+
+        Parameters
+        ----------
+        retrain_after : int, defaults to 1
+            How many configurations should be returned before the surrogate model is retrained.
+        retries : int, defaults to 16
+            How often to retry receiving a new configuration before giving up.
+        min_trials : int, defaults to 1
+            How many samples are required to train the surrogate model. If budgets are involved,
+            the highest budgets are checked first. For example, if min_trials is three, but we find only
+            two trials in the runhistory for the highest budget, we will use trials of a lower budget
+            instead.
+        batch_sampling_estimation_strategy : str, defaults to "no_estimate"
+            Strategy for estimating the cost of pending configurations when sampling in batch/parallel mode.
+            During batch sampling, the ConfigSelector might need to suggest new configurations while some
+            configurations are still running. This argument determines whether we use that information and
+            fantasize their outcomes. With "no_estimate", running configurations are ignored. With
+            "kriging_believer", the predicted mean of the surrogate model is used as the estimate for the
+            pending configurations. With "CL_min"/"CL_mean"/"CL_max" (constant liar), the min/mean/max of the
+            already evaluated costs is used. With "sample", the estimates are sampled from the surrogate model
+            (only a Gaussian process is allowed in this case).
+
+        Returns
+        -------
+        ConfigSelector
+            The instantiated configuration selector proposing new configurations (by optimizing the acquisition
+            function).
+        """
         return super(BlackBoxFacade, BlackBoxFacade).get_config_selector(
-            scenario, retrain_after=retrain_after, retries=retries
+            scenario,
+            retrain_after=retrain_after,
+            min_trials=min_trials,
+            retries=retries,
+            batch_sampling_estimation_strategy=batch_sampling_estimation_strategy,
         )
diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py
index 4b8f62068..55f29db38 100644
--- a/smac/main/config_selector.py
+++ b/smac/main/config_selector.py
@@ -16,6 +16,8 @@
 from smac.callback.callback import Callback
 from smac.initial_design import AbstractInitialDesign
 from smac.model.abstract_model import AbstractModel
+from smac.model.gaussian_process import GaussianProcess
+from smac.model.random_forest import RandomForest
 from smac.random_design.abstract_random_design import AbstractRandomDesign
 from smac.runhistory.encoder.abstract_encoder import AbstractRunHistoryEncoder
 from smac.runhistory.runhistory import RunHistory
@@ -44,6 +46,14 @@ class ConfigSelector:
         the highest budgets are checked first. For example, if min_trials is three, but we find only
         two trials in the runhistory for the highest budget, we will use trials of a lower budget
         instead.
+    batch_sampling_estimation_strategy : str, defaults to "no_estimate"
+        Strategy for estimating the cost of pending configurations when sampling in batch/parallel mode. During
+        batch sampling, the ConfigSelector might need to suggest new configurations while some configurations are
+        still running. This argument determines whether we use that information and fantasize their outcomes. With
+        "no_estimate", running configurations are ignored. With "kriging_believer", the predicted mean of the
+        surrogate model is used as the estimate for the pending configurations. With "CL_min"/"CL_mean"/"CL_max"
+        (constant liar), the min/mean/max of the already evaluated costs is used. With "sample", the estimates are
+        sampled from the surrogate model (only a Gaussian process is allowed in this case).
""" def __init__( @@ -53,6 +63,7 @@ def __init__( retrain_after: int = 8, max_new_config_tries: int = 16, min_trials: int = 1, + batch_sampling_estimation_strategy: str = "no_estimate", ) -> None: # Those are the configs sampled from the passed initial design # Selecting configurations from initial design @@ -82,6 +93,9 @@ def __init__( # Processed configurations should be stored here; this is important to not return the same configuration twice self._processed_configs: list[Configuration] = [] + # for batch sampling setting + self._batch_sampling_estimation_strategy = batch_sampling_estimation_strategy + def _set_components( self, initial_design: AbstractInitialDesign, @@ -286,6 +300,24 @@ def _collect_data(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: # Possible add running configs? configs_array = self._runhistory_encoder.get_configurations(budget_subset=self._considered_budgets) + # add running configurations + # If our batch size is 1, then no running configuration should exist, we could then skip this part. + # Therefore, there is no need to check the number of workers in this case + + X_running = self._runhistory_encoder.transform_running_configs(budget_subset=[b]) + if self._batch_sampling_estimation_strategy != "no_estimate": + Y_estimated = self.estimate_running_config_costs( + X_running, Y, self._batch_sampling_estimation_strategy + ) + # if there is no running configurations, we directly return X, Y and configs_array + if Y_estimated is not None: + configs_array_running = self._runhistory_encoder.get_running_configurations( + budget_subset=self._considered_budgets + ) + X = np.concatenate([X, X_running], axis=0) + Y = np.concatenate([Y, Y_estimated], axis=0) + configs_array = np.concatenate([configs_array, configs_array_running], axis=0) + return X, Y, configs_array return ( @@ -302,6 +334,76 @@ def _get_evaluated_configs(self) -> list[Configuration]: assert self._runhistory is not None return self._runhistory.get_configs_per_budget(budget_subset=self._considered_budgets) + def estimate_running_config_costs( + self, X_running: np.ndarray, Y_evaluated: np.ndarray, estimation_strategy: str = "CL_max" + ) -> np.ndarray: + """This function is implemented to estimate the still pending/ running configurations + + Parameters + ---------- + X_running : np.ndarray + a np array with size (n_running_configs, D) that represents the array values of the running configurations + Y_evaluated : np.ndarray + a np array with size (n_evaluated_configs, n_obj) that records the costs of all the previous evaluated + configurations + + estimation_strategy: str + how do we estimate the target y_running values, we have the following strategy: + CL_max: constant liar max, we take the maximal of all the evaluated Y and apply them to the running X + CL_min: constant liar min, we take the minimal of all the evaluated Y and apply them to the running X + CL_mean: constant liar mean, we take the mean of all the evaluated Y and apply them to the running X + kriging_believer: kriging believer, we apply the predicted means from the surrogate model to running X + values + sample: estimations for X are sampled from the surrogate models. Since the samples need to be sampled from a + joint distribution for all X, we only allow sample strategy with GP as surrogate models. 
+
+        Returns
+        -------
+        Y_running_estimated : np.ndarray | None
+            The estimated y values for the running configurations, or None if there are no running configurations.
+        """
+        n_running_points = len(X_running)
+        if n_running_points == 0:
+            return None
+        if estimation_strategy == "CL_max":
+            # Constant liar max: we take the maximal values of all evaluated Y and apply them to the running X
+            Y_estimated = np.nanmax(Y_evaluated, axis=0, keepdims=True)
+            return np.repeat(Y_estimated, n_running_points, 0)
+        elif estimation_strategy == "CL_min":
+            # Constant liar min: we take the minimal values of all evaluated Y and apply them to the running X
+            Y_estimated = np.nanmin(Y_evaluated, axis=0, keepdims=True)
+            return np.repeat(Y_estimated, n_running_points, 0)
+        elif estimation_strategy == "CL_mean":
+            # Constant liar mean: we take the mean values of all evaluated Y and apply them to the running X
+            Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
+            return np.repeat(Y_estimated, n_running_points, 0)
+        elif estimation_strategy == "kriging_believer":
+            # Kriging believer: we apply the predicted means of the surrogate model to estimate the running X.
+            # Check whether the model has been trained already
+            if (
+                isinstance(self._model, GaussianProcess)
+                and not self._model._is_trained
+                or isinstance(self._model, RandomForest)
+                and self._model._rf is None
+            ):
+                logger.debug(
+                    "Model has not been trained yet. Skip estimation and use constant liar mean "
+                    "(mean of all samples)."
+                )
+                Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
+                return np.repeat(Y_estimated, n_running_points, 0)
+            return self._model.predict_marginalized(X_running)[0]  # type: ignore[union-attr]
+        elif estimation_strategy == "sample":
+            # https://papers.nips.cc/paper_files/paper/2012/file/05311655a15b75fab86956663e1819cd-Paper.pdf
+            # Since this requires a multivariate Gaussian distribution over the candidates, we need to restrict
+            # the model to be a Gaussian process
+            assert isinstance(
+                self._model, GaussianProcess
+            ), "The sample-based estimation strategy only allows a GP as surrogate model!"
+            return self._model.sample_functions(X_test=X_running, n_funcs=1)
+        else:
+            raise ValueError(f"Unknown estimation strategy: {estimation_strategy}")
+
     def _get_x_best(self, X: np.ndarray) -> tuple[np.ndarray, float]:
         """Get value, configuration, and array representation of the *best* configuration.
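To make the constant-liar arithmetic above concrete before moving on to the encoder changes, here is a small illustrative NumPy sketch (toy values, not part of the patch) of what the `CL_*` strategies return for two pending configurations:

```python
import numpy as np

# Costs of three already evaluated configurations (single objective -> one column)
Y_evaluated = np.array([[0.8], [0.2], [0.5]])
n_running = 2  # two configurations are still running

# Constant liar: every pending configuration is assigned the same "lie"
cl_max = np.repeat(np.nanmax(Y_evaluated, axis=0, keepdims=True), n_running, 0)    # [[0.8], [0.8]]
cl_min = np.repeat(np.nanmin(Y_evaluated, axis=0, keepdims=True), n_running, 0)    # [[0.2], [0.2]]
cl_mean = np.repeat(np.nanmean(Y_evaluated, axis=0, keepdims=True), n_running, 0)  # [[0.5], [0.5]]

assert cl_max.shape == cl_min.shape == cl_mean.shape == (n_running, 1)
```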
+ """ + if budget_subset is not None: + trials = { + trial: self.runhistory[trial] + for trial in self.runhistory + if self.runhistory[trial].status == StatusType.RUNNING and trial.budget in budget_subset + } + else: + trials = { + trial: self.runhistory[trial] + for trial in self.runhistory + if self.runhistory[trial].status == StatusType.RUNNING + } + + return trials + def _get_timeout_trials( self, budget_subset: list | None = None, ) -> dict[TrialKey, TrialValue]: - """Returns all trials that did have a timeout.""" + """Returns all trials that did have a timeout. + + Parameters + ---------- + budget_subset : list | None + If None, retrieve all timeout trials. Otherwise, retrieve only timeout + trials with budgets in budget_subset. + + Returns + ------- + trials : dict[TrialKey, TrialValue] + A dictionary containing the timeout trials. + """ if budget_subset is not None: trials = { trial: self.runhistory[trial] @@ -211,6 +255,24 @@ def _get_timeout_trials( return trials + def _convert_config_ids_to_array(self, config_ids: Iterable[int]) -> np.ndarray: + """Extract the configurations from runhistory from their ids and transform them into np.ndarray. + + Parameters + ---------- + config_ids : Iterable[int] + A collections of configuration ids. + + Returns + ------- + configs_array : np.ndarray + The corresponding configuration arrays. + + """ + configurations = [self.runhistory._ids_config[config_id] for config_id in config_ids] + configs_array = convert_configurations_to_array(configurations) + return configs_array + def get_configurations( self, budget_subset: list | None = None, @@ -236,11 +298,31 @@ def get_configurations( t_trials = self._get_timeout_trials(budget_subset) t_config_ids = set(t_trial.config_id for t_trial in t_trials) config_ids = s_config_ids | t_config_ids - configurations = [self.runhistory._ids_config[config_id] for config_id in config_ids] - configs_array = convert_configurations_to_array(configurations) + configs_array = self._convert_config_ids_to_array(config_ids) return configs_array + def get_running_configurations( + self, + budget_subset: list | None = None, + ) -> np.ndarray: + """Returns vector representation of the configurations that are still running. + + Parameters + ---------- + budget_subset : list | None, defaults to none + List of budgets to consider. + + Returns + ------- + X : np.ndarray + Configuration vector and instance features. + """ + r_trials = self._get_running_trials(budget_subset) + r_ids = set(r_trial.config_id for r_trial in r_trials) + configs_array = self._convert_config_ids_to_array(r_ids) + return configs_array + def transform( self, budget_subset: list | None = None, @@ -282,6 +364,29 @@ def transform( logger.debug("Converted %d observations." % (X.shape[0])) return X, Y + def transform_running_configs( + self, + budget_subset: list | None = None, + ) -> np.ndarray: + """Transform the running configurations. + + Parameters + ---------- + budget_subset : list | None, defaults to none + List of budgets to consider. + + Returns + ------- + X : np.ndarray + Configuration vector and instance features. + """ + logger.debug("Transforming Running Configurations into X format...") + running_trials = self._get_running_trials(budget_subset) + # Y is not required for running configurations + X, _ = self._build_matrix(trials=running_trials, store_statistics=True) + logger.debug("Converted %d running observations." 
     @abstractmethod
     def transform_response_values(
         self,
diff --git a/tests/test_main/test_config_selector.py b/tests/test_main/test_config_selector.py
new file mode 100644
index 000000000..36b2bf5a8
--- /dev/null
+++ b/tests/test_main/test_config_selector.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+import pytest
+
+from ConfigSpace import ConfigurationSpace, Configuration, Float
+import numpy as np
+
+from smac.runhistory.dataclasses import TrialValue
+from smac.acquisition.function.confidence_bound import LCB
+from smac.initial_design.random_design import RandomInitialDesign
+from smac import BlackBoxFacade, HyperparameterOptimizationFacade, Scenario
+from smac.main.config_selector import ConfigSelector
+from smac.main import config_selector
+
+
+def test_estimated_config_values_are_trained_by_models(rosenbrock):
+    scenario = Scenario(rosenbrock.configspace, n_trials=100, n_workers=2, deterministic=True)
+    smac = BlackBoxFacade(
+        scenario,
+        rosenbrock.train,  # We pass the target function here
+        overwrite=True,  # Overrides any previous results that are found to be inconsistent with the meta-data
+        config_selector=ConfigSelector(
+            scenario=scenario,
+            retrain_after=1,
+            batch_sampling_estimation_strategy='no_estimate',
+        ),
+        initial_design=BlackBoxFacade.get_initial_design(scenario=scenario, n_configs=5),
+        # LCB lets us track the number of data points seen by the acquisition function
+        acquisition_function=LCB(),
+    )
+
+    # We first evaluate several configurations to serve as starting points
+    n_data_in_acq_func = 5
+    for _ in range(n_data_in_acq_func):
+        info = smac.ask()  # We need the seed from the configuration
+
+        cost = rosenbrock.train(info.config, seed=info.seed, budget=info.budget, instance=info.instance)
+        value = TrialValue(cost=cost, time=0.5)
+
+        smac.tell(info, value)
+
+    # For the naive approach (no_estimate), no pending configuration values are hallucinated
+    all_asked_infos = []
+    for i in range(3):
+        all_asked_infos.append(smac.ask())
+        assert smac._acquisition_function._num_data == n_data_in_acq_func
+
+    # With an estimation strategy, every new running configuration is fantasized and used to retrain the model.
+    # Hence, each time a new point is asked, _num_data of the LCB acquisition function should increase by one.
+    n_data_in_acq_func += 3
+    for estimate_strategy in ['CL_max', 'CL_min', 'CL_mean', 'kriging_believer', 'sample']:
+        smac._config_selector._batch_sampling_estimation_strategy = estimate_strategy
+        for i in range(3):
+            all_asked_infos.append(smac.ask())
+            assert smac._acquisition_function._num_data == n_data_in_acq_func
+            n_data_in_acq_func += 1
+
+    for info in all_asked_infos:
+        value = TrialValue(cost=rosenbrock.train(info.config, instance=info.instance, seed=info.seed))
+        smac.tell(info=info, value=value)
+
+    # Now we return to the vanilla setting: all evaluations are exact, so the number of data points
+    # seen by the acquisition function should not increase after the next ask
+    _ = smac.ask()
+    assert smac._acquisition_function._num_data == n_data_in_acq_func
+
+
+@pytest.mark.parametrize("estimation_strategy", ['CL_max', 'CL_min', 'CL_mean', 'kriging_believer', 'sample'])
+def test_batch_estimation_methods(rosenbrock, estimation_strategy):
+    config_space = rosenbrock.configspace
+    scenario = Scenario(config_space, n_trials=100, n_workers=2, deterministic=True)
+    config_selector = ConfigSelector(
+        scenario=scenario,
+        retrain_after=1,
+        batch_sampling_estimation_strategy=estimation_strategy,
+    )
+    model = BlackBoxFacade.get_model(scenario=scenario)
+    X_evaluated = config_space.sample_configuration(5)
+    y_train = np.asarray([rosenbrock.train(x) for x in X_evaluated])
+    x_train = np.asarray([x.get_array() for x in X_evaluated])
+
+    model.train(x_train, y_train)
+
+    X_running = np.asarray([x.get_array() for x in config_space.sample_configuration(3)])
+    config_selector._model = model
+
+    estimations = config_selector.estimate_running_config_costs(
+        X_running, y_train, estimation_strategy=estimation_strategy,
+    )
+    if estimation_strategy == 'CL_max':
+        assert (estimations == y_train.max()).all()
+    elif estimation_strategy == 'CL_min':
+        assert (estimations == y_train.min()).all()
+    elif estimation_strategy == 'CL_mean':
+        assert (estimations == y_train.mean()).all()
+    else:
+        if estimation_strategy == 'kriging_believer':
+            assert np.allclose(model.predict_marginalized(X_running)[0], estimations)
+        else:
+            # For the sampling strategy, we only check that the estimations have the expected shape
+            assert np.equal(estimations.shape, (3, 1)).all()
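A closing note on the `sample` strategy exercised above: the pending estimates are drawn as a single joint function sample, which is why the patch restricts this strategy to the Gaussian process surrogate. A rough, self-contained sketch under that assumption (toy configuration space and targets; `get_model`, `sample_functions`, and the `(n, 1)` output shape are taken from the patch and its tests, the rest is illustrative only):

```python
import numpy as np
from ConfigSpace import ConfigurationSpace, Float
from smac import BlackBoxFacade, Scenario

# Hypothetical 2D space; any trained GP surrogate behaves the same way here
cs = ConfigurationSpace(seed=0)
cs.add([Float("x1", (-5, 10), default=0), Float("x2", (0, 15), default=0)])
scenario = Scenario(cs, n_trials=10)

gp = BlackBoxFacade.get_model(scenario=scenario)
X_train = np.asarray([c.get_array() for c in cs.sample_configuration(5)])
y_train = np.asarray([float(np.sum(x)) for x in X_train])  # toy targets
gp.train(X_train, y_train)

X_running = np.asarray([c.get_array() for c in cs.sample_configuration(3)])
# One joint draw over all pending points keeps their estimates correlated with each other,
# in contrast to the constant-liar strategies, which assign the same scalar to every point
Y_running = gp.sample_functions(X_test=X_running, n_funcs=1)
assert Y_running.shape == (X_running.shape[0], 1)
```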