-
-
Notifications
You must be signed in to change notification settings - Fork 235
Batch sampling improvement #1154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
dengdifan
wants to merge
18
commits into
development
Choose a base branch
from
batch_sampling_improvement
base: development
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
9676189
allow encoder to return running configs
dengdifan 6b86808
add options for batch sampling
dengdifan 06719d2
maint constant liar with nan values
dengdifan 8b748db
Merge branch 'development' into batch_sampling_improvement
dengdifan 2487bc3
add docs
dengdifan d3f4f11
tests for config selectors
dengdifan 3c2196a
solve conflict
dengdifan 5f3ae8e
maint doc
dengdifan a1f7c32
style(config_selector)
benjamc f68fee5
style(abstract_encoder)
benjamc 691b1a9
Update CHANGELOG.md
benjamc eafef88
Merge branch 'development' into batch_sampling_improvement
benjamc 3cf1748
refactor(config_selector): pass all args in the facades
benjamc 62a9588
refactor(abstract_facade): fix default, add warning in docstring
benjamc 522c671
fix(fantasize): check whether model has been trained
benjamc 99443b4
feat(fantasize_example): add
benjamc 6658b88
fix(config_selector): properly check whether model is trained
benjamc 5416a8d
Merge branch 'development' into batch_sampling_improvement
benjamc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
"""Example of using SMAC with parallelization and fantasization vs. no estimation for pending evaluations. | ||
|
||
This example will take some time because the target function is artificially slowed down to demonstrate the effect of | ||
fantasization. The example will plot the incumbent found by SMAC with and without fantasization. | ||
""" | ||
from __future__ import annotations | ||
|
||
import numpy as np | ||
from ConfigSpace import Configuration, ConfigurationSpace, Float | ||
|
||
from matplotlib import pyplot as plt | ||
|
||
from smac import BlackBoxFacade, Scenario | ||
from smac.facade import AbstractFacade | ||
|
||
from rich import inspect | ||
import time | ||
|
||
def plot_trajectory(facades: list[AbstractFacade], names: list[str]) -> None:
    """Plot the incumbent trajectory of each facade over walltime and trial number.

    Draws two log-scaled subplots (x-axis: walltime, trial) with one line per facade.

    Parameters
    ----------
    facades : list[AbstractFacade]
        Finished SMAC facades whose intensifier trajectories are plotted.
    names : list[str]
        Legend label for each facade, in the same order as ``facades``.
    """
    cmap = plt.get_cmap("tab10")

    fig = plt.figure()
    axes = fig.subplots(1, 2)

    for ax_i, x_axis in zip(axes, ["walltime", "trial"]):
        for i, facade in enumerate(facades):
            X, Y = [], []
            # NOTE: a leftover `rich.inspect(facade.intensifier.trajectory)` debug dump
            # was removed here — it only spammed the console in this example.
            for item in facade.intensifier.trajectory:
                # Single-objective optimization: exactly one config and one cost per entry
                assert len(item.config_ids) == 1
                assert len(item.costs) == 1

                y = item.costs[0]
                x = getattr(item, x_axis)

                X.append(x)
                Y.append(y)

            ax_i.plot(X, Y, label=names[i], color=cmap(i))
            ax_i.scatter(X, Y, marker="x", color=cmap(i))
            ax_i.set_xlabel(x_axis)
            ax_i.set_ylabel(facades[0].scenario.objectives)
            ax_i.set_yscale("log")
            ax_i.legend()

    plt.show()
|
||
class Branin:
    """Branin benchmark function, reported as regret w.r.t. the global optimum (0.397887)."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Construct the two-dimensional search space of the Branin function."""
        space = ConfigurationSpace(seed=0)

        # Both inputs of the Branin function on their standard domains.
        space.add([Float("x1", (-5, 10), default=0), Float("x2", (0, 15), default=0)])

        # NOTE(review): this sleep sits in the *configspace* property, although the module
        # docstring says the target function is artificially slowed down — confirm the
        # placement is intended and not meant for `train` instead.
        time.sleep(10)

        return space

    def train(self, config: Configuration, seed: int) -> float:
        """Evaluate the Branin function at ``config`` and return its cost minus the optimum."""
        x1, x2 = config["x1"], config["x2"]

        # Standard Branin coefficients.
        a, r, s = 1.0, 6.0, 10.0
        b = 5.1 / (4.0 * np.pi**2)
        c = 5.0 / np.pi
        t = 1.0 / (8.0 * np.pi)

        cost = a * (x2 - b * x1**2 + c * x1 - r) ** 2 + s * (1 - t) * np.cos(x1) + s
        return cost - 0.397887
|
||
if __name__ == "__main__": | ||
seed = 345455 | ||
scenario = Scenario(n_trials=100, configspace=Branin().configspace, n_workers=4, seed=seed) | ||
facade = BlackBoxFacade | ||
|
||
smac_noestimation = facade( | ||
scenario=scenario, | ||
target_function=Branin().train, | ||
overwrite=True, | ||
) | ||
smac_fantasize = facade( | ||
scenario=scenario, | ||
target_function=Branin().train, | ||
config_selector=facade.get_config_selector( | ||
scenario=scenario, | ||
batch_sampling_estimation_strategy="kriging_believer" | ||
), | ||
overwrite=True, | ||
logging_level=0 | ||
) | ||
|
||
incumbent_noestimation = smac_noestimation.optimize() | ||
incumbent_fantasize = smac_fantasize.optimize() | ||
|
||
plot_trajectory(facades=[ | ||
smac_noestimation, | ||
smac_fantasize, | ||
], names=["No Estimation", "Fantasize"]) | ||
|
||
del smac_noestimation | ||
del smac_fantasize |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,8 @@ | |
from smac.callback.callback import Callback | ||
from smac.initial_design import AbstractInitialDesign | ||
from smac.model.abstract_model import AbstractModel | ||
from smac.model.gaussian_process import GaussianProcess | ||
from smac.model.random_forest import RandomForest | ||
from smac.random_design.abstract_random_design import AbstractRandomDesign | ||
from smac.runhistory.encoder.abstract_encoder import AbstractRunHistoryEncoder | ||
from smac.runhistory.runhistory import RunHistory | ||
|
@@ -44,6 +46,14 @@ class ConfigSelector: | |
the highest budgets are checked first. For example, if min_trials is three, but we find only | ||
two trials in the runhistory for the highest budget, we will use trials of a lower budget | ||
instead. | ||
batch_sampling_estimation_strategy: str, defaults to no_estimate
Batch sample setting, this is applied for parallel setting. During batch sampling, ConfigSelectors might need | ||
to suggest new samples while some configurations are still running. This argument determines if we want to make | ||
use of this information and fantasize the new estimations. If no_estimate is applied, we do not use the | ||
information from the running configurations. If the strategy is kriging_believer, we use the predicted mean from | ||
our surrogate model as the estimations for the new samples. If the strategy is CL_min/mean/max, we use the | ||
min/mean/max from the existing evaluations as the estimations for the new samples. If the strategy is sample, | ||
we use our surrogate model (in this case, only GP is allowed) to sample new configurations. | ||
""" | ||
|
||
def __init__( | ||
|
@@ -53,6 +63,7 @@ def __init__( | |
retrain_after: int = 8, | ||
max_new_config_tries: int = 16, | ||
min_trials: int = 1, | ||
batch_sampling_estimation_strategy: str = "no_estimate", | ||
) -> None: | ||
# Those are the configs sampled from the passed initial design | ||
# Selecting configurations from initial design | ||
|
@@ -82,6 +93,9 @@ def __init__( | |
# Processed configurations should be stored here; this is important to not return the same configuration twice | ||
self._processed_configs: list[Configuration] = [] | ||
|
||
# for batch sampling setting | ||
self._batch_sampling_estimation_strategy = batch_sampling_estimation_strategy | ||
|
||
def _set_components( | ||
self, | ||
initial_design: AbstractInitialDesign, | ||
|
@@ -286,6 +300,24 @@ def _collect_data(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: | |
# Possible add running configs? | ||
configs_array = self._runhistory_encoder.get_configurations(budget_subset=self._considered_budgets) | ||
|
||
# add running configurations | ||
# If our batch size is 1, then no running configuration should exist, we could then skip this part. | ||
# Therefore, there is no need to check the number of workers in this case | ||
|
||
X_running = self._runhistory_encoder.transform_running_configs(budget_subset=[b]) | ||
if self._batch_sampling_estimation_strategy != "no_estimate": | ||
Y_estimated = self.estimate_running_config_costs( | ||
X_running, Y, self._batch_sampling_estimation_strategy | ||
) | ||
# if there is no running configurations, we directly return X, Y and configs_array | ||
if Y_estimated is not None: | ||
configs_array_running = self._runhistory_encoder.get_running_configurations( | ||
budget_subset=self._considered_budgets | ||
) | ||
X = np.concatenate([X, X_running], axis=0) | ||
Y = np.concatenate([Y, Y_estimated], axis=0) | ||
configs_array = np.concatenate([configs_array, configs_array_running], axis=0) | ||
|
||
return X, Y, configs_array | ||
|
||
return ( | ||
|
@@ -302,6 +334,76 @@ def _get_evaluated_configs(self) -> list[Configuration]: | |
assert self._runhistory is not None | ||
return self._runhistory.get_configs_per_budget(budget_subset=self._considered_budgets) | ||
|
||
def estimate_running_config_costs(
    self, X_running: np.ndarray, Y_evaluated: np.ndarray, estimation_strategy: str = "CL_max"
) -> np.ndarray | None:
    """Estimate the costs of still pending/running configurations.

    Parameters
    ----------
    X_running : np.ndarray
        a np array with size (n_running_configs, D) that represents the array values of the running configurations
    Y_evaluated : np.ndarray
        a np array with size (n_evaluated_configs, n_obj) that records the costs of all the previous evaluated
        configurations
    estimation_strategy: str
        how do we estimate the target y_running values, we have the following strategy:
        CL_max: constant liar max, we take the maximal of all the evaluated Y and apply them to the running X
        CL_min: constant liar min, we take the minimal of all the evaluated Y and apply them to the running X
        CL_mean: constant liar mean, we take the mean of all the evaluated Y and apply them to the running X
        kriging_believer: kriging believer, we apply the predicted means from the surrogate model to running X
            values
        sample: estimations for X are sampled from the surrogate models. Since the samples need to be sampled from a
            joint distribution for all X, we only allow sample strategy with GP as surrogate models.

    Returns
    -------
    Y_running_estimated : np.ndarray | None
        the estimated running y values, or ``None`` if there are no running configurations

    Raises
    ------
    ValueError
        if ``estimation_strategy`` is not one of the strategies listed above
    """
    n_running_points = len(X_running)
    if n_running_points == 0:
        # Nothing is running: the caller can use the evaluated data unchanged.
        return None
    if estimation_strategy == "CL_max":
        # constant liar max, we take the maximal values of all the evaluated Y and apply them to the running X
        # nan-aware reductions: crashed/timed-out trials may be recorded as NaN
        Y_estimated = np.nanmax(Y_evaluated, axis=0, keepdims=True)
        return np.repeat(Y_estimated, n_running_points, 0)
    elif estimation_strategy == "CL_min":
        # constant liar min, we take the minimal values of all the evaluated Y and apply them to the running X
        Y_estimated = np.nanmin(Y_evaluated, axis=0, keepdims=True)
        return np.repeat(Y_estimated, n_running_points, 0)
    elif estimation_strategy == "CL_mean":
        # constant liar mean, we take the mean values of all the evaluated Y and apply them to the running X
        Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
        return np.repeat(Y_estimated, n_running_points, 0)
    elif estimation_strategy == "kriging_believer":
        # kriging believer, we apply the predicted means of the surrogate model to estimate the running X
        # Check whether model has been trained already; if not, fall back to constant liar mean
        if (
            isinstance(self._model, GaussianProcess)
            and not self._model._is_trained
            or isinstance(self._model, RandomForest)
            and self._model._rf is None
        ):
            logger.debug(
                "Model has not been trained yet. Skip estimation and use constant liar mean "
                "(mean of all samples)."
            )
            Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
            return np.repeat(Y_estimated, n_running_points, 0)
        return self._model.predict_marginalized(X_running)[0]  # type: ignore[union-attr]
    elif estimation_strategy == "sample":
        # https://papers.nips.cc/paper_files/paper/2012/file/05311655a15b75fab86956663e1819cd-Paper.pdf
        # since this requires a multi-variant gaussian distribution for the candidates, we need to restrict the
        # model to be a gaussian process
        assert isinstance(
            self._model, GaussianProcess
        ), "Sample based estimate strategy only allows GP as surrogate model!"
        return self._model.sample_functions(X_test=X_running, n_funcs=1)
    else:
        raise ValueError(f"Unknown estimating strategy: {estimation_strategy}")
|
||
def _get_x_best(self, X: np.ndarray) -> tuple[np.ndarray, float]: | ||
"""Get value, configuration, and array representation of the *best* configuration. | ||
|
||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.