From 0df0325d6ed1d4e1302abd9b48dc724c25b96434 Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Tue, 25 Jan 2022 16:55:15 +0100 Subject: [PATCH 1/5] [WIP] Allow delegating the archive --- nevergrad/optimization/base.py | 23 +++++++++-------- nevergrad/optimization/optimizerlib.py | 4 +++ nevergrad/optimization/test_utils.py | 6 ++--- nevergrad/optimization/utils.py | 35 +++++++++++++++++++------- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py index 84b5801db..a81c44967 100644 --- a/nevergrad/optimization/base.py +++ b/nevergrad/optimization/base.py @@ -26,7 +26,7 @@ X = tp.TypeVar("X", bound="Optimizer") Y = tp.TypeVar("Y") IntOrParameter = tp.Union[int, p.Parameter] -_PruningCallable = tp.Callable[[utils.Archive[utils.MultiValue]], utils.Archive[utils.MultiValue]] +_PruningCallable = tp.Callable[[utils.Archive[utils.MultiValue]], None] def _loss(param: p.Parameter) -> float: @@ -405,14 +405,17 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) if np.isnan(loss) or loss == np.inf: self._warn(f"Updating fitness with {loss} value", errors.BadLossWarning) mvalue: tp.Optional[utils.MultiValue] = None - if x not in self.archive: - self.archive[x] = utils.MultiValue(candidate, loss, reference=self.parametrization) + if not self.archive.is_delegated: + if x not in self.archive: + self.archive[x] = utils.MultiValue(candidate, loss, reference=self.parametrization) + else: + mvalue = self.archive[x] + mvalue.add_evaluation(loss) + # both parameters should be non-None + if mvalue.parameter.loss > candidate.loss: # type: ignore + mvalue.parameter = candidate # keep best candidate else: - mvalue = self.archive[x] - mvalue.add_evaluation(loss) - # both parameters should be non-None - if mvalue.parameter.loss > candidate.loss: # type: ignore - mvalue.parameter = candidate # keep best candidate + mvalue = self.archive[x] # should exist for sure # update current best records # this may have to be improved if we want to keep more kinds of best losss @@ -432,8 +435,8 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) # max(v.get_estimation(name) for v in self.archive.values())) # raise RuntimeError(f"Best value should exist in the archive at num_tell={self.num_tell})\n" # f"Best value is {bval} and archive is within range {avals} for {name}") - if self.pruning is not None: - self.archive = self.pruning(self.archive) + if self.pruning is not None and not self.archive.is_delegated: + self.pruning(self.archive) def ask(self) -> p.Parameter: """Provides a point to explore. diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py index 955d0ff1e..7f8a7879c 100644 --- a/nevergrad/optimization/optimizerlib.py +++ b/nevergrad/optimization/optimizerlib.py @@ -1049,6 +1049,7 @@ def __init__( ) -> None: super().__init__(parametrization, budget=budget, num_workers=num_workers) self._optimizer = base_optimizer(self.parametrization, budget=budget, num_workers=num_workers) + self._optimizer.archive.delegate_to(self.archive) self._subcandidates: tp.Dict[str, p.Parameter] = {} if scale is None: assert self.budget is not None, "Either scale or budget must be known in _Rescaled." @@ -1374,6 +1375,8 @@ def __init__( num_workers=sub_workers, ) ) + for optim in self.optims: + optim.archive.delegate_to(self.archive) # current optimizer choice self._selected_ind: tp.Optional[int] = None self._current = -1 @@ -2407,6 +2410,7 @@ def optim(self) -> base.Optimizer: if self._optim is None: self._optim = self._select_optimizer_cls()(self.parametrization, self.budget, self.num_workers) self._optim = self._optim if not isinstance(self._optim, NGOptBase) else self._optim.optim + self._optim.archive.delegate_to(self.archive) logger.debug("%s selected %s optimizer.", *(x.name for x in (self, self._optim))) return self._optim diff --git a/nevergrad/optimization/test_utils.py b/nevergrad/optimization/test_utils.py index 0c79903a9..bb2d220ec 100644 --- a/nevergrad/optimization/test_utils.py +++ b/nevergrad/optimization/test_utils.py @@ -97,12 +97,12 @@ def test_pruning() -> None: # pruning pruning = utils.Pruning(min_len=1, max_len=3) # 0 is best optimistic and average, and 3 is best pessimistic (variance=0) - archive = pruning(archive) + pruning(archive) testing.assert_set_equal([x[0] for x in archive.keys_as_arrays()], [0, 3], err_msg=f"Repetition #{k+1}") pickle.dumps(archive) # should be picklable # should not change anything this time - archive2 = pruning(archive) - testing.assert_set_equal([x[0] for x in archive2.keys_as_arrays()], [0, 3], err_msg=f"Repetition #{k+1}") + pruning(archive) + testing.assert_set_equal([x[0] for x in archive.keys_as_arrays()], [0, 3], err_msg=f"Repetition #{k+1}") @pytest.mark.parametrize( # type: ignore diff --git a/nevergrad/optimization/utils.py b/nevergrad/optimization/utils.py index 74aab135d..922fd21ca 100644 --- a/nevergrad/optimization/utils.py +++ b/nevergrad/optimization/utils.py @@ -173,9 +173,27 @@ class Archive(tp.Generic[Y]): """ def __init__(self) -> None: - self.bytesdict: tp.Dict[bytes, Y] = {} + self._data: tp.Union["Archive[Y]", tp.Dict[bytes, Y]] = {} + + @property + def bytesdict(self) -> tp.Dict[bytes, Y]: + if not isinstance(self._data, Archive): + return self._data + else: + while self._data.is_delegated: # unroll + self._data = self._data._data + return self._data._data # type: ignore + + def delegate_to(self, archive: "Archive[Y]") -> None: + self._data = archive + + @property + def is_delegated(self) -> bool: + return isinstance(self._data, Archive) def __setitem__(self, x: tp.ArrayLike, value: Y) -> None: + if self.is_delegated: + raise RuntimeError("Cannot set from a delegated instance") self.bytesdict[_tobytes(x)] = value def __getitem__(self, x: tp.ArrayLike) -> Y: @@ -255,13 +273,13 @@ def __init__(self, min_len: int, max_len: int): self.max_len = max_len self._num_prunings = 0 # for testing it is not called too often - def __call__(self, archive: Archive[MultiValue]) -> Archive[MultiValue]: - if len(archive) < self.max_len: - return archive - return self._prune(archive) + def __call__(self, archive: Archive[MultiValue]) -> None: + if len(archive) >= self.max_len: + self._prune(archive) - def _prune(self, archive: Archive[MultiValue]) -> Archive[MultiValue]: + def _prune(self, archive: Archive[MultiValue]) -> None: self._num_prunings += 1 + assert not isinstance(archive._data, Archive), "Cannot prune on delegated instance" # separate function to ease profiling quantiles: tp.Dict[str, float] = {} threshold = float(self.min_len + 1) / len(archive) @@ -270,14 +288,13 @@ def _prune(self, archive: Archive[MultiValue]) -> Archive[MultiValue]: quantiles[name] = np.quantile( [v.get_estimation(name) for v in archive.values()], threshold, interpolation="lower" ) - new_archive: Archive[MultiValue] = Archive() - new_archive.bytesdict = { + bytesdict = { b: v for b, v in archive.bytesdict.items() if any(v.get_estimation(n) < quantiles[n] for n in names) } # strict comparison to make sure we prune even for values repeated maaany times # this may remove all points though, but nevermind for now - return new_archive + archive._data = bytesdict @classmethod def sensible_default(cls, num_workers: int, dimension: int) -> "Pruning": From f06bf9b1eade620200918b685facc6f2d60f676a Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Wed, 26 Jan 2022 16:33:31 +0100 Subject: [PATCH 2/5] rmrescale --- nevergrad/optimization/base.py | 2 ++ nevergrad/optimization/optimizerlib.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py index a81c44967..ff5794319 100644 --- a/nevergrad/optimization/base.py +++ b/nevergrad/optimization/base.py @@ -406,6 +406,7 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) self._warn(f"Updating fitness with {loss} value", errors.BadLossWarning) mvalue: tp.Optional[utils.MultiValue] = None if not self.archive.is_delegated: + print(f"Updating archive for {candidate.uid[:8]} in {self.__class__.__name__}") if x not in self.archive: self.archive[x] = utils.MultiValue(candidate, loss, reference=self.parametrization) else: @@ -415,6 +416,7 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) if mvalue.parameter.loss > candidate.loss: # type: ignore mvalue.parameter = candidate # keep best candidate else: + print(f"Delegated archive for {candidate.uid[:8]} in {self.__class__.__name__}") mvalue = self.archive[x] # should exist for sure # update current best records # this may have to be improved if we want to keep more kinds of best losss diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py index 7f8a7879c..1a7a4ca97 100644 --- a/nevergrad/optimization/optimizerlib.py +++ b/nevergrad/optimization/optimizerlib.py @@ -1049,7 +1049,6 @@ def __init__( ) -> None: super().__init__(parametrization, budget=budget, num_workers=num_workers) self._optimizer = base_optimizer(self.parametrization, budget=budget, num_workers=num_workers) - self._optimizer.archive.delegate_to(self.archive) self._subcandidates: tp.Dict[str, p.Parameter] = {} if scale is None: assert self.budget is not None, "Either scale or budget must be known in _Rescaled." From bf66d9a181ca70cd76e5c903fa77c25fa7d895aa Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Wed, 26 Jan 2022 16:37:04 +0100 Subject: [PATCH 3/5] exists --- nevergrad/optimization/base.py | 8 +++----- nevergrad/optimization/optimizerlib.py | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py index ff5794319..581a38119 100644 --- a/nevergrad/optimization/base.py +++ b/nevergrad/optimization/base.py @@ -404,9 +404,8 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) ) if np.isnan(loss) or loss == np.inf: self._warn(f"Updating fitness with {loss} value", errors.BadLossWarning) - mvalue: tp.Optional[utils.MultiValue] = None if not self.archive.is_delegated: - print(f"Updating archive for {candidate.uid[:8]} in {self.__class__.__name__}") + # print(f"Updating archive for {candidate.uid[:8]} in {self.__class__.__name__}") if x not in self.archive: self.archive[x] = utils.MultiValue(candidate, loss, reference=self.parametrization) else: @@ -415,9 +414,7 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) # both parameters should be non-None if mvalue.parameter.loss > candidate.loss: # type: ignore mvalue.parameter = candidate # keep best candidate - else: - print(f"Delegated archive for {candidate.uid[:8]} in {self.__class__.__name__}") - mvalue = self.archive[x] # should exist for sure + mvalue = self.archive[x] # should exist for sure # update current best records # this may have to be improved if we want to keep more kinds of best losss @@ -439,6 +436,7 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) # f"Best value is {bval} and archive is within range {avals} for {name}") if self.pruning is not None and not self.archive.is_delegated: self.pruning(self.archive) + self.archive[x] = mvalue # we must make sure that the current point is available for suboptim def ask(self) -> p.Parameter: """Provides a point to explore. diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py index 1a7a4ca97..17b74c892 100644 --- a/nevergrad/optimization/optimizerlib.py +++ b/nevergrad/optimization/optimizerlib.py @@ -1049,6 +1049,7 @@ def __init__( ) -> None: super().__init__(parametrization, budget=budget, num_workers=num_workers) self._optimizer = base_optimizer(self.parametrization, budget=budget, num_workers=num_workers) + # cannot delegate the archive since the parametrization is different :( self._subcandidates: tp.Dict[str, p.Parameter] = {} if scale is None: assert self.budget is not None, "Either scale or budget must be known in _Rescaled." From 1d8b95f0c016eb1aed34b7e9b2da90dfdf67a8dc Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Wed, 26 Jan 2022 16:54:37 +0100 Subject: [PATCH 4/5] simplify --- nevergrad/optimization/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py index 581a38119..852aa0ee0 100644 --- a/nevergrad/optimization/base.py +++ b/nevergrad/optimization/base.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. import pickle +import logging import warnings from pathlib import Path from numbers import Real @@ -27,6 +28,7 @@ Y = tp.TypeVar("Y") IntOrParameter = tp.Union[int, p.Parameter] _PruningCallable = tp.Callable[[utils.Archive[utils.MultiValue]], None] +logger = logging.getLogger(__name__) def _loss(param: p.Parameter) -> float: @@ -404,17 +406,21 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) ) if np.isnan(loss) or loss == np.inf: self._warn(f"Updating fitness with {loss} value", errors.BadLossWarning) + mvalue = utils.MultiValue(candidate, loss, reference=self.parametrization) if not self.archive.is_delegated: # print(f"Updating archive for {candidate.uid[:8]} in {self.__class__.__name__}") if x not in self.archive: - self.archive[x] = utils.MultiValue(candidate, loss, reference=self.parametrization) - else: + self.archive[x] = mvalue + else: # reevaluation: needs updating mvalue = self.archive[x] mvalue.add_evaluation(loss) # both parameters should be non-None if mvalue.parameter.loss > candidate.loss: # type: ignore mvalue.parameter = candidate # keep best candidate - mvalue = self.archive[x] # should exist for sure + # the following should not happen since delegating archives are used afterwards + if x not in self.archive: + logger.warning("Archive is not correctly filled, please open an issue.") + mvalue = self.archive.get(x, mvalue) # type: ignore # update with master archive # update current best records # this may have to be improved if we want to keep more kinds of best losss From 158740f067f99cf954e32b67113b76f9d93e88a8 Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Wed, 26 Jan 2022 16:55:01 +0100 Subject: [PATCH 5/5] fix --- nevergrad/optimization/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py index 852aa0ee0..082ce33d4 100644 --- a/nevergrad/optimization/base.py +++ b/nevergrad/optimization/base.py @@ -423,7 +423,6 @@ def _update_archive_and_bests(self, candidate: p.Parameter, loss: tp.FloatLoss) mvalue = self.archive.get(x, mvalue) # type: ignore # update with master archive # update current best records # this may have to be improved if we want to keep more kinds of best losss - for name in self.current_bests: if mvalue is self.current_bests[name]: # reboot best = min(self.archive.values(), key=lambda mv, n=name: mv.get_estimation(n)) # type: ignore