From 4ce23bd92eb12825cd1f513c112411a73fc9f6e4 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:52:05 +0200
Subject: [PATCH 1/4] feat: add chosen_metric attribute in early stopping callback class

---
 python-package/lightgbm/callback.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index e776ea953bd1..a89d311e0d60 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -279,9 +279,16 @@ def __init__(
         first_metric_only: bool = False,
         verbose: bool = True,
         min_delta: Union[float, List[float]] = 0.0,
+        chosen_metric: str = None,
     ) -> None:
         self.enabled = _should_enable_early_stopping(stopping_rounds)
 
+        # Test if both parameters are used
+        if (first_metric_only + (chosen_metric is not None)) == 2:
+            error_message = """
+            Only one of first_metric_only and chosen_metric parameters should be used"""
+            raise ValueError(error_message)
+
         self.order = 30
         self.before_iteration = False
 
@@ -289,6 +296,7 @@ def __init__(
         self.first_metric_only = first_metric_only
         self.verbose = verbose
         self.min_delta = min_delta
+        self.chosen_metric = chosen_metric
 
         self._reset_storages()
 
@@ -345,7 +353,13 @@ def _init(self, env: CallbackEnv) -> None:
 
         self._reset_storages()
 
-        n_metrics = len({m[1] for m in env.evaluation_result_list})
+        list_metrics = {m[1] for m in env.evaluation_result_list}
+        if (self.chosen_metric is not None) and (self.chosen_metric not in list_metrics):
+            error_message = f"""Chosen callback metric: {self.chosen_metric} is not in the evaluation list.
+            The list of available metrics for early stopping is: {list_metrics}."""
+            raise ValueError(error_message)
+
+        n_metrics = len(list_metrics)
         n_datasets = len(env.evaluation_result_list) // n_metrics
         if isinstance(self.min_delta, list):
             if not all(t >= 0 for t in self.min_delta):
@@ -363,11 +377,14 @@ def _init(self, env: CallbackEnv) -> None:
                 raise ValueError("Must provide a single value for min_delta or as many as metrics.")
             if self.first_metric_only and self.verbose:
                 _log_info(f"Using only {self.min_delta[0]} as early stopping min_delta.")
+            if (self.chosen_metric is not None) and self.verbose:
+                index_chosen_metric = list_metrics.index(self.chosen_metric)
+                _log_info(f"Using only {self.min_delta[index_chosen_metric]} as early stopping min_delta.")
             deltas = self.min_delta * n_datasets
         else:
             if self.min_delta < 0:
                 raise ValueError("Early stopping min_delta must be non-negative.")
-            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and self.verbose:
+            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.index_chosen_metric is None) and self.verbose:
                 _log_info(f"Using {self.min_delta} as min_delta for all metrics.")
             deltas = [self.min_delta] * n_datasets * n_metrics
 
@@ -391,6 +408,8 @@ def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str]
                 )
                 if self.first_metric_only:
                     _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
+                if self.chosen_metric is not None:
+                    _log_info(f"Evaluated only: {self.chosen_metric}")
             raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
 
     def __call__(self, env: CallbackEnv) -> None:
@@ -418,6 +437,8 @@ def __call__(self, env: CallbackEnv) -> None:
             eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
             if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
+            if (self.chosen_metric is not None) and self.chosen_metric != eval_name_splitted[-1]:
+                continue  # use only the first metric for early stopping
             if self._is_train_set(
                 ds_name=env.evaluation_result_list[i][0],
                 eval_name=eval_name_splitted[0],
@@ -432,6 +453,8 @@ def __call__(self, env: CallbackEnv) -> None:
                     _log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
                     if self.first_metric_only:
                         _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
+                    if self.chosen_metric is not None:
+                        _log_info(f"Evaluated only: {self.chosen_metric}")
                 raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
             self._final_iteration_check(env, eval_name_splitted, i)
 
@@ -453,6 +476,7 @@ def early_stopping(
     first_metric_only: bool = False,
    verbose: bool = True,
    min_delta: Union[float, List[float]] = 0.0,
+    chosen_metric: str = None,
 ) -> _EarlyStoppingCallback:
     """Create a callback that activates early stopping.
 
@@ -492,4 +516,5 @@ def early_stopping(
         first_metric_only=first_metric_only,
         verbose=verbose,
         min_delta=min_delta,
+        chosen_metric=chosen_metric
     )

From dbdc17c43de32431acb5e91b18d347d51b6d2694 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:52:57 +0200
Subject: [PATCH 2/4] feat: add chosen_metric_early_stopping parameter to create early stopping callback through parameters

---
 python-package/lightgbm/engine.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index a19b29e7b584..e074f4b7787e 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -191,7 +191,13 @@ def train(
     if params["early_stopping_round"] is None:
         params.pop("early_stopping_round")
     first_metric_only = params.get("first_metric_only", False)
-
+    chosen_metric_early_stopping = params.get("chosen_metric_early_stopping", None)
+    # Test if both parameters are used
+    if (first_metric_only + (chosen_metric_early_stopping is not None)) == 2:
+        error_message = """
+        Only one of first_metric_only and chosen_metric_early_stopping parameters should be used"""
+        raise ValueError(error_message)
+
     predictor: Optional[_InnerPredictor] = None
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor.from_model_file(model_file=init_model, pred_parameter=params)
@@ -241,6 +247,7 @@ def train(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                chosen_metric=chosen_metric_early_stopping,
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,
@@ -716,6 +723,12 @@ def cv(
     if params["early_stopping_round"] is None:
         params.pop("early_stopping_round")
     first_metric_only = params.get("first_metric_only", False)
+    chosen_metric_early_stopping = params.get("chosen_metric_early_stopping", None)
+    # Test if both parameters are used
+    if (first_metric_only + (chosen_metric_early_stopping is not None)) == 2:
+        error_message = """
+        Only one of first_metric_only and chosen_metric_early_stopping parameters should be used"""
+        raise ValueError(error_message)
 
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor.from_model_file(
@@ -765,6 +778,7 @@ def cv(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                chosen_metric=chosen_metric_early_stopping,
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,

From 53698f73215edb49e394774289668c19e965f253 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:54:00 +0200
Subject: [PATCH 3/4] feat: add chosen_metric_early_stopping to the list of parameters

---
 include/LightGBM/config.h | 3 +++
 src/io/config_auto.cpp    | 1 +
 2 files changed, 4 insertions(+)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index a2f1a02370b7..bc05bdae9215 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -397,6 +397,9 @@ struct Config {
   // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
   bool first_metric_only = false;
 
+  // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to a specific metric name, if you want to use only this metric for early stopping
+  std::string chosen_metric_early_stopping;
+
   // alias = max_tree_output, max_leaf_output
   // desc = used to limit the max output of tree leaves
   // desc = ``<= 0`` means no constraint
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 394614af3f33..188f6fad08e8 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -815,6 +815,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::parameter2aliases() {
       {"extra_seed", {}},
       {"early_stopping_round", {"early_stopping_rounds", "early_stopping", "n_iter_no_change"}},
       {"first_metric_only", {}},
+      {"chosen_metric_early_stopping", {}},
       {"max_delta_step", {"max_tree_output", "max_leaf_output"}},
       {"lambda_l1", {"reg_alpha", "l1_regularization"}},
       {"lambda_l2", {"reg_lambda", "lambda", "l2_regularization"}},

From 44fcae278557b5793bf96bb763f40f0a0136a613 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Fri, 26 Apr 2024 23:10:23 +0200
Subject: [PATCH 4/4] refactor: replace argument name and add checks + warning

---
 python-package/lightgbm/callback.py | 54 +++++++++++++++++------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index a89d311e0d60..2dfa0d181c8c 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -279,15 +279,20 @@ def __init__(
         first_metric_only: bool = False,
         verbose: bool = True,
         min_delta: Union[float, List[float]] = 0.0,
-        chosen_metric: str = None,
+        metric_name: Optional[str] = None,
     ) -> None:
         self.enabled = _should_enable_early_stopping(stopping_rounds)
 
         # Test if both parameters are used
-        if (first_metric_only + (chosen_metric is not None)) == 2:
-            error_message = """
-            Only one of first_metric_only and chosen_metric parameters should be used"""
-            raise ValueError(error_message)
+        if first_metric_only and (metric_name is not None):
+            error_msg = """
+            Only one of 'first_metric_only' and 'metric_name' should be used"""
+            raise ValueError(error_msg)
+
+        # If metric_name is used, min_delta must be a scalar
+        if isinstance(min_delta, list) and (metric_name is not None):
+            error_msg = "Use a scalar value for 'min_delta' when using 'metric_name'."
+            raise ValueError(error_msg)
 
         self.order = 30
         self.before_iteration = False
@@ -296,7 +301,7 @@ def __init__(
         self.first_metric_only = first_metric_only
         self.verbose = verbose
         self.min_delta = min_delta
-        self.chosen_metric = chosen_metric
+        self.metric_name = metric_name
 
         self._reset_storages()
 
@@ -353,13 +358,13 @@ def _init(self, env: CallbackEnv) -> None:
 
         self._reset_storages()
 
-        list_metrics = {m[1] for m in env.evaluation_result_list}
-        if (self.chosen_metric is not None) and (self.chosen_metric not in list_metrics):
-            error_message = f"""Chosen callback metric: {self.chosen_metric} is not in the evaluation list.
-            The list of available metrics for early stopping is: {list_metrics}."""
+        set_metrics = {m[1] for m in env.evaluation_result_list}
+        if (self.metric_name is not None) and (self.metric_name not in set_metrics):
+            error_message = f"""Chosen callback metric: {self.metric_name} is not in the evaluation list.
+            The set of available metrics for early stopping is: {set_metrics}."""
             raise ValueError(error_message)
 
-        n_metrics = len(list_metrics)
+        n_metrics = len(set_metrics)
         n_datasets = len(env.evaluation_result_list) // n_metrics
         if isinstance(self.min_delta, list):
             if not all(t >= 0 for t in self.min_delta):
@@ -377,14 +382,11 @@ def _init(self, env: CallbackEnv) -> None:
                 raise ValueError("Must provide a single value for min_delta or as many as metrics.")
             if self.first_metric_only and self.verbose:
                 _log_info(f"Using only {self.min_delta[0]} as early stopping min_delta.")
-            if (self.chosen_metric is not None) and self.verbose:
-                index_chosen_metric = list_metrics.index(self.chosen_metric)
-                _log_info(f"Using only {self.min_delta[index_chosen_metric]} as early stopping min_delta.")
             deltas = self.min_delta * n_datasets
         else:
             if self.min_delta < 0:
                 raise ValueError("Early stopping min_delta must be non-negative.")
-            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.index_chosen_metric is None) and self.verbose:
+            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.metric_name is None) and self.verbose:
                 _log_info(f"Using {self.min_delta} as min_delta for all metrics.")
             deltas = [self.min_delta] * n_datasets * n_metrics
 
@@ -408,8 +410,8 @@ def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str]
                 )
                 if self.first_metric_only:
                     _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-                if self.chosen_metric is not None:
-                    _log_info(f"Evaluated only: {self.chosen_metric}")
+                if self.metric_name is not None:
+                    _log_info(f"Evaluated only: {self.metric_name}")
             raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
 
     def __call__(self, env: CallbackEnv) -> None:
@@ -437,7 +439,7 @@ def __call__(self, env: CallbackEnv) -> None:
             eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
             if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
-            if (self.chosen_metric is not None) and self.chosen_metric != eval_name_splitted[-1]:
+            if (self.metric_name is not None) and self.metric_name != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
             if self._is_train_set(
                 ds_name=env.evaluation_result_list[i][0],
@@ -453,8 +455,8 @@ def __call__(self, env: CallbackEnv) -> None:
                     _log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
                     if self.first_metric_only:
                         _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-                    if self.chosen_metric is not None:
-                        _log_info(f"Evaluated only: {self.chosen_metric}")
+                    if self.metric_name is not None:
+                        _log_info(f"Evaluated only: {self.metric_name}")
                 raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
             self._final_iteration_check(env, eval_name_splitted, i)
 
@@ -476,7 +478,7 @@ def early_stopping(
     first_metric_only: bool = False,
     verbose: bool = True,
     min_delta: Union[float, List[float]] = 0.0,
-    chosen_metric: str = None,
+    metric_name: Optional[str] = None,
 ) -> _EarlyStoppingCallback:
     """Create a callback that activates early stopping.
 
@@ -511,10 +513,18 @@ def early_stopping(
     callback : _EarlyStoppingCallback
         The callback that activates early stopping.
     """
+
+    if first_metric_only:
+        warning_message = """
+        'first_metric_only' parameter is deprecated.
+        It will be removed in a future release of lightgbm.
+        """
+        _log_warning(warning_message)
+
     return _EarlyStoppingCallback(
         stopping_rounds=stopping_rounds,
         first_metric_only=first_metric_only,
         verbose=verbose,
         min_delta=min_delta,
-        chosen_metric=chosen_metric
+        metric_name=metric_name
    )