From 4ce23bd92eb12825cd1f513c112411a73fc9f6e4 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:52:05 +0200
Subject: [PATCH 1/4] feat: add chosen_metric attribute in early stopping callback class

---
 python-package/lightgbm/callback.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index e776ea953bd1..a89d311e0d60 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -279,9 +279,16 @@ def __init__(
         first_metric_only: bool = False,
         verbose: bool = True,
         min_delta: Union[float, List[float]] = 0.0,
+        chosen_metric: str = None,
     ) -> None:
         self.enabled = _should_enable_early_stopping(stopping_rounds)
 
+        # Test if both parameters are used
+        if (first_metric_only + (chosen_metric is not None)) == 2:
+            error_message = """
+            Only one of first_metric_only and chosen_metric parameters should be used"""
+            raise ValueError(error_message)
+
         self.order = 30
         self.before_iteration = False
 
@@ -289,6 +296,7 @@ def __init__(
         self.first_metric_only = first_metric_only
         self.verbose = verbose
         self.min_delta = min_delta
+        self.chosen_metric = chosen_metric
 
         self._reset_storages()
 
@@ -345,7 +353,13 @@ def _init(self, env: CallbackEnv) -> None:
 
         self._reset_storages()
 
-        n_metrics = len({m[1] for m in env.evaluation_result_list})
+        list_metrics = {m[1] for m in env.evaluation_result_list}
+        if (self.chosen_metric is not None) and (self.chosen_metric not in list_metrics):
+            error_message = f"""Chosen callback metric: {self.chosen_metric} is not in the evaluation list.
+            The list of available metrics for early stopping is: {list_metrics}."""
+            raise ValueError(error_message)
+
+        n_metrics = len(list_metrics)
         n_datasets = len(env.evaluation_result_list) // n_metrics
         if isinstance(self.min_delta, list):
             if not all(t >= 0 for t in self.min_delta):
@@ -363,11 +377,14 @@ def _init(self, env: CallbackEnv) -> None:
                 raise ValueError("Must provide a single value for min_delta or as many as metrics.")
             if self.first_metric_only and self.verbose:
                 _log_info(f"Using only {self.min_delta[0]} as early stopping min_delta.")
+            if (self.chosen_metric is not None) and self.verbose:
+                index_chosen_metric = list_metrics.index(self.chosen_metric)
+                _log_info(f"Using only {self.min_delta[index_chosen_metric]} as early stopping min_delta.")
             deltas = self.min_delta * n_datasets
         else:
             if self.min_delta < 0:
                 raise ValueError("Early stopping min_delta must be non-negative.")
-            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and self.verbose:
+            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.index_chosen_metric is None) and self.verbose:
                 _log_info(f"Using {self.min_delta} as min_delta for all metrics.")
             deltas = [self.min_delta] * n_datasets * n_metrics
 
@@ -391,6 +408,8 @@ def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str]
                 )
                 if self.first_metric_only:
                     _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
+                if self.chosen_metric is not None:
+                    _log_info(f"Evaluated only: {self.chosen_metric}")
             raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
 
     def __call__(self, env: CallbackEnv) -> None:
@@ -418,6 +437,8 @@ def __call__(self, env: CallbackEnv) -> None:
             eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
             if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
+            if (self.chosen_metric is not None) and self.chosen_metric != eval_name_splitted[-1]:
+                continue  # use only the first metric for early stopping
             if self._is_train_set(
                 ds_name=env.evaluation_result_list[i][0],
                 eval_name=eval_name_splitted[0],
@@ -432,6 +453,8 @@ def __call__(self, env: CallbackEnv) -> None:
                     _log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
                     if self.first_metric_only:
                         _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
+                    if self.chosen_metric is not None:
+                        _log_info(f"Evaluated only: {self.chosen_metric}")
                 raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
             self._final_iteration_check(env, eval_name_splitted, i)
 
@@ -453,6 +476,7 @@ def early_stopping(
     first_metric_only: bool = False,
    verbose: bool = True,
    min_delta: Union[float, List[float]] = 0.0,
+    chosen_metric: str = None,
 ) -> _EarlyStoppingCallback:
     """Create a callback that activates early stopping.
 
@@ -492,4 +516,5 @@ def early_stopping(
         first_metric_only=first_metric_only,
         verbose=verbose,
         min_delta=min_delta,
+        chosen_metric=chosen_metric
     )

From dbdc17c43de32431acb5e91b18d347d51b6d2694 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:52:57 +0200
Subject: [PATCH 2/4] feat: add chosen_metric_early_stopping parameter to create early stopping callback through parameters

---
 python-package/lightgbm/engine.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index a19b29e7b584..e074f4b7787e 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -191,7 +191,13 @@ def train(
     if params["early_stopping_round"] is None:
         params.pop("early_stopping_round")
     first_metric_only = params.get("first_metric_only", False)
-
+    chosen_metric_early_stopping = params.get("chosen_metric_early_stopping", None)
+    # Test if both parameters are used
+    if (first_metric_only + (chosen_metric_early_stopping is not None)) == 2:
+        error_message = """
+        Only one of first_metric_only and chosen_metric_early_stopping parameters should be used"""
+        raise ValueError(error_message)
+
     predictor: Optional[_InnerPredictor] = None
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor.from_model_file(model_file=init_model, pred_parameter=params)
@@ -241,6 +247,7 @@ def train(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                chosen_metric=chosen_metric_early_stopping,
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,
@@ -716,6 +723,12 @@ def cv(
     if params["early_stopping_round"] is None:
         params.pop("early_stopping_round")
     first_metric_only = params.get("first_metric_only", False)
+    chosen_metric_early_stopping = params.get("chosen_metric_early_stopping", None)
+    # Test if both parameters are used
+    if (first_metric_only + (chosen_metric_early_stopping is not None)) == 2:
+        error_message = """
+        Only one of first_metric_only and chosen_metric_early_stopping parameters should be used"""
+        raise ValueError(error_message)
 
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor.from_model_file(
@@ -765,6 +778,7 @@ def cv(
             callback.early_stopping(
                 stopping_rounds=params["early_stopping_round"],  # type: ignore[arg-type]
                 first_metric_only=first_metric_only,
+                chosen_metric=chosen_metric_early_stopping,
                 verbose=_choose_param_value(
                     main_param_name="verbosity",
                     params=params,

From 53698f73215edb49e394774289668c19e965f253 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Sat, 20 Apr 2024 22:54:00 +0200
Subject: [PATCH 3/4] feat: add chosen_metric_early_stopping to the list of parameters

---
 include/LightGBM/config.h | 3 +++
 src/io/config_auto.cpp    | 1 +
 2 files changed, 4 insertions(+)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index a2f1a02370b7..bc05bdae9215 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -397,6 +397,9 @@ struct Config {
   // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to ``true``, if you want to use only the first metric for early stopping
   bool first_metric_only = false;
 
+  // desc = LightGBM allows you to provide multiple evaluation metrics. Set this to a specific metric name, if you want to use only this metric for early stopping
+  std::string chosen_metric_early_stopping;
+
   // alias = max_tree_output, max_leaf_output
   // desc = used to limit the max output of tree leaves
   // desc = ``<= 0`` means no constraint
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 394614af3f33..188f6fad08e8 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -815,6 +815,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::parameter2aliases() {
       {"extra_seed", {}},
       {"early_stopping_round", {"early_stopping_rounds", "early_stopping", "n_iter_no_change"}},
       {"first_metric_only", {}},
+      {"chosen_metric_early_stopping", {}},
       {"max_delta_step", {"max_tree_output", "max_leaf_output"}},
       {"lambda_l1", {"reg_alpha", "l1_regularization"}},
       {"lambda_l2", {"reg_lambda", "lambda", "l2_regularization"}},

From 44fcae278557b5793bf96bb763f40f0a0136a613 Mon Sep 17 00:00:00 2001
From: sami-ka
Date: Fri, 26 Apr 2024 23:10:23 +0200
Subject: [PATCH 4/4] refactor: replace argument name and add checks + warning

---
 python-package/lightgbm/callback.py | 54 +++++++++++++++++------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index a89d311e0d60..2dfa0d181c8c 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -279,15 +279,20 @@ def __init__(
         first_metric_only: bool = False,
         verbose: bool = True,
         min_delta: Union[float, List[float]] = 0.0,
-        chosen_metric: str = None,
+        metric_name: Optional[str] = None,
     ) -> None:
         self.enabled = _should_enable_early_stopping(stopping_rounds)
 
         # Test if both parameters are used
-        if (first_metric_only + (chosen_metric is not None)) == 2:
-            error_message = """
-            Only one of first_metric_only and chosen_metric parameters should be used"""
-            raise ValueError(error_message)
+        if first_metric_only and (metric_name is not None):
+            error_msg = """
+            Only one of 'first_metric_only' and 'metric_name' should be used"""
+            raise ValueError(error_msg)
+
+        # If metric_name is used, min_delta must be a scalar
+        if isinstance(min_delta, list) and (metric_name is not None):
+            error_msg = "Use a scalar value for 'min_delta' when using 'metric_name'."
+            raise ValueError(error_msg)
 
         self.order = 30
         self.before_iteration = False
@@ -296,7 +301,7 @@ def __init__(
         self.first_metric_only = first_metric_only
         self.verbose = verbose
         self.min_delta = min_delta
-        self.chosen_metric = chosen_metric
+        self.metric_name = metric_name
 
         self._reset_storages()
 
@@ -353,13 +358,13 @@ def _init(self, env: CallbackEnv) -> None:
 
         self._reset_storages()
 
-        list_metrics = {m[1] for m in env.evaluation_result_list}
-        if (self.chosen_metric is not None) and (self.chosen_metric not in list_metrics):
-            error_message = f"""Chosen callback metric: {self.chosen_metric} is not in the evaluation list.
-            The list of available metrics for early stopping is: {list_metrics}."""
+        set_metrics = {m[1] for m in env.evaluation_result_list}
+        if (self.metric_name is not None) and (self.metric_name not in set_metrics):
+            error_message = f"""Chosen callback metric: {self.metric_name} is not in the evaluation list.
+            The set of available metrics for early stopping is: {set_metrics}."""
             raise ValueError(error_message)
 
-        n_metrics = len(list_metrics)
+        n_metrics = len(set_metrics)
         n_datasets = len(env.evaluation_result_list) // n_metrics
         if isinstance(self.min_delta, list):
             if not all(t >= 0 for t in self.min_delta):
@@ -377,14 +382,11 @@ def _init(self, env: CallbackEnv) -> None:
                 raise ValueError("Must provide a single value for min_delta or as many as metrics.")
             if self.first_metric_only and self.verbose:
                 _log_info(f"Using only {self.min_delta[0]} as early stopping min_delta.")
-            if (self.chosen_metric is not None) and self.verbose:
-                index_chosen_metric = list_metrics.index(self.chosen_metric)
-                _log_info(f"Using only {self.min_delta[index_chosen_metric]} as early stopping min_delta.")
             deltas = self.min_delta * n_datasets
         else:
             if self.min_delta < 0:
                 raise ValueError("Early stopping min_delta must be non-negative.")
-            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.index_chosen_metric is None) and self.verbose:
+            if self.min_delta > 0 and n_metrics > 1 and not self.first_metric_only and (self.metric_name is None) and self.verbose:
                 _log_info(f"Using {self.min_delta} as min_delta for all metrics.")
             deltas = [self.min_delta] * n_datasets * n_metrics
 
@@ -408,8 +410,8 @@ def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str]
                 )
                 if self.first_metric_only:
                     _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-                if self.chosen_metric is not None:
-                    _log_info(f"Evaluated only: {self.chosen_metric}")
+                if self.metric_name is not None:
+                    _log_info(f"Evaluated only: {self.metric_name}")
             raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
 
     def __call__(self, env: CallbackEnv) -> None:
@@ -437,7 +439,7 @@ def __call__(self, env: CallbackEnv) -> None:
             eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
             if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
-            if (self.chosen_metric is not None) and self.chosen_metric != eval_name_splitted[-1]:
+            if (self.metric_name is not None) and self.metric_name != eval_name_splitted[-1]:
                 continue  # use only the first metric for early stopping
             if self._is_train_set(
                 ds_name=env.evaluation_result_list[i][0],
@@ -453,8 +455,8 @@ def __call__(self, env: CallbackEnv) -> None:
                     _log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
                     if self.first_metric_only:
                         _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-                    if self.chosen_metric is not None:
-                        _log_info(f"Evaluated only: {self.chosen_metric}")
+                    if self.metric_name is not None:
+                        _log_info(f"Evaluated only: {self.metric_name}")
                 raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
             self._final_iteration_check(env, eval_name_splitted, i)
 
@@ -476,7 +478,7 @@ def early_stopping(
     first_metric_only: bool = False,
     verbose: bool = True,
     min_delta: Union[float, List[float]] = 0.0,
-    chosen_metric: str = None,
+    metric_name: Optional[str] = None,
 ) -> _EarlyStoppingCallback:
     """Create a callback that activates early stopping.
 
@@ -511,10 +513,18 @@ def early_stopping(
     callback : _EarlyStoppingCallback
         The callback that activates early stopping.
     """
+
+    if first_metric_only:
+        warning_message = """
+        'first_metric_only' parameter is deprecated.
+        It will be removed in a future release of lightgbm.
+        """
+        _log_warning(warning_message)
+
     return _EarlyStoppingCallback(
         stopping_rounds=stopping_rounds,
         first_metric_only=first_metric_only,
         verbose=verbose,
         min_delta=min_delta,
-        chosen_metric=chosen_metric
+        metric_name=metric_name
    )