From 9819cf178397de4d47dc321434cb5da57adc7168 Mon Sep 17 00:00:00 2001
From: unknown
Date: Wed, 22 Jan 2025 18:31:53 -0800
Subject: [PATCH 1/2] Added early stopping & patience parameter, needs cleanup

---
 detectron2/engine/hooks.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/detectron2/engine/hooks.py b/detectron2/engine/hooks.py
index fc37af0fd3..a14da69ce1 100644
--- a/detectron2/engine/hooks.py
+++ b/detectron2/engine/hooks.py
@@ -220,6 +220,7 @@ def __init__(
         eval_period: int,
         checkpointer: Checkpointer,
         val_metric: str,
+        patience: int,  # Added by Peter
         mode: str = "max",
         file_prefix: str = "model_best",
     ) -> None:
@@ -235,6 +236,7 @@ def __init__(
         self._logger = logging.getLogger(__name__)
         self._period = eval_period
         self._val_metric = val_metric
+        self._patience = patience  # Added by Peter
         assert mode in [
             "max",
             "min",
@@ -297,6 +299,19 @@ def after_step(self):
             and next_iter != self.trainer.max_iter
         ):
             self._best_checking()
+
+        # Early stopping code added by Peter
+        if (self.best_iter is None):
+            self._logger.warning("Best iteration is None. This is likely due to the metric not being computed yet.")
+            return
+        iterations_without_improvement = (self.trainer.iter-self.best_iter) // self._period
+        if(iterations_without_improvement == 0):
+            return
+
+        self._logger.warning(f"Model has not improved for {iterations_without_improvement} evaluation cycles. RAW={self.trainer.iter-self.best_iter}Max is {self._patience}")
+        if(iterations_without_improvement > self._patience):
+            self._logger.warning(f"Early stopping at iteration: {self.trainer.iter}")
+            raise Exception("Early stopping exception. Terminating 'gracefully'")
 
     def after_train(self):
         # same conditions as `EvalHook`

From 3ccb191685371f2007e8e5f2af33ee9813bda87c Mon Sep 17 00:00:00 2001
From: unknown
Date: Thu, 23 Jan 2025 11:49:05 -0800
Subject: [PATCH 2/2] Code cleanup

---
 detectron2/engine/hooks.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/detectron2/engine/hooks.py b/detectron2/engine/hooks.py
index a14da69ce1..26bc045d9f 100644
--- a/detectron2/engine/hooks.py
+++ b/detectron2/engine/hooks.py
@@ -220,9 +220,9 @@ def __init__(
         eval_period: int,
         checkpointer: Checkpointer,
         val_metric: str,
-        patience: int,  # Added by Peter
         mode: str = "max",
         file_prefix: str = "model_best",
+        patience: int = None,
     ) -> None:
         """
         Args:
@@ -232,11 +232,12 @@ def __init__(
             mode (str): one of {'max', 'min'}. controls whether the chosen val metric should be
                 maximized or minimized, e.g. for "bbox/AP50" it should be "max"
             file_prefix (str): the prefix of checkpoint's filename, defaults to "model_best"
+            patience (int): the number of evaluation cycles without improvement before early stopping
         """
         self._logger = logging.getLogger(__name__)
         self._period = eval_period
         self._val_metric = val_metric
-        self._patience = patience  # Added by Peter
+        self._patience = patience
         assert mode in [
             "max",
             "min",
@@ -300,18 +301,23 @@ def after_step(self):
         ):
             self._best_checking()
 
-        # Early stopping code added by Peter
-        if (self.best_iter is None):
-            self._logger.warning("Best iteration is None. This is likely due to the metric not being computed yet.")
+        if self._patience is None or self.best_iter is None:
             return
-        iterations_without_improvement = (self.trainer.iter-self.best_iter) // self._period
-        if(iterations_without_improvement == 0):
-            return
 
-        self._logger.warning(f"Model has not improved for {iterations_without_improvement} evaluation cycles. RAW={self.trainer.iter-self.best_iter}Max is {self._patience}")
+        iterations_without_improvement = (self.trainer.iter-self.best_iter) // self._period
+
         if(iterations_without_improvement > self._patience):
-            self._logger.warning(f"Early stopping at iteration: {self.trainer.iter}")
-            raise Exception("Early stopping exception. Terminating 'gracefully'")
+            self._logger.info(
+                f"Early stopping triggered at iteration {self.trainer.iter} due to lack of improvement "
+                f"after {iterations_without_improvement} cycles."
+            )
+            raise Exception("Early stopping triggered. Terminating training process.")
+
+        if(iterations_without_improvement > 0):
+            self._logger.info(
+                f"No improvement detected in the last {iterations_without_improvement} evaluation cycles. "
+                f"{self._patience - iterations_without_improvement} cycles remain before early stopping."
+            )
 
     def after_train(self):
         # same conditions as `EvalHook`
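
Note (not part of the patch series): a minimal usage sketch of the new patience option. It assumes the hook edited above is detectron2's BestCheckpointer (its eval_period/checkpointer/val_metric/mode/file_prefix signature matches) and that training runs through DefaultTrainer; the metric name and the patience value are illustrative, and a real run additionally needs a model config and registered datasets.

    from detectron2.config import get_cfg
    from detectron2.engine import DefaultTrainer, hooks

    cfg = get_cfg()
    # cfg.merge_from_file(...) plus dataset/model settings would go here in a real run
    cfg.TEST.EVAL_PERIOD = 1000          # run evaluation every 1000 iterations

    trainer = DefaultTrainer(cfg)
    trainer.register_hooks([
        hooks.BestCheckpointer(           # assumed class name; the diff only shows its __init__/after_step
            eval_period=cfg.TEST.EVAL_PERIOD,
            checkpointer=trainer.checkpointer,
            val_metric="bbox/AP50",       # example metric, as in the docstring above
            mode="max",
            patience=5,                   # stop after 5 evaluation cycles without improvement
        )
    ])

    try:
        trainer.train()
    except Exception:
        # The patched hook signals early stopping by raising a plain Exception
        # from after_step, so the caller must catch it to exit cleanly; a real
        # setup would inspect the message rather than swallow all exceptions.
        pass

Because the hook raises a plain Exception from after_step rather than using a dedicated signal, any caller that wants to continue after training (for example to run a final evaluation) has to wrap trainer.train() as shown.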