From 35c2290a9574fede4d0ceac5b0ac5ee524af29f3 Mon Sep 17 00:00:00 2001
From: Xiaojing Zhang <80235074+zhangxjohn@users.noreply.github.com>
Date: Fri, 11 Mar 2022 09:57:14 +0800
Subject: [PATCH] Rebuild TSFinalTrainStep.

---
 hyperts/experiment.py                |  4 +++-
 hyperts/framework/compete.py         | 30 +++++++++++++++++++++++++---
 hyperts/framework/dl/models/_base.py |  9 +++++----
 hyperts/utils/__init__.py            |  2 +-
 hyperts/utils/consts.py              |  3 ++-
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/hyperts/experiment.py b/hyperts/experiment.py
index a777c3c..38d2471 100644
--- a/hyperts/experiment.py
+++ b/hyperts/experiment.py
@@ -282,7 +282,7 @@ def append_early_stopping_callbacks(cbs):
         raise ValueError("Forecast task 'timestamp' cannot be None.")
 
     if task in consts.TASK_LIST_FORECAST and covariables is None:
-        logger.warning('If the data contains covariables, specify the covariable column names.')
+        logger.info('If the data contains covariables, specify the covariable column names.')
 
     if mode != consts.Mode_STATS:
         try:
@@ -358,6 +358,8 @@ def append_early_stopping_callbacks(cbs):
 
         if freq is None:
             freq = tb.infer_ts_freq(X_train, ts_name=timestamp)
+            if freq is None:
+                raise RuntimeError('Unable to infer correct frequency, please check data or specify frequency.')
 
     # 7. Covarite Transformer
     if covariables is not None:
diff --git a/hyperts/framework/compete.py b/hyperts/framework/compete.py
index 713bc15..651c072 100644
--- a/hyperts/framework/compete.py
+++ b/hyperts/framework/compete.py
@@ -60,10 +60,10 @@ def fit_transform(self, hyper_model, X_train, y_train, X_test=None, X_eval=None,
         # 4. eval variables data process
         if X_eval is None or y_eval is None:
             if self.task in consts.TASK_LIST_FORECAST:
-                if X_train.shape[0] <= 2*consts.DEFAULT_FORECAST_EVAL_SIZE or isinstance(self.experiment.eval_size, int):
+                if int(X_train.shape[0]*consts.DEFAULT_MIN_EVAL_SIZE)<=10 or isinstance(self.experiment.eval_size, int):
                     eval_horizon = self.experiment.eval_size
                 else:
-                    eval_horizon = consts.DEFAULT_FORECAST_EVAL_SIZE
+                    eval_horizon = consts.DEFAULT_MIN_EVAL_SIZE
                 X_train, X_eval, y_train, y_eval = \
                     tb.temporal_train_test_split(X_train, y_train, test_size=eval_horizon)
                 self.step_progress('split into train set and eval set')
@@ -284,6 +284,30 @@ def get_ensemble(self, estimators, X_train, y_train):
         return tb.greedy_ensemble(ensemble_task, estimators, scoring=self.scorer, ensemble_size=self.ensemble_size)
 
 
+class TSFinalTrainStep(FinalTrainStep):
+    """Final train step that can refit the best trial on train plus eval data."""
+
+    def __init__(self, experiment, name, mode=None, retrain_on_wholedata=False):
+        super().__init__(experiment, name)
+        self.mode = mode
+        self.retrain_on_wholedata = retrain_on_wholedata
+
+    def build_estimator(self, hyper_model, X_train, y_train, X_test=None, X_eval=None, y_eval=None, **kwargs):
+        """Return the best trial's estimator, refit on train+eval data or loaded from its model file."""
+        best_trial = hyper_model.get_best_trial()
+        if not self.retrain_on_wholedata:
+            return hyper_model.load_estimator(best_trial.model_file)
+
+        toolbox = get_tool_box(X_train, X_eval)
+        X_whole = toolbox.concat_df([X_train, X_eval], axis=0)
+        y_whole = toolbox.concat_df([y_train, y_eval], axis=0)
+        if self.mode != consts.Mode_STATS:
+            # Any mode other than Mode_STATS is handed the fixed final-training epoch count.
+            kwargs['epochs'] = consts.FINAL_TRAINING_EPOCHS
+
+        return hyper_model.final_train(best_trial.space_sample, X_whole, y_whole, **kwargs)
+
+
 class TSPipeline:
     """Pipeline Extension for Time Series Analysis.
 
@@ -731,7 +755,7 @@ def __init__(self, hyper_model, X_train, y_train, X_eval=None, y_eval=None, X_te
         #                                 ensemble_size=ensemble_size))
         # else:
         # final train step
-        steps.append(FinalTrainStep(self, consts.StepName_FINAL_TRAINING, retrain_on_wholedata=False))
+        steps.append(TSFinalTrainStep(self, consts.StepName_FINAL_TRAINING, retrain_on_wholedata=True))
 
         # ignore warnings
         import warnings
diff --git a/hyperts/framework/dl/models/_base.py b/hyperts/framework/dl/models/_base.py
index 4500b2e..2009301 100644
--- a/hyperts/framework/dl/models/_base.py
+++ b/hyperts/framework/dl/models/_base.py
@@ -191,7 +191,8 @@ def fit(self,
             validation_freq=1,
             max_queue_size=10,
             workers=1,
-            use_multiprocessing=False):
+            use_multiprocessing=False,
+            **kwargs):
         """Trains the model for a fixed number of epochs (iterations on a dataset).
 
         Parameters
@@ -666,15 +667,15 @@ def _from_tensor_slices(self, X, y, batch_size, epochs=None, shuffle=False, drop
 
         dataset = tf.data.Dataset.from_tensor_slices((data, y))
 
-        if epochs is not None:
-            dataset = dataset.repeat(epochs)
-
         if shuffle:
             dataset = dataset.shuffle(y.shape[0])
 
         dataset = dataset.batch(batch_size, drop_remainder=drop_remainder and y.shape[0] >= batch_size)
         dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
 
+        if epochs is not None:
+            dataset = dataset.repeat(epochs+1)
+
         return dataset
 
     def _preprocessor(self, X, y):
diff --git a/hyperts/utils/__init__.py b/hyperts/utils/__init__.py
index 1203c3f..128f854 100644
--- a/hyperts/utils/__init__.py
+++ b/hyperts/utils/__init__.py
@@ -26,7 +26,7 @@ def set_random_state(seed=9527, mode=consts.Mode_STATS):
 
     random.seed(seed)
     os.environ['PYTHONHASHSEED'] = str(seed)
-    os.environ['TF_DETERMINISTIC_OPS'] = '0'
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'
     np.random.seed(seed)
 
     if mode == consts.Mode_DL:
diff --git a/hyperts/utils/consts.py b/hyperts/utils/consts.py
index c66ac53..e6fe4cc 100644
--- a/hyperts/utils/consts.py
+++ b/hyperts/utils/consts.py
@@ -3,8 +3,9 @@
 
 TIMESTAMP                          = 'timestamp'
 DEFAULT_EVAL_SIZE                  = 0.2
-DEFAULT_FORECAST_EVAL_SIZE         = 10
+DEFAULT_MIN_EVAL_SIZE              = 0.05
 NAN_DROP_SIZE                      = 0.6
+FINAL_TRAINING_EPOCHS              = 120
 
 Task_UNIVARIATE_FORECAST           = 'univariate-forecast'
 Task_MULTIVARIATE_FORECAST         = 'multivariate-forecast'