From 35c2290a9574fede4d0ceac5b0ac5ee524af29f3 Mon Sep 17 00:00:00 2001
From: Xiaojing Zhang <80235074+zhangxjohn@users.noreply.github.com>
Date: Fri, 11 Mar 2022 09:57:14 +0800
Subject: [PATCH] Rebuild TSFinalTrainStep.

---
 hyperts/experiment.py                |  4 +++-
 hyperts/framework/compete.py         | 30 +++++++++++++++++++++++++---
 hyperts/framework/dl/models/_base.py |  9 +++++----
 hyperts/utils/__init__.py            |  2 +-
 hyperts/utils/consts.py              |  3 ++-
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/hyperts/experiment.py b/hyperts/experiment.py
index a777c3c..38d2471 100644
--- a/hyperts/experiment.py
+++ b/hyperts/experiment.py
@@ -282,7 +282,7 @@ def append_early_stopping_callbacks(cbs):
         raise ValueError("Forecast task 'timestamp' cannot be None.")
 
     if task in consts.TASK_LIST_FORECAST and covariables is None:
-        logger.warning('If the data contains covariables, specify the covariable column names.')
+        logger.info('If the data contains covariables, specify the covariable column names.')
 
     if mode != consts.Mode_STATS:
         try:
@@ -358,6 +358,8 @@
 
     if freq is None:
         freq = tb.infer_ts_freq(X_train, ts_name=timestamp)
+        if freq is None:
+            raise RuntimeError('Unable to infer correct frequency, please check data or specify frequency.')
 
     # 7. Covariate Transformer
     if covariables is not None:
diff --git a/hyperts/framework/compete.py b/hyperts/framework/compete.py
index 713bc15..651c072 100644
--- a/hyperts/framework/compete.py
+++ b/hyperts/framework/compete.py
@@ -60,10 +60,10 @@ def fit_transform(self, hyper_model, X_train, y_train, X_test=None, X_eval=None,
         # 4. eval variables data process
         if X_eval is None or y_eval is None:
             if self.task in consts.TASK_LIST_FORECAST:
-                if X_train.shape[0] <= 2*consts.DEFAULT_FORECAST_EVAL_SIZE or isinstance(self.experiment.eval_size, int):
+                if int(X_train.shape[0] * consts.DEFAULT_MIN_EVAL_SIZE) <= 10 or isinstance(self.experiment.eval_size, int):
                     eval_horizon = self.experiment.eval_size
                 else:
-                    eval_horizon = consts.DEFAULT_FORECAST_EVAL_SIZE
+                    eval_horizon = consts.DEFAULT_MIN_EVAL_SIZE
                 X_train, X_eval, y_train, y_eval = \
                     tb.temporal_train_test_split(X_train, y_train, test_size=eval_horizon)
         self.step_progress('split into train set and eval set')
@@ -284,6 +284,30 @@ def get_ensemble(self, estimators, X_train, y_train):
         return tb.greedy_ensemble(ensemble_task, estimators, scoring=self.scorer,
                                   ensemble_size=self.ensemble_size)
 
 
+class TSFinalTrainStep(FinalTrainStep):
+    def __init__(self, experiment, name, mode=None, retrain_on_wholedata=False):
+        super().__init__(experiment, name)
+
+        self.mode = mode
+        self.retrain_on_wholedata = retrain_on_wholedata
+
+    def build_estimator(self, hyper_model, X_train, y_train, X_test=None, X_eval=None, y_eval=None, **kwargs):
+        if self.retrain_on_wholedata:
+            trial = hyper_model.get_best_trial()
+            tb = get_tool_box(X_train, X_eval)
+            X_all = tb.concat_df([X_train, X_eval], axis=0)
+            y_all = tb.concat_df([y_train, y_eval], axis=0)
+
+            if self.mode != consts.Mode_STATS:
+                kwargs.update({'epochs': consts.FINAL_TRAINING_EPOCHS})
+
+            estimator = hyper_model.final_train(trial.space_sample, X_all, y_all, **kwargs)
+        else:
+            estimator = hyper_model.load_estimator(hyper_model.get_best_trial().model_file)
+
+        return estimator
+
+
 class TSPipeline:
     """Pipeline Extension for Time Series Analysis.
@@ -731,7 +755,7 @@ def __init__(self, hyper_model, X_train, y_train, X_eval=None, y_eval=None, X_te
 #                                          ensemble_size=ensemble_size))
 # else:
 # final train step
-        steps.append(FinalTrainStep(self, consts.StepName_FINAL_TRAINING, retrain_on_wholedata=False))
+        steps.append(TSFinalTrainStep(self, consts.StepName_FINAL_TRAINING, retrain_on_wholedata=True))
 
         # ignore warnings
         import warnings
diff --git a/hyperts/framework/dl/models/_base.py b/hyperts/framework/dl/models/_base.py
index 4500b2e..2009301 100644
--- a/hyperts/framework/dl/models/_base.py
+++ b/hyperts/framework/dl/models/_base.py
@@ -191,7 +191,8 @@ def fit(self,
             validation_freq=1,
             max_queue_size=10,
             workers=1,
-            use_multiprocessing=False):
+            use_multiprocessing=False,
+            **kwargs):
         """Trains the model for a fixed number of epochs (iterations on a dataset).
 
         Parameters
@@ -666,15 +667,15 @@ def _from_tensor_slices(self, X, y, batch_size, epochs=None, shuffle=False, drop
 
         dataset = tf.data.Dataset.from_tensor_slices((data, y))
 
-        if epochs is not None:
-            dataset = dataset.repeat(epochs)
-
         if shuffle:
             dataset = dataset.shuffle(y.shape[0])
 
         dataset = dataset.batch(batch_size, drop_remainder=drop_remainder and y.shape[0] >= batch_size)
         dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
 
+        if epochs is not None:
+            dataset = dataset.repeat(epochs + 1)
+
         return dataset
 
     def _preprocessor(self, X, y):
diff --git a/hyperts/utils/__init__.py b/hyperts/utils/__init__.py
index 1203c3f..128f854 100644
--- a/hyperts/utils/__init__.py
+++ b/hyperts/utils/__init__.py
@@ -26,7 +26,7 @@ def set_random_state(seed=9527, mode=consts.Mode_STATS):
 
     random.seed(seed)
     os.environ['PYTHONHASHSEED'] = str(seed)
-    os.environ['TF_DETERMINISTIC_OPS'] = '0'
+    os.environ['TF_DETERMINISTIC_OPS'] = '1'
     np.random.seed(seed)
 
     if mode == consts.Mode_DL:
diff --git a/hyperts/utils/consts.py b/hyperts/utils/consts.py
index c66ac53..e6fe4cc 100644
--- a/hyperts/utils/consts.py
+++ b/hyperts/utils/consts.py
@@ -3,8 +3,9 @@
 
 TIMESTAMP = 'timestamp'
 DEFAULT_EVAL_SIZE = 0.2
-DEFAULT_FORECAST_EVAL_SIZE = 10
+DEFAULT_MIN_EVAL_SIZE = 0.05
 NAN_DROP_SIZE = 0.6
+FINAL_TRAINING_EPOCHS = 120
 
 Task_UNIVARIATE_FORECAST = 'univariate-forecast'
 Task_MULTIVARIATE_FORECAST = 'multivariate-forecast'
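--
Note on the eval-split sizing in compete.py: the old rule reserved a fixed
10-step horizon (DEFAULT_FORECAST_EVAL_SIZE = 10); the new rule reserves 5% of
the training rows (DEFAULT_MIN_EVAL_SIZE = 0.05) and falls back to the
experiment's eval_size whenever that 5% would be 10 rows or fewer, or the user
pinned an explicit integer horizon. A minimal standalone sketch of the rule;
resolve_eval_horizon is a hypothetical helper name, not part of the HyperTS API:

    DEFAULT_MIN_EVAL_SIZE = 0.05  # fraction of training rows, from consts.py

    def resolve_eval_horizon(n_train_rows, user_eval_size=None):
        # Too few rows for a meaningful 5% split, or an explicit integer
        # horizon: defer to the experiment's eval_size (consts.py defines
        # DEFAULT_EVAL_SIZE = 0.2 as the general default).
        if int(n_train_rows * DEFAULT_MIN_EVAL_SIZE) <= 10 or isinstance(user_eval_size, int):
            return user_eval_size
        # Otherwise hand a fractional test_size to temporal_train_test_split.
        return DEFAULT_MIN_EVAL_SIZE

    resolve_eval_horizon(1000)     # 0.05, since int(1000 * 0.05) = 50 > 10
    resolve_eval_horizon(150, 12)  # 12,   since int(150 * 0.05) = 7 <= 10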
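On the new TSFinalTrainStep: after search it retrains the best trial on the
joined train + eval data instead of reusing the estimator fitted during search.
A condensed, self-contained rendering of build_estimator's control flow,
assuming a pandas-backed tool box; build_final_estimator is a hypothetical free
function, while get_best_trial, load_estimator, final_train, space_sample and
model_file are the hyper_model attributes the patch itself uses:

    import pandas as pd

    def build_final_estimator(hyper_model, X_train, y_train, X_eval, y_eval,
                              retrain_on_wholedata=True, dl_mode=True, **kwargs):
        trial = hyper_model.get_best_trial()
        if not retrain_on_wholedata:
            # Keep the estimator exactly as it was fitted during search.
            return hyper_model.load_estimator(trial.model_file)
        # Give the winning configuration every labelled row before shipping;
        # concatenation preserves row order, which matters for time series.
        X_all = pd.concat([X_train, X_eval], axis=0)
        y_all = pd.concat([y_train, y_eval], axis=0)
        if dl_mode:
            # Non-STATS modes cap the full-data fit (FINAL_TRAINING_EPOCHS = 120);
            # the new **kwargs on fit() lets this epochs override reach the model.
            kwargs['epochs'] = 120
        return hyper_model.final_train(trial.space_sample, X_all, y_all, **kwargs)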
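On the _base.py input pipeline: repeat() moves from the front of the tf.data
chain to the very end and repeats epochs + 1 times. Batching before repeating
keeps epoch boundaries intact (the short final batch is emitted once per epoch
rather than samples bleeding across epochs), and the extra repetition appears
to be a guard so Keras never runs the iterator dry on the last epoch. A
runnable sketch of the resulting ordering with dummy data:

    import numpy as np
    import tensorflow as tf

    X = np.random.rand(100, 8).astype('float32')
    y = np.random.rand(100, 1).astype('float32')
    batch_size, epochs = 16, 3

    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.shuffle(y.shape[0])                      # shuffle samples
    dataset = dataset.batch(batch_size, drop_remainder=False)  # then form batches
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # overlap host work
    dataset = dataset.repeat(epochs + 1)                       # repeat whole epochs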
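On the one-character utils/__init__.py change: TF_DETERMINISTIC_OPS='1' asks
TensorFlow (and cuDNN on GPU) to prefer deterministic kernel implementations,
so two seeded runs produce matching results; the previous '0' disabled that.
The variable has to be in the environment before the relevant ops first run,
so the assumed usage is to call set_random_state before building any model:

    # Assumed usage pattern; set_random_state and consts come from hyperts.utils,
    # and 9527 is the function's own default seed.
    from hyperts.utils import set_random_state, consts

    set_random_state(seed=9527, mode=consts.Mode_DL)  # also sets TF_DETERMINISTIC_OPS='1'
    # ...build and fit models afterwards; repeated runs should now match.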