From f4fed490239dbb22eb9c4bddb8b50c51eb39a16a Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Wed, 24 May 2023 19:16:52 +0900 Subject: [PATCH 1/6] fix predict call --- lightwood/mixer/nhits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightwood/mixer/nhits.py b/lightwood/mixer/nhits.py index 4b1f892b3..6979f0fb7 100644 --- a/lightwood/mixer/nhits.py +++ b/lightwood/mixer/nhits.py @@ -160,7 +160,7 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], group_ends = [] for group in input_df['unique_id'].unique(): group_ends.append(input_df[input_df['unique_id'] == group]['index'].iloc[-1]) - fcst = self.model.predict(futr_df=input_df).reset_index() + fcst = self.model.predict(input_df).reset_index() for gidx, group in zip(group_ends, input_df['unique_id'].unique()): for pred_col, target_col in zip(pred_cols, target_cols): From f81a7b18a022b0c5fe3446caca469a0cdbcd06d9 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 25 May 2023 11:48:01 +0900 Subject: [PATCH 2/6] version bump: 23.5.1.1 --- lightwood/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightwood/__about__.py b/lightwood/__about__.py index 7ddb2a3f7..42dc89079 100644 --- a/lightwood/__about__.py +++ b/lightwood/__about__.py @@ -1,6 +1,6 @@ __title__ = 'lightwood' __package_name__ = 'lightwood' -__version__ = '23.5.1.0' +__version__ = '23.5.1.1' __description__ = "Lightwood is a toolkit for automatic machine learning model building" __email__ = "community@mindsdb.com" __author__ = 'MindsDB Inc' From ab693577f0fdfc81c08f362d44497ef56830eb4a Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 25 May 2023 12:40:26 +0900 Subject: [PATCH 3/6] fix levels args --- lightwood/mixer/nhits.py | 21 ++++++++++++++++----- tests/unit_tests/mixer/test_nhits.py | 8 +++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lightwood/mixer/nhits.py b/lightwood/mixer/nhits.py index 6979f0fb7..c00ad2fd1 100644 --- a/lightwood/mixer/nhits.py +++ b/lightwood/mixer/nhits.py @@ -55,7 +55,10 @@ def __init__( self.ts_analysis = ts_analysis self.grouped_by = ['__default'] if not ts_analysis['tss'].group_by else ts_analysis['tss'].group_by self.train_args = train_args.get('trainer_args', {}) if train_args else {} - self.conf_level = self.train_args.pop('conf_level', 90) + self.conf_level = self.train_args.pop('conf_level', [90]) + for level in self.conf_level: + assert 0 <= level <= 100, f'A provided level is not in the [0, 100] range (found: {level})' + assert isinstance(level, int), f'A provided level is not an integer (found: {level})' self.pretrained = pretrained self.base_url = 'https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/' @@ -120,7 +123,7 @@ def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: new_window = max(1, n_time - self.horizon - 1) self.window = new_window log.info(f'Window {self.window} is too long for data provided (group: {df[gby].value_counts()[::-1].index[0]}), reducing window to {new_window}.') # noqa - model = NHITS(h=n_time_out, input_size=self.window, **self.train_args, loss=MQLoss(level=[self.conf_level])) + model = NHITS(h=n_time_out, input_size=self.window, **self.train_args, loss=MQLoss(level=self.conf_level)) self.model = NeuralForecast(models=[model], freq=self.ts_analysis['sample_freqs']['__default']) self.model.fit(df=Y_df, val_size=n_ts_val) log.info('Successfully trained N-HITS forecasting model.') @@ -152,8 +155,16 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], input_df = self._make_initial_df(deepcopy(ds.data_frame)) ydf['index'] = input_df['index'] - pred_cols = [f'NHITS-lo-{self.conf_level}', 'NHITS-median', f'NHITS-hi-{self.conf_level}'] - target_cols = ['lower', 'prediction', 'upper'] + pred_cols = ['NHITS-median'] + + # provided quantile must match one of the training levels, else we default to the largest one of these + if args.fixed_confidence is not None and int(args.fixed_confidence*100) in self.conf_level: + level = int(args.fixed_confidence*100) + else: + level = max(self.conf_level) + pred_cols.extend([f'NHITS-lo-{level}', f'NHITS-hi-{level}']) + + target_cols = ['prediction', 'lower', 'upper'] for target_col in target_cols: ydf[target_col] = [[0 for _ in range(self.horizon)] for _ in range(len(ydf))] # zero-filled arrays @@ -168,7 +179,7 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], idx = ydf[ydf['index'] == gidx].index[0] ydf.at[idx, target_col] = group_preds - ydf['confidence'] = 0.9 # TODO: set through `args` + ydf['confidence'] = level/100 return ydf def _make_initial_df(self, df): diff --git a/tests/unit_tests/mixer/test_nhits.py b/tests/unit_tests/mixer/test_nhits.py index fcb23113a..04264f20f 100644 --- a/tests/unit_tests/mixer/test_nhits.py +++ b/tests/unit_tests/mixer/test_nhits.py @@ -15,8 +15,10 @@ def get_submodels(self): 'module': 'NHitsMixer', 'args': { 'train_args': { - 'trainer_args': {'max_epochs': 10}, - 'conf_levels': [90], + 'trainer_args': { + 'max_epochs': 10, + 'conf_level': [90, 95], + }, } } }, @@ -39,4 +41,4 @@ def test_0_regression(self): predictor = predictor_from_code(code) predictor.learn(df) - predictor.predict(df) + predictor.predict(df, args={'fixed_confidence': 0.9}) From b1aee11688d8187a41178df181a774e39f1ce5b5 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Thu, 25 May 2023 12:41:08 +0900 Subject: [PATCH 4/6] lint: flake8 --- lightwood/mixer/nhits.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightwood/mixer/nhits.py b/lightwood/mixer/nhits.py index c00ad2fd1..72fe29a12 100644 --- a/lightwood/mixer/nhits.py +++ b/lightwood/mixer/nhits.py @@ -158,8 +158,8 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], pred_cols = ['NHITS-median'] # provided quantile must match one of the training levels, else we default to the largest one of these - if args.fixed_confidence is not None and int(args.fixed_confidence*100) in self.conf_level: - level = int(args.fixed_confidence*100) + if args.fixed_confidence is not None and int(args.fixed_confidence * 100) in self.conf_level: + level = int(args.fixed_confidence * 100) else: level = max(self.conf_level) pred_cols.extend([f'NHITS-lo-{level}', f'NHITS-hi-{level}']) @@ -179,7 +179,7 @@ def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs], idx = ydf[ydf['index'] == gidx].index[0] ydf.at[idx, target_col] = group_preds - ydf['confidence'] = level/100 + ydf['confidence'] = level / 100 return ydf def _make_initial_df(self, df): From 1f348c02f0f4fbab8d440fc1737769ac8932c1b0 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Fri, 26 May 2023 15:51:27 +0300 Subject: [PATCH 5/6] update dill --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 234cbc961..a07b5a275 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ setuptools >=21.2.1 wheel >=0.32.2 scikit-learn >=1.0.0, <=1.0.2 dataclasses_json >=0.5.4 -dill ==0.3.4 +dill ==0.3.6 sktime >=0.14.0,<0.15.0 statsforecast ==1.4.0 torch_optimizer ==0.1.0 From bd5731ffbddc8a5cf6176ad65b96af939e78fffd Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Tue, 30 May 2023 16:26:35 +0900 Subject: [PATCH 6/6] early stop by default --- lightwood/mixer/nhits.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lightwood/mixer/nhits.py b/lightwood/mixer/nhits.py index 72fe29a12..7d22c4764 100644 --- a/lightwood/mixer/nhits.py +++ b/lightwood/mixer/nhits.py @@ -55,6 +55,7 @@ def __init__( self.ts_analysis = ts_analysis self.grouped_by = ['__default'] if not ts_analysis['tss'].group_by else ts_analysis['tss'].group_by self.train_args = train_args.get('trainer_args', {}) if train_args else {} + self.train_args['early_stop_patience_steps'] = self.train_args.get('early_stop_patience_steps', 10) self.conf_level = self.train_args.pop('conf_level', [90]) for level in self.conf_level: assert 0 <= level <= 100, f'A provided level is not in the [0, 100] range (found: {level})' @@ -131,7 +132,7 @@ def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None: def partial_fit(self, train_data: EncodedDs, dev_data: EncodedDs, args: Optional[dict] = None) -> None: # TODO: reimplement this with automatic novel-row differential self.hyperparam_search = False - self.fit(dev_data, train_data) + self.fit(dev_data, train_data) # TODO: add support for passing args (e.g. n_epochs) self.prepared = True def __call__(self, ds: Union[EncodedDs, ConcatedEncodedDs],