Forecast operator: fill missing target values before building FFORMS
meta-features, fall back from AUTO_SELECT_SERIES to AUTO_SELECT when no
target_category_columns are configured, and add a regression test.

NOTE(review): line structure was restored from a whitespace-flattened copy.
Hunk line counts match the hunk headers; indentation of context lines was
reconstructed from Python nesting and should be confirmed against the target
files before applying.

diff --git a/ads/opctl/operator/lowcode/common/transformations.py b/ads/opctl/operator/lowcode/common/transformations.py
index b5367fe19..ccd168b08 100644
--- a/ads/opctl/operator/lowcode/common/transformations.py
+++ b/ads/opctl/operator/lowcode/common/transformations.py
@@ -329,6 +329,8 @@ def build_fforms_meta_features(self, data, target_col=None, group_cols=None):
         if target_col not in data.columns:
             raise ValueError(f"Target column '{target_col}' not found in DataFrame")
 
+        data[target_col] = data[target_col].fillna(0)
+
         # Check if group_cols are provided and valid
         if group_cols is not None:
             if not isinstance(group_cols, list):
diff --git a/ads/opctl/operator/lowcode/forecast/__main__.py b/ads/opctl/operator/lowcode/forecast/__main__.py
index 57809886d..8bcf21c3c 100644
--- a/ads/opctl/operator/lowcode/forecast/__main__.py
+++ b/ads/opctl/operator/lowcode/forecast/__main__.py
@@ -9,14 +9,13 @@
 import sys
 from typing import Dict, List
 
-import pandas as pd
 import yaml
 
 from ads.opctl import logger
 from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
 from ads.opctl.operator.common.utils import _parse_input_args
 
-from .const import AUTO_SELECT_SERIES
+from .const import AUTO_SELECT, AUTO_SELECT_SERIES
 from .model.forecast_datasets import ForecastDatasets, ForecastResults
 from .operator_config import ForecastOperatorConfig
 from .whatifserve import ModelDeploymentManager
@@ -29,8 +28,10 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
     datasets = ForecastDatasets(operator_config)
     model = ForecastOperatorModelFactory.get_model(operator_config, datasets)
 
-    if operator_config.spec.model == AUTO_SELECT_SERIES and hasattr(
-        operator_config.spec, "meta_features"
+    if (
+        operator_config.spec.model == AUTO_SELECT_SERIES
+        and hasattr(operator_config.spec, "meta_features")
+        and operator_config.spec.target_category_columns
     ):
         # For AUTO_SELECT_SERIES, handle each series with its specific model
         meta_features = operator_config.spec.meta_features
@@ -64,8 +65,6 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
             )
             sub_results_list.append(sub_results)
 
-            # results_df = pd.concat([results_df, sub_result_df], ignore_index=True, axis=0)
-            # elapsed_time += sub_elapsed_time
         # Merge all sub_results into a single ForecastResults object
         if sub_results_list:
             results = sub_results_list[0]
@@ -75,6 +74,20 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
             results = None
 
     else:
+        # When AUTO_SELECT_SERIES is specified but target_category_columns is not,
+        # we fall back to AUTO_SELECT behavior.
+        if (
+            operator_config.spec.model == AUTO_SELECT_SERIES
+            and not operator_config.spec.target_category_columns
+        ):
+
+            logger.warning(
+                "AUTO_SELECT_SERIES cannot be run with a single-series dataset or when "
+                "'target_category_columns' is not provided. Falling back to AUTO_SELECT."
+            )
+
+            operator_config.spec.model = AUTO_SELECT
+            model = ForecastOperatorModelFactory.get_model(operator_config, datasets)
         # For other cases, use the single selected model
         results = model.generate_report()
     # saving to model catalog
diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py
index 8460bbea7..d4e129f49 100644
--- a/tests/operators/forecast/test_datasets.py
+++ b/tests/operators/forecast/test_datasets.py
@@ -12,6 +12,7 @@
 import pandas as pd
 import pytest
 import yaml
+import numpy as np
 
 from ads.opctl.operator.cmd import run
 from ads.opctl.operator.lowcode.forecast.__main__ import operate as forecast_operate
@@ -413,5 +414,44 @@ def run_operator(
     # generate_train_metrics = True
 
 
+def test_missing_data_autoselect_series():
+    """Test case for auto-select-series with missing data."""
+    data = {
+        "Date": pd.to_datetime(
+            [
+                "2023-01-01",
+                "2023-01-02",
+                "2023-01-03",
+                "2023-01-04",
+                "2023-01-05",
+                "2023-01-06",
+                "2023-01-07",
+                "2023-01-08",
+                "2023-01-09",
+                "2023-01-10",
+            ]
+        ),
+        "Y": [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10],
+        "Category": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"],
+    }
+    df = pd.DataFrame(data)
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        output_data_path = f"{tmpdirname}/results"
+        yaml_i = deepcopy(TEMPLATE_YAML)
+        yaml_i["spec"]["model"] = "auto-select-series"
+        yaml_i["spec"]["historical_data"].pop("url")
+        yaml_i["spec"]["historical_data"]["data"] = df
+        yaml_i["spec"]["target_column"] = "Y"
+        yaml_i["spec"]["datetime_column"]["name"] = "Date"
+        yaml_i["spec"]["target_category_columns"] = ["Category"]
+        yaml_i["spec"]["horizon"] = 2
+        yaml_i["spec"]["output_directory"]["url"] = output_data_path
+
+        operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+        forecast_operate(operator_config)
+        check_output_for_errors(output_data_path)
+
+
 if __name__ == "__main__":
     pass