Skip to content

Commit da1a47b

Browse files
authored
Merge branch 'main' into ODSC-55290/update_job_byoc_api
2 parents fa95dc4 + 18b69bc commit da1a47b

File tree

6 files changed

+41
-17
lines changed

6 files changed

+41
-17
lines changed

ads/opctl/operator/lowcode/common/errors.py

+6
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,9 @@ def __init__(self, error: str):
3939
"complies with the required schema for the operator. \n"
4040
f"{error}"
4141
)
42+
43+
44+
class InsufficientDataError(Exception):
45+
def __init__(self, message: str):
46+
self.message = message
47+
super().__init__(message)

ads/opctl/operator/lowcode/forecast/model/base_model.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -249,20 +249,28 @@ def generate_report(self):
249249
train_metrics_sections = [sec9_text, sec9]
250250

251251
backtest_sections = []
252+
output_dir = self.spec.output_directory.url
253+
backtest_report_name = "backtest_stats.csv"
254+
file_path = f"{output_dir}/{backtest_report_name}"
252255
if self.spec.model == AUTO_SELECT:
253-
output_dir = self.spec.output_directory.url
254-
backtest_report_name = "backtest_stats.csv"
255-
backtest_stats = pd.read_csv(f"{output_dir}/{backtest_report_name}")
256-
average_dict = backtest_stats.mean().to_dict()
257-
del average_dict['backtest']
258-
best_model = min(average_dict, key=average_dict.get)
259-
backtest_text = rc.Heading("Back Testing Metrics", level=2)
260-
summary_text = rc.Text(
261-
f"Overall, the average scores for the models are {average_dict}, with {best_model}"
262-
f" being identified as the top-performing model during backtesting.")
263-
backtest_table = rc.DataTable(backtest_stats, index=True)
264-
liner_plot = get_auto_select_plot(backtest_stats)
265-
backtest_sections = [backtest_text, backtest_table, summary_text, liner_plot]
256+
backtest_sections.append(rc.Heading("Auto-select statistics", level=2))
257+
if not os.path.exists(file_path):
258+
failure_msg = rc.Text("auto-select could not be executed. Please check the "
259+
"logs for more details.")
260+
backtest_sections.append(failure_msg)
261+
else:
262+
backtest_stats = pd.read_csv(file_path)
263+
average_dict = backtest_stats.mean().to_dict()
264+
del average_dict['backtest']
265+
best_model = min(average_dict, key=average_dict.get)
266+
backtest_text = rc.Heading("Back Testing Metrics", level=3)
267+
summary_text = rc.Text(
268+
f"Overall, the average scores for the models are {average_dict}, with {best_model}"
269+
f" being identified as the top-performing model during backtesting.")
270+
backtest_table = rc.DataTable(backtest_stats, index=True)
271+
liner_plot = get_auto_select_plot(backtest_stats)
272+
backtest_sections.extend([backtest_text, backtest_table, summary_text,
273+
liner_plot])
266274

267275

268276
forecast_plots = []

ads/opctl/operator/lowcode/forecast/model_evaluator.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from ads.opctl.operator.lowcode.common.const import DataColumns
1313
from .model.forecast_datasets import ForecastDatasets
1414
from .operator_config import ForecastOperatorConfig
15-
15+
from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
16+
from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
1617

1718
class ModelEvaluator:
1819
"""
@@ -61,6 +62,9 @@ def generate_k_fold_data(self, datasets: ForecastDatasets, operator_config: Fore
6162
unique_dates = min_series_data[date_col].unique()
6263

6364
cut_offs = self.generate_cutoffs(unique_dates, horizon)
65+
if not len(cut_offs):
66+
raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
67+
"instead of using auto-select.")
6468
training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
6569
in cut_offs]
6670
test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
@@ -137,7 +141,12 @@ def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOp
137141
return metrics
138142

139143
def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
140-
metrics = self.run_all_models(datasets, operator_config)
144+
try:
145+
metrics = self.run_all_models(datasets, operator_config)
146+
except InsufficientDataError as e:
147+
model = SupportedModels.Prophet
148+
logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
149+
return model
141150
avg_backtests_metrics = {key: sum(value.values()) / len(value.values()) for key, value in metrics.items()}
142151
best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
143152
logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")

docs/source/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Oracle Accelerated Data Science (ADS)
4545
user_guide/operators/forecasting_operator/index
4646
user_guide/operators/anomaly_detection_operator/index
4747
user_guide/operators/pii_operator/index
48+
user_guide/operators/recommender_operator/index
4849

4950
.. toctree::
5051
:hidden:

docs/source/user_guide/operators/recommender/index.rst renamed to docs/source/user_guide/operators/recommender_operator/index.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
===
1+
===========
22
Recommender
3-
===
3+
===========
44

55
The Recommender Operator utilizes advanced algorithms to provide personalized recommendations based on user behavior and preferences. This operator streamlines the data science workflow by automating the process of selecting the best recommendation algorithms, tuning hyperparameters, and extracting relevant features, ensuring that users receive the most relevant and effective suggestions for their needs.
66

0 commit comments

Comments (0)