NVIDIA
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/.dockerignore b/Tools/PyTorch/TimeSeriesPredictionPlatform/.dockerignore
Lines changed: 4 additions & 0 deletions
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/.gitignore b/Tools/PyTorch/TimeSeriesPredictionPlatform/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/Dockerfile b/Tools/PyTorch/TimeSeriesPredictionPlatform/Dockerfile
Lines changed: 26 additions & 7 deletions
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/LICENSE b/Tools/PyTorch/TimeSeriesPredictionPlatform/LICENSE
Lines changed: 1 addition & 1 deletion
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/NOTICE b/Tools/PyTorch/TimeSeriesPredictionPlatform/NOTICE
Lines changed: 1 addition & 1 deletion
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md b/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md
Lines changed: 429 additions & 108 deletions
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/callbacks.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/callbacks.py
Lines changed: 2 additions & 1 deletion
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/ctl_callbacks.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/ctl_callbacks.py
Lines changed: 2 additions & 1 deletion
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/hydra_callbacks.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/hydra_callbacks.py
Lines changed: 16 additions & 3 deletions
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/conf/conf_utils.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/conf/conf_utils.py
Lines changed: 7 additions & 3 deletions
@@ -6,3 +6,7 @@
 .gitignore
 Dockerfile
 .dockerignore
+/outputs/
+/datasets/
+/multirun/
+/notebooks/
@@ -3,3 +3,5 @@ __pycache__
 /outputs/
 *.zip
 /datasets/*/
+/datasets/
+/notebooks/
@@ -1,5 +1,19 @@
+# Copyright 2021-2024 NVIDIA CORPORATION
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 #SPDX-License-Identifier: Apache-2.0
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.04-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.12-py3
 
 FROM ${FROM_IMAGE_NAME}
 
@@ -30,26 +44,31 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 
-# Install perf_client required library
 RUN apt-get update && \
     apt-get install -y libb64-dev libb64-0d curl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Set workdir and python path
 WORKDIR /workspace
 ENV PYTHONPATH /workspace
 
+RUN rm /usr/lib/libxgboost.so
+
 ADD requirements.txt /workspace/requirements.txt
 ADD triton/requirements.txt /workspace/triton/requirements.txt
 RUN pip install -r /workspace/requirements.txt
 RUN pip install -r /workspace/triton/requirements.txt
 RUN pip install nvidia-pyindex
 RUN pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger
-RUN pip install --no-cache-dir -r requirements.txt -f https://data.dgl.ai/wheels/repo.html
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install dgl==1.0.1 -f https://data.dgl.ai/wheels/cu117/repo.html
 
-# Add model files to workspace
-ADD . /workspace
+ADD ./hydra_plugins /workspace/hydra_plugins
+RUN pip install /workspace/hydra_plugins/hydra_optuna_sweeper/
+RUN pip install /workspace/hydra_plugins/hydra_joblib_launcher/
+RUN pip install /workspace/hydra_plugins/hydra_multiprocessing_launcher/
+RUN pip install /workspace/hydra_plugins/hydra_torchrun_launcher/
+RUN cp /workspace/hydra_plugins/optuna_sweeper.py /usr/local/lib/python3.8/dist-packages/hydra/plugins/sweeper.py
 
-RUN pip install -e distributed_launcher
+ADD . /workspace
 RUN rm -rf examples docker-examples tutorials
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2021-2022 NVIDIA Corporation
+   Copyright [yyyy] [name of copyright owner]
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -190,7 +190,7 @@ This repository contains code from https://github.com/rwightman/pytorch-image-mo
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2021-2022 NVIDIA Corporation
+   Copyright [yyyy] [name of copyright owner]
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# SPDX-License-Identifier: Apache-2.0
 class Callback(object):
     """
     Base class for building new callbacks.
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# SPDX-License-Identifier: Apache-2.0
 import time
 
 import dllogger
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,26 +13,27 @@
 # limitations under the License.
 
 import os
+import mlflow
 import pandas as pd
 
 from omegaconf import OmegaConf
 from hydra.experimental.callback import Callback
 
 from loggers.log_helper import jsonlog_2_df
+from mlflow.entities import Metric, Param
 
 class MergeLogs(Callback):
     def on_multirun_end(self, config, **kwargs):
         OmegaConf.resolve(config)
 
-        ALLOWED_KEYS=['timestamp', 'elapsed_time', 'step', 'loss', 'val_loss', 'MAE', 'MSE', 'RMSE', 'P50', 'P90']
+        ALLOWED_KEYS=['timestamp', 'elapsed_time', 'step', 'loss', 'val_loss', 'MAE', 'MSE', 'RMSE', 'P50', 'P90', 'SMAPE', 'TDI']
 
         dfs = []
         for p, sub_dirs, files in os.walk(config.hydra.sweep.dir):
             if 'log.json' in files:
                 path = os.path.join(p, 'log.json')
                 df = jsonlog_2_df(path, ALLOWED_KEYS)
                 dfs.append(df)
-
         # Transpose dataframes
         plots = {}
         for c in dfs[0].columns:
@@ -49,3 +50,15 @@ def on_multirun_end(self, config, **kwargs):
         timestamps = (timestamps * 1000).astype(int)
         if not timestamps.is_monotonic:
             raise ValueError('Timestamps are not monotonic')
+
+        metrics = [Metric('_'.join((k,name)), v, timestamp, step)
+                for k, df in plots.items()
+                for timestamp, (step, series) in zip(timestamps, df.iterrows())
+                for name, v in series.items()
+                ]
+        client = mlflow.tracking.MlflowClient(tracking_uri=config.trainer.config.mlflow_store)
+        exp = client.get_experiment_by_name(config.trainer.config.get('experiment_name', ''))
+        run = client.create_run(exp.experiment_id if exp else '0')
+        for i in range(0, len(metrics), 1000):
+            client.log_batch(run.info.run_id, metrics=metrics[i:i+1000])
+        client.set_terminated(run.info.run_id)
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,10 @@
 
 from omegaconf import OmegaConf
 from data.data_utils import InputTypes, DataTypes, FeatureSpec
+import functools
+from hydra.utils import get_method
 
-OmegaConf.register_new_resolver("and", lambda x, y: x and y, use_cache=True)
+OmegaConf.register_new_resolver("and", lambda x, y: bool(x and y), use_cache=True)
 OmegaConf.register_new_resolver("feature.selector",
         lambda x,feat_type,embed_type:
             OmegaConf.create([elem for elem in x if elem.feature_type == feat_type and elem.feature_embed_type == embed_type])
@@ -27,10 +29,12 @@
 OmegaConf.register_new_resolver("cmp", lambda x, y: x == y)
 OmegaConf.register_new_resolver("cont.lower", lambda x, y: y.lower() in x.lower())
 
-# XXX I don't know whether it is the best idea to allow user to sum over nested structure without checks
 def sum_nested(*args):
     if len(args) == 1 and isinstance(args[0], (int, float)):
         return args[0]
     return sum(arg if isinstance(arg, (int, float)) else sum_nested(*arg) for arg in args)
 
 OmegaConf.register_new_resolver("sum", sum_nested)
+
+def partial(func, *args, **kwargs):
+    return functools.partial(get_method(func), *args, **kwargs)