Implementation of NormalizedReluBounding for bounding with non-zero thresholds (#64)

* Implementation of NormalizedReluBounding to allow a minimum threshold value different from zero. This version of the ReLU bounding works with all supported normalization types.

* Tests have been implemented, and example configurations have been added to the anemoi-training configs (commented out).

* The models CHANGELOG has been updated.
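
In other words, the bounding applies a ReLU whose floor sits at the threshold rather than at zero, with the threshold first mapped into the model's normalized space. A minimal sketch of the underlying identity (illustrative, not part of the commit):

import torch

# relu(x - c) + c == max(x, c): a ReLU whose floor sits at c instead of 0.
# In NormalizedReluBounding, c is min_val mapped into normalized space.
x = torch.tensor([-1.0, 0.5, 4.0])
c = 2.0
y = torch.nn.functional.relu(x - c) + c
assert torch.equal(y, torch.clamp(x, min=c))  # tensor([2., 2., 4.])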

---------

Co-authored-by: sahahner <[email protected]>
Reviewed-by: @jakob-schloer
Comments-from: @gabrieloks
lzampier and sahahner authored Jan 21, 2025
1 parent db587fe commit 600f01e
Showing 8 changed files with 203 additions and 3 deletions.
1 change: 1 addition & 0 deletions models/CHANGELOG.md
@@ -18,6 +18,7 @@ Keep it human-readable, your future self will thank you!
- Reduced memory usage when using chunking in the mapper [#84](https://github.com/ecmwf/anemoi-models/pull/84)
- Added `supporting_arrays` argument, which contains arrays to store in checkpoints. [#97](https://github.com/ecmwf/anemoi-models/pull/97)
- Add remappers, e.g. link functions to apply during training to facilitate learning of variables with a difficult distribution [#88](https://github.com/ecmwf/anemoi-models/pull/88)
- Add `NormalizedReluBounding` for minimum bounding thresholds different from 0 [#64](https://github.com/ecmwf/anemoi-core/pull/64)
- 'predict\_step' can take an optional model comm group. [#77](https://github.com/ecmwf/anemoi-core/pull/77)

## [0.4.0](https://github.com/ecmwf/anemoi-models/compare/0.3.0...0.4.0) - Improvements to Model Design
1 change: 1 addition & 0 deletions models/src/anemoi/models/interface/__init__.py
@@ -89,6 +89,7 @@ def _build_model(self) -> None:
self.config.model.model,
model_config=self.config,
data_indices=self.data_indices,
statistics=self.statistics,
graph_data=self.graph_data,
_recursive_=False, # Disables recursive instantiation by Hydra
)
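For context, the statistics object forwarded here carries the per-variable training statistics that the new bounding needs. Judging from the test fixture added in this PR, it is a dict of numpy arrays keyed by statistic name and indexed via name_to_index_stats — a sketch:

import numpy as np

# One entry per dataset variable, indexed through name_to_index_stats,
# e.g. {"var1": 0, "var2": 1, "total_var": 2} (mirrors the test fixture).
statistics = {
    "mean": np.array([1.0, 2.0, 3.0]),
    "stdev": np.array([0.5, 0.5, 0.5]),
    "min": np.array([1.0, 1.0, 1.0]),
    "max": np.array([11.0, 10.0, 10.0]),
}
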
129 changes: 127 additions & 2 deletions models/src/anemoi/models/layers/bounding.py
@@ -11,6 +11,7 @@

from abc import ABC
from abc import abstractmethod
from typing import Optional

import torch
from torch import nn
@@ -30,12 +31,28 @@ def __init__(
*,
variables: list[str],
name_to_index: dict,
statistics: Optional[dict] = None,
name_to_index_stats: Optional[dict] = None,
) -> None:
"""Initializes the bounding strategy.
Parameters
----------
variables : list[str]
A list of strings representing the variables that will be bounded.
name_to_index : dict
A dictionary mapping the variable names to their corresponding indices.
statistics : dict, optional
A dictionary containing the statistics of the variables.
name_to_index_stats : dict, optional
A dictionary mapping the variable names to their corresponding indices in the statistics dictionary
"""
super().__init__()

self.name_to_index = name_to_index
self.variables = variables
self.data_index = self._create_index(variables=self.variables)
self.statistics = statistics
self.name_to_index_stats = name_to_index_stats

def _create_index(self, variables: list[str]) -> InputTensorIndex:
return InputTensorIndex(includes=variables, excludes=[], name_to_index=self.name_to_index)._only
@@ -65,6 +82,97 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
return x


class NormalizedReluBounding(BaseBounding):
"""Bounding variable with a ReLU activation and customizable normalized thresholds."""

def __init__(
self,
*,
variables: list[str],
name_to_index: dict,
min_val: list[float],
normalizer: list[str],
statistics: dict,
name_to_index_stats: dict,
) -> None:
"""Initializes the NormalizedReluBounding with the specified parameters.
Parameters
----------
variables : list[str]
A list of strings representing the variables that will be bounded.
name_to_index : dict
A dictionary mapping the variable names to their corresponding indices.
statistics : dict
A dictionary containing the statistics of the variables (mean, std, min, max, etc.).
min_val : list[float]
The minimum values for the ReLU activation. It should be given in the same order as the variables.
normalizer : list[str]
A list of normalization types to apply, one per variable. Options: 'mean-std', 'min-max', 'max', 'std'.
name_to_index_stats : dict
A dictionary mapping the variable names to their corresponding indices in the statistics dictionary.
"""
super().__init__(
variables=variables,
name_to_index=name_to_index,
statistics=statistics,
name_to_index_stats=name_to_index_stats,
)
self.min_val = min_val
self.normalizer = normalizer

# Validate normalizer input
if not all(norm in {"mean-std", "min-max", "max", "std"} for norm in self.normalizer):
raise ValueError(
"Each normalizer must be one of: 'mean-std', 'min-max', 'max', 'std' in NormalizedReluBounding."
)
if len(self.normalizer) != len(variables):
raise ValueError(
"The length of the normalizer list must match the number of variables in NormalizedReluBounding."
)
if len(self.min_val) != len(variables):
raise ValueError(
"The length of the min_val list must match the number of variables in NormalizedReluBounding."
)

self.norm_min_val = torch.zeros(len(variables))
for ii, variable in enumerate(variables):
stat_index = self.name_to_index_stats[variable]
if self.normalizer[ii] == "mean-std":
mean = self.statistics["mean"][stat_index]
std = self.statistics["stdev"][stat_index]
self.norm_min_val[ii] = (min_val[ii] - mean) / std
elif self.normalizer[ii] == "min-max":
min_stat = self.statistics["min"][stat_index]
max_stat = self.statistics["max"][stat_index]
self.norm_min_val[ii] = (min_val[ii] - min_stat) / (max_stat - min_stat)
elif self.normalizer[ii] == "max":
max_stat = self.statistics["max"][stat_index]
self.norm_min_val[ii] = min_val[ii] / max_stat
elif self.normalizer[ii] == "std":
std = self.statistics["stdev"][stat_index]
self.norm_min_val[ii] = min_val[ii] / std

def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Applies the ReLU activation with the normalized minimum values to the input tensor.
Parameters
----------
x : torch.Tensor
The input tensor to process.
Returns
-------
torch.Tensor
The processed tensor with bounding applied.
"""
self.norm_min_val = self.norm_min_val.to(x.device)
x[..., self.data_index] = (
torch.nn.functional.relu(x[..., self.data_index] - self.norm_min_val) + self.norm_min_val
)
return x


class HardtanhBounding(BaseBounding):
"""Initializes the bounding with specified minimum and maximum values for bounding.
@@ -80,7 +188,16 @@ class HardtanhBounding(BaseBounding):
The maximum value for the HardTanh activation.
"""

def __init__(self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float) -> None:
def __init__(
self,
*,
variables: list[str],
name_to_index: dict,
min_val: float,
max_val: float,
statistics: Optional[dict] = None,
name_to_index_stats: Optional[dict] = None,
) -> None:
super().__init__(variables=variables, name_to_index=name_to_index)
self.min_val = min_val
self.max_val = max_val
@@ -111,7 +228,15 @@ class FractionBounding(HardtanhBounding):
"""

def __init__(
self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float, total_var: str
self,
*,
variables: list[str],
name_to_index: dict,
min_val: float,
max_val: float,
total_var: str,
statistics: Optional[dict] = None,
name_to_index_stats: Optional[dict] = None,
) -> None:
super().__init__(variables=variables, name_to_index=name_to_index, min_val=min_val, max_val=max_val)
self.total_variable = self._create_index(variables=[total_var])
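To make the four normalizer options concrete, this is how the constructor above maps a raw-space threshold min_val into normalized space for a single variable (a sketch with made-up statistics):

# Made-up statistics for one variable; min_val is the raw-space threshold.
mean, std, min_stat, max_stat, min_val = 2.0, 0.5, 1.0, 10.0, 2.0

norm_min_val = {
    "mean-std": (min_val - mean) / std,                       # (2 - 2) / 0.5 = 0.0
    "min-max": (min_val - min_stat) / (max_stat - min_stat),  # 1 / 9 ~= 0.1111
    "max": min_val / max_stat,                                # 2 / 10 = 0.2
    "std": min_val / std,                                     # 2 / 0.5 = 4.0
}
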
9 changes: 8 additions & 1 deletion models/src/anemoi/models/models/encoder_processor_decoder.py
@@ -35,6 +35,7 @@ def __init__(
*,
model_config: DotDict,
data_indices: dict,
statistics: dict,
graph_data: HeteroData,
) -> None:
"""Initializes the graph neural network.
@@ -57,6 +58,7 @@ def __init__(
self._calculate_shapes_and_indices(data_indices)
self._assert_matching_indices(data_indices)
self.data_indices = data_indices
self.statistics = statistics

self.multi_step = model_config.training.multistep_input
self.num_channels = model_config.model.num_channels
@@ -100,7 +102,12 @@ def __init__(
# Instantiation of model output bounding functions (e.g., to ensure outputs like TP are positive definite)
self.boundings = nn.ModuleList(
[
instantiate(cfg, name_to_index=self.data_indices.internal_model.output.name_to_index)
instantiate(
cfg,
name_to_index=self.data_indices.internal_model.output.name_to_index,
statistics=self.statistics,
name_to_index_stats=self.data_indices.data.input.name_to_index,
)
for cfg in getattr(model_config.model, "bounding", [])
]
)
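A minimal sketch of the equivalent direct instantiation, following the call above and the (commented) sst example from the training configs below; the index mappings and statistics here are hypothetical:

import numpy as np
from hydra.utils import instantiate
from anemoi.utils.config import DotDict

cfg = DotDict(
    {
        "_target_": "anemoi.models.layers.bounding.NormalizedReluBounding",
        "variables": ["sst"],
        "min_val": [-2.0],
        "normalizer": ["mean-std"],
    }
)
# Hypothetical single-variable setup; in the model these come from
# data_indices and the dataset statistics, as in the ModuleList above.
name_to_index = {"sst": 0}
name_to_index_stats = {"sst": 0}
statistics = {
    "mean": np.array([18.0]),
    "stdev": np.array([10.0]),
    "min": np.array([-2.0]),
    "max": np.array([35.0]),
}
bounding = instantiate(
    cfg,
    name_to_index=name_to_index,
    statistics=statistics,
    name_to_index_stats=name_to_index_stats,
)
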
33 changes: 33 additions & 0 deletions models/tests/layers/test_bounding.py
@@ -7,12 +7,14 @@
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

import numpy as np
import pytest
import torch
from hydra.utils import instantiate

from anemoi.models.layers.bounding import FractionBounding
from anemoi.models.layers.bounding import HardtanhBounding
from anemoi.models.layers.bounding import NormalizedReluBounding
from anemoi.models.layers.bounding import ReluBounding
from anemoi.utils.config import DotDict

@@ -27,18 +29,49 @@ def name_to_index():
return {"var1": 0, "var2": 1, "total_var": 2}


@pytest.fixture
def name_to_index_stats():
return {"var1": 0, "var2": 1, "total_var": 2}


@pytest.fixture
def input_tensor():
return torch.tensor([[-1.0, 2.0, 3.0], [4.0, -5.0, 6.0], [0.5, 0.5, 0.5]])


@pytest.fixture
def statistics():
statistics = {
"mean": np.array([1.0, 2.0, 3.0]),
"stdev": np.array([0.5, 0.5, 0.5]),
"min": np.array([1.0, 1.0, 1.0]),
"max": np.array([11.0, 10.0, 10.0]),
}
return statistics


def test_relu_bounding(config, name_to_index, input_tensor):
bounding = ReluBounding(variables=config.variables, name_to_index=name_to_index)
output = bounding(input_tensor.clone())
expected_output = torch.tensor([[0.0, 2.0, 3.0], [4.0, 0.0, 6.0], [0.5, 0.5, 0.5]])
assert torch.equal(output, expected_output)


def test_normalized_relu_bounding(config, name_to_index, name_to_index_stats, input_tensor, statistics):
bounding = NormalizedReluBounding(
variables=config.variables,
name_to_index=name_to_index,
min_val=[2.0, 2.0],
normalizer=["mean-std", "min-max"],
statistics=statistics,
name_to_index_stats=name_to_index_stats,
)
output = bounding(input_tensor.clone())
expected_output = torch.tensor([[2.0, 2.0, 3.0], [4.0, 0.1111, 6.0], [2.0, 0.5, 0.5]])
assert torch.allclose(output, expected_output, atol=1e-4)


def test_hardtanh_bounding(config, name_to_index, input_tensor):
minimum, maximum = -1.0, 1.0
bounding = HardtanhBounding(
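The expected values in test_normalized_relu_bounding follow directly from the statistics fixture. A quick hand check of the var2 column, which uses the 'min-max' normalizer (illustrative, not part of the test):

import torch

# var2: min_val = 2.0, min = 1.0, max = 10.0
c = (2.0 - 1.0) / (10.0 - 1.0)        # normalized threshold, 1/9 ~= 0.1111
col = torch.tensor([2.0, -5.0, 0.5])  # var2 column of input_tensor
print(torch.clamp(col, min=c))        # tensor([2.0000, 0.1111, 0.5000])
# ... which matches the var2 column of expected_output.
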
11 changes: 11 additions & 0 deletions training/src/anemoi/training/config/model/gnn.yaml
@@ -67,3 +67,14 @@ bounding: #These are applied in order
# min_val: 0
# max_val: 1
# total_var: tp

# [OPTIONAL] NormalizedReluBounding
# This is an extension of the ReLU bounding for cases where the threshold
# is not 0. For example, for sea surface temperature we do not use
# [0, infinity) but rather [-2C, infinity): the water temperature should
# not drop below the freezing point of sea water.

# - _target_: anemoi.models.layers.bounding.NormalizedReluBounding
# variables: [sst]
# min_val: [-2]
# normalizer: ['mean-std']
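
# As a back-of-envelope check of this sst example (with hypothetical
# statistics: mean 18 C, stdev 10 C), the -2 C floor maps to
# (-2 - 18) / 10 = -2.0 in mean-std space, so denormalized outputs
# stay at or above -2 C.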
11 changes: 11 additions & 0 deletions training/src/anemoi/training/config/model/graphtransformer.yaml
@@ -72,3 +72,14 @@ bounding: #These are applied in order
# min_val: 0
# max_val: 1
# total_var: tp

# [OPTIONAL] NormalizedReluBounding
# This is an extension of the ReLU bounding for cases where the threshold
# is not 0. For example, for sea surface temperature we do not use
# [0, infinity) but rather [-2C, infinity): the water temperature should
# not drop below the freezing point of sea water.

# - _target_: anemoi.models.layers.bounding.NormalizedReluBounding
# variables: [sst]
# min_val: [-2]
# normalizer: ['mean-std']
11 changes: 11 additions & 0 deletions training/src/anemoi/training/config/model/transformer.yaml
@@ -71,3 +71,14 @@ bounding: #These are applied in order
# min_val: 0
# max_val: 1
# total_var: tp

# [OPTIONAL] NormalizedReluBounding
# This is an extension of the ReLU bounding for cases where the threshold
# is not 0. For example, for sea surface temperature we do not use
# [0, infinity) but rather [-2C, infinity): the water temperature should
# not drop below the freezing point of sea water.

# - _target_: anemoi.models.layers.bounding.NormalizedReluBounding
# variables: [sst]
# min_val: [-2]
# normalizer: ['mean-std']
