
Refactor variable scaling: pressure-level scalings only applied in specific circumstances #52

Status: Draft. Wants to merge 12 commits into base branch develop.
training/src/anemoi/training/config/training/default.yaml (22 additions, 20 deletions)

@@ -50,7 +50,7 @@ training_loss:
   # Available scalars include:
   # - 'variable': See `variable_loss_scaling` for more information
   # - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function
-  scalars: ['variable', 'loss_weights_mask']
+  scalars: ['variable', 'variable_pressure_level', 'loss_weights_mask']

   ignore_nans: False
@@ -109,33 +109,35 @@ lr:
 # Variable loss scaling
 # 'variable' must be included in `scalars` in the losses for this to be applied.
 variable_loss_scaling:
+  variable_groups:
+    default: sfc
+    pl: [q, t, u, v, w, z]
   default: 1
-  pl:
-    q: 0.6 #1
-    t: 6 #1
-    u: 0.8 #0.5
-    v: 0.5 #0.33
-    w: 0.001
-    z: 12 #1
-  sfc:
-    sp: 10
-    10u: 0.1
-    10v: 0.1
-    2d: 0.5
-    tp: 0.025
-    cp: 0.0025
+  q: 0.6 #1
+  t: 6 #1
+  u: 0.8 #0.5
+  v: 0.5 #0.33
+  w: 0.001
+  z: 12 #1
+  sp: 10
+  10u: 0.1
+  10v: 0.1
+  2d: 0.5
+  tp: 0.025
+  cp: 0.0025
+  # 'variable_pressure_level' must be included in `scalars` in the losses for this to be applied.
+  pressure_level_scaler:
+    _target_: anemoi.training.train.scaling.ReluVariableLevelScaler
+    group: pl
+    y_intercept: 0.2
+    slope: 0.001

 metrics:
 - z_500
 - t_850
 - u_850
 - v_850

-pressure_level_scaler:
-  _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
-  minimum: 0.2
-  slope: 0.001
-
 node_loss_weights:
   _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
   target_nodes: ${graph.data}
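For orientation, the scaler configured above weights each pressure-level variable by its level. A minimal sketch of what a ReLU-style level scaler with these parameters computes, assuming `y_intercept` in the renamed ReluVariableLevelScaler plays the role of the old `minimum` parameter (the class source is not shown in this diff):

def relu_level_scaling(level: int, slope: float = 0.001, y_intercept: float = 0.2) -> float:
    """Per-level weight: grows linearly with the pressure level, floored at y_intercept."""
    return max(y_intercept, slope * level)

# With the config values above, upper-atmosphere (low-pressure) levels are
# down-weighted, and the floor keeps them from vanishing entirely:
print(relu_level_scaling(100))   # 0.2  (0.001 * 100 = 0.1, floored at 0.2)
print(relu_level_scaling(850))   # 0.85
print(relu_level_scaling(1000))  # 1.0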
training/src/anemoi/training/data/scaling.py (0 additions, 79 deletions)

This file was deleted.

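The `_target_:` entries in the config are Hydra instantiation specs: hydra.utils.instantiate imports the dotted path and calls it with the remaining keys as keyword arguments. A minimal self-contained illustration of that convention, using a standard-library class rather than the repository's scalers:

from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({"_target_": "collections.Counter"})  # any importable callable
counter = instantiate(cfg, "aabbc")  # equivalent to collections.Counter("aabbc")
print(counter)                       # Counter({'a': 2, 'b': 2, 'c': 1})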
training/src/anemoi/training/train/forecaster.py (14 additions, 41 deletions)

@@ -15,7 +15,6 @@
 from typing import Optional
 from typing import Union

-import numpy as np
 import pytorch_lightning as pl
 import torch
 from hydra.utils import instantiate
@@ -31,6 +30,7 @@
 from anemoi.models.interface import AnemoiModelInterface
 from anemoi.training.losses.utils import grad_scaler
 from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.train.scaling import GeneralVariableLossScaler
 from anemoi.training.utils.jsonify import map_config_to_primitives
 from anemoi.training.utils.masks import Boolean1DMask
 from anemoi.training.utils.masks import NoOutputMask
@@ -98,7 +98,18 @@ def __init__(

         self.logger_enabled = config.diagnostics.log.wandb.enabled or config.diagnostics.log.mlflow.enabled

-        variable_scaling = self.get_variable_scaling(config, data_indices)
+        variable_scaling = GeneralVariableLossScaler(
+            config.training.variable_loss_scaling,
+            data_indices,
+        ).get_variable_scaling()
+
+        # Instantiate the pressure level scaling class with the training configuration
+        pressurelevelscaler = instantiate(
+            config.training.pressure_level_scaler,
+            scaling_config=config.training.variable_loss_scaling,
+            data_indices=data_indices,
+        )
+        pressure_level_scaling = pressurelevelscaler.get_variable_scaling()

         self.internal_metric_ranges, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices)

Review comment (Member), on the "# Instantiate the pressure level scaling class" line:
    I don't know if this is possible, but I wonder if we could instantiate from a list of scalars here rather than from specific ones? I think this is how it is done for the validation metrics.

Reply (Member, author):
    Yes, this would be useful, as we want to allow more scalar methods for model levels/tendencies/etc. I will have a look.

Review comment (Member), on "config.training.pressure_level_scaler":
    I think this config location is wrong? It should be training.variable_loss_scaling.pressure_level_scaler.

Reply (Member, author):
    Indeed, in the config file, the pressure_level_scaler should be defined directly in training as before.

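The new GeneralVariableLossScaler lives in a file this diff does not show, so the following is only a guess at its shape: a per-output-variable weight vector that starts at the configured default and is overridden by the flattened per-variable entries of the new config layout. Function name and behavior are assumptions for illustration:

import torch

def general_variable_scaling(name_to_index: dict[str, int], scaling_config: dict) -> torch.Tensor:
    """Hypothetical sketch: one loss weight per output variable."""
    weights = torch.full((len(name_to_index),), float(scaling_config.get("default", 1.0)))
    for name, idx in name_to_index.items():
        base = name.split("_")[0]  # 't_850' -> 't'; surface names like '10u' pass through
        if base in scaling_config:
            weights[idx] = float(scaling_config[base])
    return weights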
@@ -117,6 +128,7 @@ def __init__(
         # Filled after first application of preprocessor. dimension=[-2, -1] (latlon, n_outputs).
         self.scalars = {
             "variable": (-1, variable_scaling),
+            "variable_pressure_level": (-1, pressure_level_scaling),
             "loss_weights_mask": ((-2, -1), torch.ones((1, 1))),
             "limited_area_mask": (2, limited_area_mask),
         }
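The (dimension, tensor) pairs in self.scalars indicate which dimension of the loss each scalar multiplies. How the loss classes apply them is outside this diff, but the broadcasting convention can be illustrated as follows (shapes invented for the example):

import torch

error = torch.randn(4, 3) ** 2              # (latlon=4, n_outputs=3) squared-error field
variable_scaling = torch.tensor([0.5, 1.0, 2.0])  # a "variable"-style scalar on dim -1
scaled = error * variable_scaling            # broadcasts along the last dimension
loss = scaled.mean()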
@@ -299,45 +311,6 @@ def get_val_metric_ranges(config: DictConfig, data_indices: IndexCollection) ->

         return metric_ranges, metric_ranges_validation

-    @staticmethod
-    def get_variable_scaling(
-        config: DictConfig,
-        data_indices: IndexCollection,
-    ) -> torch.Tensor:
-        variable_loss_scaling = (
-            np.ones((len(data_indices.internal_data.output.full),), dtype=np.float32)
-            * config.training.variable_loss_scaling.default
-        )
-        pressure_level = instantiate(config.training.pressure_level_scaler)
-
-        LOGGER.info(
-            "Pressure level scaling: use scaler %s with slope %.4f and minimum %.2f",
-            type(pressure_level).__name__,
-            pressure_level.slope,
-            pressure_level.minimum,
-        )
-
-        for key, idx in data_indices.internal_model.output.name_to_index.items():
-            split = key.split("_")
-            if len(split) > 1 and split[-1].isdigit():
-                # Apply pressure level scaling
-                if split[0] in config.training.variable_loss_scaling.pl:
-                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.pl[
-                        split[0]
-                    ] * pressure_level.scaler(
-                        int(split[-1]),
-                    )
-                else:
-                    LOGGER.debug("Parameter %s was not scaled.", key)
-            else:
-                # Apply surface variable scaling
-                if key in config.training.variable_loss_scaling.sfc:
-                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.sfc[key]
-                else:
-                    LOGGER.debug("Parameter %s was not scaled.", key)
-
-        return torch.from_numpy(variable_loss_scaling)
-
     @staticmethod
     def get_node_weights(config: DictConfig, graph_data: HeteroData) -> torch.Tensor:
         node_weighting = instantiate(config.training.node_loss_weights)
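For reference, the deleted method combined the per-variable weight with the pressure-level factor by parsing the trailing level out of names like t_850. A condensed, hypothetical replica of that branch logic with the old config values (the helper name is mine, not the repository's):

def old_scaling(key: str, pl_weights: dict, sfc_weights: dict,
                slope: float = 0.001, minimum: float = 0.2) -> float:
    """Condensed replica of the deleted get_variable_scaling branch logic."""
    name, _, level = key.rpartition("_")
    if name and level.isdigit() and name in pl_weights:  # pressure-level variable, e.g. 't_850'
        return pl_weights[name] * max(minimum, slope * int(level))
    if key in sfc_weights:                               # surface variable, e.g. '10u'
        return sfc_weights[key]
    return 1.0                                           # default: left unscaled

pl_weights = {"q": 0.6, "t": 6, "u": 0.8, "v": 0.5, "w": 0.001, "z": 12}
sfc_weights = {"sp": 10, "10u": 0.1, "10v": 0.1, "2d": 0.5, "tp": 0.025, "cp": 0.0025}
print(old_scaling("t_850", pl_weights, sfc_weights))  # 6 * max(0.2, 0.001 * 850) = 5.1 (up to float rounding)
print(old_scaling("10u", pl_weights, sfc_weights))    # 0.1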