Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
.vscode
outputs
lightning_logs
logs
.DS_Store

# softlinks
evalstore
modelstore
data
wandblogs

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand All @@ -12,6 +19,9 @@ __pycache__/
# C extensions
*.so

# era5 quantiles
era5-quantiles-*.nc

# Distribution / packaging
.Python
build/
Expand Down
1 change: 1 addition & 0 deletions geoarches/configs/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- cluster: local # Tells hydra to use cluster/local.yaml when composing the cfg object.
- dataloader: era5
- module: archesweather
- stats: pangu # Normalization scheme (pangu or graphcast or None)
- override hydra/job_logging: none
- override hydra/hydra_logging: none
- _self_
Expand Down
10 changes: 9 additions & 1 deletion geoarches/configs/dataloader/era5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@ dataset:
path: data/era5_240/full/
lead_time_hours: 24
multistep: ${oc.select:module.train.rollout_iterations,1}
norm_scheme: pangu
load_prev: True
variables:
surface: ${stats.module.variables.surface}
level: ${stats.module.variables.level}
dimension_indexers:
level:
- 'level'
- ${stats.module.levels}
warning_on_nan: True
interpolate_nans: True  # Remove NaNs in the model input (i.e. SST)

validation_args:
multistep: ${oc.select:module.val.rollout_iterations,1}
Expand Down
1 change: 0 additions & 1 deletion geoarches/configs/dataloader/era5pred.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ dataset:
path: data/era5_240/full/
pred_path: data/outputs/deterministic/jzh-geoaw-m-seed0
lead_time_hours: 24 # mixed
norm_scheme: pangu
load_prev: True
load_hard_neg: False

Expand Down
3 changes: 3 additions & 0 deletions geoarches/configs/module/metrics/era5_brier.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
era5_brier_metric:
_target_: geoarches.metrics.brier_skill_score.Era5BrierSkillScore
surface_variables: ${stats.module.variables.surface}
level_variables: ${stats.module.variables.level}
pressure_levels: ${stats.module.levels}
lead_time_hours: ${dataloader.dataset.lead_time_hours}
3 changes: 3 additions & 0 deletions geoarches/configs/module/metrics/era5_deterministic.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
era5_deterministic_metrics:
_target_: geoarches.metrics.deterministic_metrics.Era5DeterministicMetrics
surface_variables: ${stats.module.variables.surface}
level_variables: ${stats.module.variables.level}
pressure_levels: ${stats.module.levels}
lead_time_hours: ${dataloader.dataset.lead_time_hours}
3 changes: 3 additions & 0 deletions geoarches/configs/module/metrics/era5_ensemble.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
era5_ensemble_metrics:
_target_: geoarches.metrics.ensemble_metrics.Era5EnsembleMetrics
surface_variables: ${stats.module.variables.surface}
level_variables: ${stats.module.variables.level}
pressure_levels: ${stats.module.levels}
lead_time_hours: ${dataloader.dataset.lead_time_hours}
27 changes: 27 additions & 0 deletions geoarches/configs/stats/graphcast.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module:
_target_: geoarches.utils.normalization.NormalizationStatistics
variables:
surface:
- 10m_u_component_of_wind
- 10m_v_component_of_wind
- 2m_temperature
- mean_sea_level_pressure
level:
- geopotential
- u_component_of_wind
- v_component_of_wind
- temperature
- specific_humidity
- vertical_velocity
loss_weight_per_variable:
surface: [0.1, 0.1, 1.0, 0.1]
level: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

levels: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
norm_scheme: graphcast

compute_loss_coeffs_args:
latitude: 121
pow: 2
use_weatherbench_lat_coeffs: true
loss_delta_normalization: true
26 changes: 26 additions & 0 deletions geoarches/configs/stats/pangu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module:
_target_: geoarches.utils.normalization.NormalizationStatistics
variables:
surface:
- 10m_u_component_of_wind
- 10m_v_component_of_wind
- 2m_temperature
- mean_sea_level_pressure
level:
- geopotential
- u_component_of_wind
- v_component_of_wind
- temperature
- specific_humidity
- vertical_velocity
loss_weight_per_variable:
surface: [0.1, 0.1, 1.0, 0.1]
level: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
levels: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
norm_scheme: pangu

compute_loss_coeffs_args:
latitude: 121
pow: 2
use_weatherbench_lat_coeffs: true
loss_delta_normalization: true
14 changes: 13 additions & 1 deletion geoarches/dataloaders/dcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ def replace_nans(tensordict, value=0):
)


# Default value for the `dimension_indexers` argument of DCPPForecast.
# Each entry pairs a key with a (name, indexer) tuple; `slice(None)` is the
# "take everything" indexer, so only the "level" entry restricts the data
# (to `pressure_levels`).
# NOTE(review): the first tuple element is presumably the dimension name as
# stored in the DCPP files ("plev", "lat", "lon", "time") — confirm against
# XarrayDataset.
default_dimension_indexers = dict(
    level=("plev", pressure_levels),
    latitude=("lat", slice(None)),
    longitude=("lon", slice(None)),
    time=("time", slice(None)),
)


class DCPPForecast(XarrayDataset):
"""
Load DCPP data for the forecast task.
Expand All @@ -54,6 +62,7 @@ def __init__(
limit_examples: int = 0,
mask_value=0,
variables=None,
dimension_indexers: dict = default_dimension_indexers,
):
"""
Args:
Expand All @@ -67,6 +76,9 @@ def __init__(
load_clim: Whether to load climatology.
limit_examples: Return set number of examples in dataset
mask_value: what value to use as mask for nan values in dataset
dimension_indexers: dict, dimension indexers for the dataset.
Default is set to pressure levels, latitude, longitude, and time.

"""
self.__dict__.update(locals()) # concise way to update self with input arguments

Expand All @@ -76,7 +88,7 @@ def __init__(
filename_filter = filename_filters[domain]
if variables is None:
variables = dict(surface=surface_variables, level=level_variables)
dimension_indexers = {"plev": pressure_levels}

super().__init__(
path,
filename_filter=filename_filter,
Expand Down
Loading
Loading