Skip to content

Commit a5c24b6

Browse files
committed
✅ Review Round 11 COMPLETE: Official CMIP7 File Naming
Implemented per DOI: 10.5281/zenodo.17250297

FILENAME FORMAT (CORRECT):
tos_tavg-u-hxy-sea_mon_glb_gn_AWI-ESM3-VEG-LR_piControl_r1i1p1f1_135001-135012.nc

Components:
- variable_id: tos
- branding_suffix: tavg-u-hxy-sea (time-avg, undefined-vert, horiz-xy, sea)
- frequency: mon (NOT table_id!)
- region: glb (global)
- grid_label: gn/gr
- source_id: AWI-ESM3-VEG-LR
- experiment_id: piControl
- variant_label: r1i1p1f1
- timeRange: 135001-135012

DIRECTORY STRUCTURE (CORRECT):
MIP-DRS7/CMIP7/CMIP/AWI/AWI-ESM3-VEG-LR/piControl/r1i1p1f1/glb/mon/tos/tavg-u-hxy-sea/gn/v20260313/

Changes:
1. Config: Added CMIP7 params (activity_id, institution_id, region, branding_suffix)
2. files.py: Rewrote CMIP7 create_filepath() with correct component order
3. global_attributes.py: Rewrote CMIP7 subdir_path() with MIP-DRS7 structure
4. Enabled output_subdirs for proper directory nesting

Note: gn and gr must be processed separately (pycmor limitation)
1 parent 9df37f2 commit a5c24b6

File tree

3 files changed

+67
-9
lines changed

3 files changed

+67
-9
lines changed

cmorize_sst.yaml

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ pycmor:
1515
enable_dask: False
1616
xarray_open_mfdataset_parallel: False
1717
pipeline_workflow_orchestrator: "native"
18+
enable_output_subdirs: True
1819

1920
pipelines:
2021
- name: default
@@ -52,6 +53,11 @@ rules:
5253
model_component: ocean
5354
grid_label: gn
5455
grid_file: /work/ab0246/a270092/input/fesom2/dars2/mesh.nc
56+
# CMIP7 required parameters
57+
activity_id: CMIP
58+
institution_id: AWI
59+
region: glb
60+
branding_suffix: "tavg-u-hxy-sea"
5561
pipelines:
5662
- default
5763
inputs:
@@ -72,6 +78,11 @@ rules:
7278
mesh_path: /work/ab0246/a270092/input/fesom2/dars2
7379
box: "-180, 180, -90, 90"
7480
target_resolution: "0.25"
81+
# CMIP7 required parameters
82+
activity_id: CMIP
83+
institution_id: AWI
84+
region: glb
85+
branding_suffix: "tavg-u-hxy-sea"
7586
pipelines:
7687
- regridded
7788
inputs:

src/pycmor/std_lib/files.py

Lines changed: 27 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -217,16 +217,37 @@ def create_filepath(ds, rule):
217217
frequency_str = rule.data_request_variable.frequency
218218

219219
if mip_era == "CMIP7":
220-
# CMIP7: No institution prefix, simpler format
221-
if frequency_str == "fx" or not time_range:
220+
# CMIP7 format per official specification (DOI: 10.5281/zenodo.17250297):
221+
# <variable_id>_<branding_suffix>_<frequency>_<region>_<grid_label>_
222+
# <source_id>_<experiment_id>_<variant_label>[_<timeRange>].nc
223+
224+
# Get branding suffix from rule or data request
225+
branding_suffix = getattr(rule, 'branding_suffix', None)
226+
if not branding_suffix:
227+
branding_suffix = getattr(
228+
rule.data_request_variable, 'branding_suffix', 'unknown-u-hxy-u'
229+
)
230+
branding_suffix = _sanitize_component(branding_suffix)
231+
232+
# Get region from rule (default to global)
233+
region = getattr(rule, 'region', 'glb')
234+
region = _sanitize_component(region)
235+
236+
# Use frequency, not table_id
237+
frequency = _sanitize_component(frequency_str)
238+
239+
# Build CMIP7 filename
240+
if frequency == "fx" or not time_range:
241+
# Fixed (time-independent) variable - no timeRange
222242
filepath = (
223-
f"{out_dir}/{name}_{table_id}_{source_id}_"
224-
f"{experiment_id}_{label}_{grid}{clim_suffix}.nc"
243+
f"{out_dir}/{name}_{branding_suffix}_{frequency}_{region}_{grid}_"
244+
f"{source_id}_{experiment_id}_{label}{clim_suffix}.nc"
225245
)
226246
else:
247+
# Time-dependent variable - include timeRange
227248
filepath = (
228-
f"{out_dir}/{name}_{table_id}_{source_id}_"
229-
f"{experiment_id}_{label}_{grid}_{time_range}{clim_suffix}.nc"
249+
f"{out_dir}/{name}_{branding_suffix}_{frequency}_{region}_{grid}_"
250+
f"{source_id}_{experiment_id}_{label}_{time_range}{clim_suffix}.nc"
230251
)
231252
else:
232253
# CMIP6: Include institution prefix

src/pycmor/std_lib/global_attributes.py

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,16 +46,42 @@ def global_attributes(self):
4646
}
4747

4848
def subdir_path(self):
49-
"""Return subdirectory path for CMIP7 output"""
49+
"""Return subdirectory path for CMIP7 output per official specification
50+
51+
Template (DOI: 10.5281/zenodo.17250297):
52+
<drs_specs>/<mip_era>/<activity_id>/<institution_id>/<source_id>/
53+
<experiment_id>/<variant_label>/<region>/<frequency>/<variable_id>/
54+
<branding_suffix>/<grid_label>/<directoryDate>
55+
"""
56+
drs_specs = "MIP-DRS7"
5057
mip_era = "CMIP7"
58+
activity_id = self.rule_dict.get("activity_id", "CMIP")
59+
institution_id = self.rule_dict.get("institution_id", "AWI")
5160
source_id = self.rule_dict.get("source_id", "")
5261
experiment_id = self.rule_dict.get("experiment_id", "")
5362
variant_label = self.rule_dict.get("variant_label", "")
54-
table_id = self.drv.table_header.table_id if hasattr(self.drv, 'table_header') else "Omon"
63+
region = self.rule_dict.get("region", "glb")
64+
65+
# Get frequency from data request (NOT table_id!)
66+
frequency = self.drv.frequency if hasattr(self.drv, 'frequency') else "mon"
67+
5568
variable_id = self.rule_dict.get("cmor_variable", "")
69+
70+
# Get branding suffix from rule or data request
71+
branding_suffix = self.rule_dict.get('branding_suffix')
72+
if branding_suffix is None:
73+
branding_suffix = getattr(self.drv, 'branding_suffix', 'unknown-u-hxy-u')
74+
5675
grid_label = self.rule_dict.get("grid_label", "")
5776
version = f"v{datetime.datetime.today().strftime('%Y%m%d')}"
58-
directory_path = f"{mip_era}/{source_id}/{experiment_id}/{variant_label}/{table_id}/{variable_id}/{grid_label}/{version}"
77+
78+
directory_path = (
79+
f"{drs_specs}/{mip_era}/{activity_id}/{institution_id}/"
80+
f"{source_id}/{experiment_id}/{variant_label}/"
81+
f"{region}/{frequency}/{variable_id}/{branding_suffix}/"
82+
f"{grid_label}/{version}"
83+
)
84+
5985
return directory_path
6086

6187
def get_tracking_id(self):

0 commit comments

Comments
 (0)