diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/add_total_iceberg_ice_shelf_melt.py b/compass/ocean/tests/global_ocean/files_for_e3sm/add_total_iceberg_ice_shelf_melt.py index 8d4a6da7f4..302010d116 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/add_total_iceberg_ice_shelf_melt.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/add_total_iceberg_ice_shelf_melt.py @@ -2,7 +2,6 @@ import numpy as np import xarray as xr -from mpas_tools.io import write_netcdf from compass.io import symlink from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -100,10 +99,10 @@ def run(self): ds[field].attrs['units'] = 'kg s-1' dib_filename = 'Iceberg_Climatology_Merino_MPAS_with_totals.nc' - write_netcdf(ds_dib, dib_filename) + self.write_netcdf(ds_dib, dib_filename) dismf_filename = 'prescribed_ismf_paolo2023_with_totals.nc' - write_netcdf(ds_dismf, dismf_filename) + self.write_netcdf(ds_dismf, dismf_filename) norm_total_dib_flux = (ds_dib.bergFreshwaterFluxData * weights * area_cell / total_flux).sum() diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/diagnostic_masks.py b/compass/ocean/tests/global_ocean/files_for_e3sm/diagnostic_masks.py index 5679a508c2..32adbd64be 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/diagnostic_masks.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/diagnostic_masks.py @@ -4,7 +4,6 @@ import xarray as xr from geometric_features import GeometricFeatures from geometric_features.aggregation import get_aggregator_by_name -from mpas_tools.io import write_netcdf from mpas_tools.logging import check_call from mpas_tools.ocean.moc import add_moc_southern_boundary_transects @@ -43,171 +42,181 @@ def run(self): """ super().run() - make_diagnostics_files(self.logger, self.mesh_short_name, - self.with_ice_shelf_cavities, - self.cpus_per_task) + self.make_diagnostics_files( + self.logger, self.mesh_short_name, self.with_ice_shelf_cavities, + self.cpus_per_task) + def make_diagnostics_files( + self, logger, mesh_short_name, with_ice_shelf_cavities, + cpus_per_task): + """ + Run this step of the testcase -def make_diagnostics_files(logger, mesh_short_name, with_ice_shelf_cavities, - cpus_per_task): - """ - Run this step of the testcase - - Parameters - ---------- - logger : logging.Logger - A logger for output from the step - - mesh_short_name : str - The E3SM short name of the mesh - - with_ice_shelf_cavities : bool - Whether the mesh has ice-shelf cavities - - cpus_per_task : int - The number of cores to use to build masks - """ - - mask_dir = '../assembled_files/diagnostics/mpas_analysis/region_masks' - try: - os.makedirs(mask_dir) - except FileExistsError: - pass - - ocean_inputdata_dir = \ - f'../assembled_files/inputdata/ocn/mpas-o/{mesh_short_name}' - moc_mask_dirs = [mask_dir, ocean_inputdata_dir] + Parameters + ---------- + logger : logging.Logger + A logger for output from the step - _make_moc_masks(mesh_short_name, logger, cpus_per_task, moc_mask_dirs) + mesh_short_name : str + The E3SM short name of the mesh - gf = GeometricFeatures() - region_groups = ['Antarctic Regions', 'Arctic Ocean Regions', - 'Arctic Sea Ice Regions', 'ISMIP6 Greenland Regions', - 'Ocean Basins', 'Ocean Subbasins', 'ISMIP6 Regions'] + with_ice_shelf_cavities : bool + Whether the mesh has ice-shelf cavities - if with_ice_shelf_cavities: - region_groups.append('Ice Shelves') + cpus_per_task : int + The number of cores to use to build masks + """ - for region_group in region_groups: - function, prefix, 
date = get_aggregator_by_name(region_group) - suffix = f'{prefix}{date}' + mask_dir = '../assembled_files/diagnostics/mpas_analysis/region_masks' + os.makedirs(mask_dir, exist_ok=True) + + self._make_moc_masks(mesh_short_name, logger, cpus_per_task, mask_dir) + + gf = GeometricFeatures() + region_groups = [ + 'Antarctic Regions', 'Arctic Ocean Regions', + 'Arctic Sea Ice Regions', 'ISMIP6 Greenland Regions', + 'Ocean Basins', 'Ocean Subbasins', 'ISMIP6 Regions'] + + if with_ice_shelf_cavities: + region_groups.append('Ice Shelves') + + for region_group in region_groups: + function, prefix, date = get_aggregator_by_name(region_group) + suffix = f'{prefix}{date}' + fc_mask = function(gf) + self._make_region_masks( + mesh_short_name, suffix=suffix, fc_mask=fc_mask, logger=logger, + cpus_per_task=cpus_per_task, output_dir=mask_dir) + + transect_groups = ['Transport Transects'] + for transect_group in transect_groups: + function, prefix, date = get_aggregator_by_name(transect_group) + suffix = f'{prefix}{date}' + fc_mask = function(gf) + self._make_transect_masks( + mesh_short_name, suffix=suffix, fc_mask=fc_mask, + logger=logger, cpus_per_task=cpus_per_task, + output_dir=mask_dir) + + def _make_region_masks(self, mesh_name, suffix, fc_mask, logger, + cpus_per_task, output_dir): + mesh_filename = 'restart.nc' + + geojson_filename = f'{suffix}.geojson' + mask_filename = f'{mesh_name}_{suffix}.nc' + + fc_mask.to_geojson(geojson_filename) + + # these defaults may have been updated from config options -- pass them + # along to the subprocess + netcdf_format = mpas_tools.io.default_format + netcdf_engine = mpas_tools.io.default_engine + + args = ['compute_mpas_region_masks', + '-m', mesh_filename, + '-g', geojson_filename, + '-o', mask_filename, + '-t', 'cell', + '--process_count', f'{cpus_per_task}', + '--format', netcdf_format, + '--engine', netcdf_engine] + check_call(args, logger=logger) + + # make links in output directory + symlink(os.path.abspath(mask_filename), + f'{output_dir}/{mask_filename}') + + def _make_transect_masks( + self, mesh_name, suffix, fc_mask, logger, cpus_per_task, + output_dir, subdivision_threshold=10e3): + mesh_filename = 'restart.nc' + + geojson_filename = f'{suffix}.geojson' + mask_filename = f'{mesh_name}_{suffix}.nc' + + fc_mask.to_geojson(geojson_filename) + + # these defaults may have been updated from config options -- pass them + # along to the subprocess + netcdf_format = mpas_tools.io.default_format + netcdf_engine = mpas_tools.io.default_engine + + args = ['compute_mpas_transect_masks', + '-m', mesh_filename, + '-g', geojson_filename, + '-o', mask_filename, + '-t', 'edge', + '-s', f'{subdivision_threshold}', + '--process_count', f'{cpus_per_task}', + '--add_edge_sign', + '--format', netcdf_format, + '--engine', netcdf_engine] + check_call(args, logger=logger) + + symlink(os.path.abspath(mask_filename), + f'{output_dir}/{mask_filename}') + + def _make_moc_masks( + self, mesh_short_name, logger, cpus_per_task, mask_dir): + gf = GeometricFeatures() + + ocean_inputdata_dir = \ + f'../assembled_files/inputdata/ocn/mpas-o/{mesh_short_name}' + os.makedirs(ocean_inputdata_dir, exist_ok=True) + + mesh_filename = 'restart.nc' + + function, prefix, date = get_aggregator_by_name('MOC Basins') fc_mask = function(gf) - _make_region_masks(mesh_short_name, suffix=suffix, fc_mask=fc_mask, - logger=logger, cpus_per_task=cpus_per_task, - output_dir=mask_dir) - transect_groups = ['Transport Transects'] - for transect_group in transect_groups: - function, prefix, date = 
get_aggregator_by_name(transect_group)
-        suffix = f'{prefix}{date}'
-        fc_mask = function(gf)
-        _make_transect_masks(mesh_short_name, suffix=suffix, fc_mask=fc_mask,
-                             logger=logger, cpus_per_task=cpus_per_task,
-                             output_dir=mask_dir)
-
-
-def _make_region_masks(mesh_name, suffix, fc_mask, logger, cpus_per_task,
-                       output_dir):
-    mesh_filename = 'restart.nc'
-
-    geojson_filename = f'{suffix}.geojson'
-    mask_filename = f'{mesh_name}_{suffix}.nc'
-
-    fc_mask.to_geojson(geojson_filename)
-
-    # these defaults may have been updated from config options -- pass them
-    # along to the subprocess
-    netcdf_format = mpas_tools.io.default_format
-    netcdf_engine = mpas_tools.io.default_engine
-
-    args = ['compute_mpas_region_masks',
-            '-m', mesh_filename,
-            '-g', geojson_filename,
-            '-o', mask_filename,
-            '-t', 'cell',
-            '--process_count', f'{cpus_per_task}',
-            '--format', netcdf_format,
-            '--engine', netcdf_engine]
-    check_call(args, logger=logger)
-
-    # make links in output directory
-    symlink(os.path.abspath(mask_filename),
-            f'{output_dir}/{mask_filename}')
-
-
-def _make_transect_masks(mesh_name, suffix, fc_mask, logger, cpus_per_task,
-                         output_dir, subdivision_threshold=10e3):
-    mesh_filename = 'restart.nc'
-
-    geojson_filename = f'{suffix}.geojson'
-    mask_filename = f'{mesh_name}_{suffix}.nc'
-
-    fc_mask.to_geojson(geojson_filename)
-
-    # these defaults may have been updated from config options -- pass them
-    # along to the subprocess
-    netcdf_format = mpas_tools.io.default_format
-    netcdf_engine = mpas_tools.io.default_engine
-
-    args = ['compute_mpas_transect_masks',
-            '-m', mesh_filename,
-            '-g', geojson_filename,
-            '-o', mask_filename,
-            '-t', 'edge',
-            '-s', f'{subdivision_threshold}',
-            '--process_count', f'{cpus_per_task}',
-            '--add_edge_sign',
-            '--format', netcdf_format,
-            '--engine', netcdf_engine]
-    check_call(args, logger=logger)
-
-    symlink(os.path.abspath(mask_filename),
-            f'{output_dir}/{mask_filename}')
-
-
-def _make_moc_masks(mesh_short_name, logger, cpus_per_task, moc_mask_dirs):
-    gf = GeometricFeatures()
-    mesh_filename = 'restart.nc'
+        geojson_filename = f'{suffix}.geojson'
+        mask_filename = f'{mesh_short_name}_{suffix}.nc'

-    function, prefix, date = get_aggregator_by_name('MOC Basins')
-    fc_mask = function(gf)
+        fc_mask.to_geojson(geojson_filename)

-    suffix = f'{prefix}{date}'
+        # these defaults may have been updated from config options -- pass them
+        # along to the subprocess
+        netcdf_format = mpas_tools.io.default_format
+        netcdf_engine = mpas_tools.io.default_engine

-    geojson_filename = f'{suffix}.geojson'
-    mask_filename = f'{mesh_short_name}_{suffix}.nc'
+        args = ['compute_mpas_region_masks',
+                '-m', mesh_filename,
+                '-g', geojson_filename,
+                '-o', mask_filename,
+                '-t', 'cell',
+                '--process_count', f'{cpus_per_task}',
+                '--format', netcdf_format,
+                '--engine', netcdf_engine]
+        check_call(args, logger=logger)

-    fc_mask.to_geojson(geojson_filename)
+        prefix = f'{mesh_short_name}_mocBasinsAndTransects{date}'
+        mask_and_transect_filename = f'{prefix}.nc'

-    # these defaults may have been updated from config options -- pass them
-    # along to the subprocess
-    netcdf_format = mpas_tools.io.default_format
-    netcdf_engine = mpas_tools.io.default_engine
+        diagnostics_filename = os.path.join(
+            mask_dir, mask_and_transect_filename)

-    args = ['compute_mpas_region_masks',
-            '-m', mesh_filename,
-            '-g', geojson_filename,
-            '-o', mask_filename,
-            '-t', 'cell',
-            '--process_count', f'{cpus_per_task}',
-            '--format', netcdf_format,
-            '--engine', netcdf_engine]
-    check_call(args, 
logger=logger) + inputdata_filename = os.path.join( + ocean_inputdata_dir, f'{prefix}mg.{self.creation_date}.nc') - mask_and_transect_filename = \ - f'{mesh_short_name}_mocBasinsAndTransects{date}.nc' + ds_mesh = xr.open_dataset(mesh_filename) + ds_mask = xr.open_dataset(mask_filename) - ds_mesh = xr.open_dataset(mesh_filename) - ds_mask = xr.open_dataset(mask_filename) + ds_masks_and_transects = add_moc_southern_boundary_transects( + ds_mask, ds_mesh, logger=logger) - ds_masks_and_transects = add_moc_southern_boundary_transects( - ds_mask, ds_mesh, logger=logger) + # these are string variables that cause problems if we convert to + # CDF5 format + ds_masks_and_transects = ds_masks_and_transects.drop_vars( + ['history', 'constituents'] + ) - write_netcdf(ds_masks_and_transects, mask_and_transect_filename, - char_dim_name='StrLen') + self.write_netcdf( + ds_masks_and_transects, mask_and_transect_filename, + char_dim_name='StrLen') - # make links in output directories (both inputdata and diagnostics) - for output_dir in moc_mask_dirs: - symlink(os.path.abspath(mask_and_transect_filename), - f'{output_dir}/{mask_and_transect_filename}') + # make links in output directories (both inputdata and diagnostics) + for link_name in [diagnostics_filename, inputdata_filename]: + symlink(os.path.abspath(mask_and_transect_filename), link_name) diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/files_for_e3sm_step.py b/compass/ocean/tests/global_ocean/files_for_e3sm/files_for_e3sm_step.py index 2ff375addc..1b95953f7d 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/files_for_e3sm_step.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/files_for_e3sm_step.py @@ -1,7 +1,9 @@ import os +import subprocess from datetime import datetime import xarray as xr +from mpas_tools.io import write_netcdf as mpas_tools_write_netcdf from compass.io import symlink from compass.step import Step @@ -208,3 +210,35 @@ def run(self): # noqa: C901 pass super().run() + + def write_netcdf(self, ds, filename, **kwargs): + """ + Write an xarray dataset to a NetCDF file, possibly converting to CDF5 + format + + Parameters + ---------- + ds : xarray.Dataset + the dataset to write + + filename : str + the name of the file to write + + kwargs : dict + additional keyword arguments to pass to mpas_tools.io.write_netcdf + """ + + config = self.config + convert_to_cdf5 = config.getboolean( + 'files_for_e3sm', 'convert_to_cdf5') + if convert_to_cdf5: + name, ext = os.path.splitext(filename) + write_filename = f'{name}_before_cdf5{ext}' + else: + write_filename = filename + + mpas_tools_write_netcdf(ds, write_filename, **kwargs) + + if convert_to_cdf5: + args = ['ncks', '-O', '-5', write_filename, filename] + subprocess.check_call(args) diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_initial_condition.py b/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_initial_condition.py index 6df159015f..655bd5e635 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_initial_condition.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_initial_condition.py @@ -1,7 +1,6 @@ import os import xarray -from mpas_tools.io import write_netcdf from compass.io import symlink from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -54,7 +53,7 @@ def run(self): ds.load() if 'xtime' in ds.data_vars: ds = ds.drop_vars('xtime') - write_netcdf(ds, dest_filename) + self.write_netcdf(ds, dest_filename) if self.add_metadata: 
add_mesh_and_init_metadata([dest_filename], self.config, diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_mesh.py b/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_mesh.py index 413598120e..dd1209a137 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_mesh.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/ocean_mesh.py @@ -2,7 +2,6 @@ import numpy as np import xarray as xr -from mpas_tools.io import write_netcdf from compass.io import symlink from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -116,7 +115,7 @@ def run(self): ds.zMid.attrs['long_name'] = \ 'z-coordinate of the mid-depth of the layer' - write_netcdf(ds, dest_filename) + self.write_netcdf(ds, dest_filename) symlink(os.path.abspath(dest_filename), f'{self.ocean_mesh_dir}/{dest_filename}') diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_ice_shelf_melt.py b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_ice_shelf_melt.py index ee8c47a204..1a76fd6cd4 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_ice_shelf_melt.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_ice_shelf_melt.py @@ -75,14 +75,16 @@ def run(self): remapped_filename = 'prescribed_ismf_paolo2023.nc' parallel_executable = config.get('parallel', 'parallel_executable') + convert_to_cdf5 = config.getboolean( + 'files_for_e3sm', 'convert_to_cdf5') base_mesh_filename = 'base_mesh.nc' culled_mesh_filename = 'initial_state.nc' mesh_name = self.mesh_short_name land_ice_mask_filename = 'initial_state.nc' - remap_paolo(in_filename, base_mesh_filename, - culled_mesh_filename, mesh_name, - land_ice_mask_filename, remapped_filename, - logger=logger, mpi_tasks=ntasks, - parallel_executable=parallel_executable) + remap_paolo( + in_filename, base_mesh_filename, culled_mesh_filename, mesh_name, + land_ice_mask_filename, remapped_filename, logger=logger, + mpi_tasks=ntasks, parallel_executable=parallel_executable, + convert_to_cdf5=convert_to_cdf5) diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_iceberg_climatology.py b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_iceberg_climatology.py index 952ab13569..efa93d8f05 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_iceberg_climatology.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_iceberg_climatology.py @@ -2,7 +2,6 @@ import numpy as np import xarray as xr -from mpas_tools.io import write_netcdf from pyremap import LatLonGridDescriptor, MpasCellMeshDescriptor, Remapper from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -64,133 +63,134 @@ def run(self): else: land_ice_mask_filename = None - remap_iceberg_climo(in_filename, mesh_filename, mesh_short_name, - land_ice_mask_filename, remapped_filename, - logger=logger, mpi_tasks=ntasks, - parallel_executable=parallel_executable) + self.remap_iceberg_climo( + in_filename, mesh_filename, mesh_short_name, + land_ice_mask_filename, remapped_filename, logger=logger, + mpi_tasks=ntasks, parallel_executable=parallel_executable) + def remap_iceberg_climo( + self, in_filename, mesh_filename, mesh_name, + land_ice_mask_filename, out_filename, logger, + mapping_directory='.', method='conserve', mpi_tasks=1, + parallel_executable=None): + """ + Remap iceberg freshwater fluxes from a climatology from + Merino et al. 
(2016) to the current MPAS mesh

-def remap_iceberg_climo(in_filename, mesh_filename, mesh_name,
-                        land_ice_mask_filename, out_filename, logger,
-                        mapping_directory='.', method='conserve', mpi_tasks=1,
-                        parallel_executable=None):
-    """
-    Remap iceberg freshwater fluxes from a climatology from
-    Merino et al. (2016) to the current MPAS mesh
-
-    Parameters
-    ----------
-    in_filename : str
-        The original Merino et al. iceberg freshwater flux climatology file
+        Parameters
+        ----------
+        in_filename : str
+            The original Merino et al. iceberg freshwater flux climatology file

-    mesh_filename : str
-        The MPAS mesh
+        mesh_filename : str
+            The MPAS mesh

-    mesh_name : str
-        The name of the mesh (e.g. oEC60to30wISC), used in the name of the
-        mapping file
+        mesh_name : str
+            The name of the mesh (e.g. oEC60to30wISC), used in the name of the
+            mapping file

-    land_ice_mask_filename : str
-        A file containing the variable ``landIceMask`` on the MPAS mesh
+        land_ice_mask_filename : str
+            A file containing the variable ``landIceMask`` on the MPAS mesh

-    out_filename : str
-        The iceberg freshwater fluxes remapped to the MPAS mesh
+        out_filename : str
+            The iceberg freshwater fluxes remapped to the MPAS mesh

-    logger : logging.Logger
-        A logger for output from the step
+        logger : logging.Logger
+            A logger for output from the step

-    mapping_directory : str
-        The directory where the mapping file should be stored (if it is to be
-        computed) or where it already exists (if not)
+        mapping_directory : str
+            The directory where the mapping file should be stored (if it is to
+            be computed) or where it already exists (if not)

-    method : {'bilinear', 'neareststod', 'conserve'}, optional
-        The method of interpolation used, see documentation for
-        `ESMF_RegridWeightGen` for details.
+        method : {'bilinear', 'neareststod', 'conserve'}, optional
+            The method of interpolation used, see documentation for
+            `ESMF_RegridWeightGen` for details.

-    mpi_tasks : int, optional
-        The number of MPI tasks to use to compute the mapping file
+        mpi_tasks : int, optional
+            The number of MPI tasks to use to compute the mapping file

-    parallel_executable : {'srun', 'mpirun'}, optional
-        The name of the parallel executable to use to launch ESMF tools.
-        But default, 'mpirun' from the conda environment is used
-    """
+        parallel_executable : {'srun', 'mpirun'}, optional
+            The name of the parallel executable to use to launch ESMF tools.
+            By default, 'mpirun' from the conda environment is used
+        """

-    name, ext = os.path.splitext(in_filename)
-    monotonic_filename = f'{name}_monotonic_lon{ext}'
-
-    ds = xr.open_dataset(in_filename)
-    # latitude and longitude are actually 1D
-    ds['lon'] = ds.longitude.isel(y=0)
-    ds['lat'] = ds.latitude.isel(x=0)
-    ds = ds.drop_vars(['latitude', 'longitude'])
-    ds = ds.rename(dict(x='lon', y='lat'))
-    # the first and last longitudes are zeroed out!!!
- ds = ds.isel(lon=slice(1, ds.sizes['lon'] - 1)) - - lon_indices = np.argsort(ds.lon) - # make sure longitudes are unique - lon = ds.lon.isel(lon=lon_indices).values - lon, unique_indices = np.unique(lon, return_index=True) - lon_indices = lon_indices[unique_indices] - ds = ds.isel(lon=lon_indices) - - ds.to_netcdf(monotonic_filename) - logger.info('Creating the source grid descriptor...') - src_descriptor = LatLonGridDescriptor.read(fileName=monotonic_filename) - src_mesh_name = src_descriptor.meshName - - logger.info('Creating the destination MPAS mesh descriptor...') - dst_descriptor = MpasCellMeshDescriptor(mesh_filename, mesh_name) - - mapping_filename = \ - f'{mapping_directory}/map_{src_mesh_name}_to_{mesh_name}_{method}.nc' - - logger.info(f'Creating the mapping file {mapping_filename}...') - remapper = Remapper(src_descriptor, dst_descriptor, mapping_filename) - - remapper.build_mapping_file(method=method, mpiTasks=mpi_tasks, - tempdir=mapping_directory, logger=logger, - esmf_parallel_exec=parallel_executable) - - logger.info('Remapping...') - name, ext = os.path.splitext(out_filename) - remap_filename = f'{name}_after_remap{ext}' - remapper.remap_file(inFileName=monotonic_filename, - outFileName=remap_filename, - logger=logger) - - ds = xr.open_dataset(remap_filename) - logger.info('Removing lat/lon vertex variables...') - drop = [var for var in ds if 'nv' in ds[var].dims] - ds = ds.drop_vars(drop) - logger.info('Renaming dimensions and variables...') - rename = dict(ncol='nCells', - month='Time', - Icb_flux='bergFreshwaterFluxData') - ds = ds.rename(rename) - logger.info('Adding xtime...') - xtime = [] - for time_index in range(ds.sizes['Time']): - time_str = f'0000-{time_index + 1:02d}-15_00:00:00' - xtime.append(time_str) - ds['xtime'] = ('Time', np.array(xtime, 'S64')) - - logger.info('Fix masking...') - field = 'bergFreshwaterFluxData' - # zero out the field where it's currently NaN - ds[field] = ds[field].where(ds[field].notnull(), 0.) - - if land_ice_mask_filename is not None: - logger.info('Masking out regions under ice shelves...') - ds_mask = xr.open_dataset(land_ice_mask_filename) - no_land_ice_mask = 1 - ds_mask.landIceMask - if 'Time' in no_land_ice_mask.dims: - no_land_ice_mask = no_land_ice_mask.isel(Time=0, drop=True) - - # mask only to regions without land ice - ds[field] = ds[field] * no_land_ice_mask - - logger.info(f'Writing to {out_filename}...') - write_netcdf(ds, out_filename) - - logger.info('done.') + name, ext = os.path.splitext(in_filename) + monotonic_filename = f'{name}_monotonic_lon{ext}' + + ds = xr.open_dataset(in_filename) + # latitude and longitude are actually 1D + ds['lon'] = ds.longitude.isel(y=0) + ds['lat'] = ds.latitude.isel(x=0) + ds = ds.drop_vars(['latitude', 'longitude']) + ds = ds.rename(dict(x='lon', y='lat')) + # the first and last longitudes are zeroed out!!! 
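+        # drop those zeroed-out columns; the longitudes are then sorted and
+        # deduplicated so the remapper sees a strictly monotonic axis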
+ ds = ds.isel(lon=slice(1, ds.sizes['lon'] - 1)) + + lon_indices = np.argsort(ds.lon) + # make sure longitudes are unique + lon = ds.lon.isel(lon=lon_indices).values + lon, unique_indices = np.unique(lon, return_index=True) + lon_indices = lon_indices[unique_indices] + ds = ds.isel(lon=lon_indices) + + ds.to_netcdf(monotonic_filename) + logger.info('Creating the source grid descriptor...') + src_descriptor = LatLonGridDescriptor.read(fileName=monotonic_filename) + src_mesh_name = src_descriptor.meshName + + logger.info('Creating the destination MPAS mesh descriptor...') + dst_descriptor = MpasCellMeshDescriptor(mesh_filename, mesh_name) + + mapping_filename = \ + f'{mapping_directory}/map_{src_mesh_name}_to_{mesh_name}_{method}.nc' # noqa: E501 + + logger.info(f'Creating the mapping file {mapping_filename}...') + remapper = Remapper(src_descriptor, dst_descriptor, mapping_filename) + + remapper.build_mapping_file(method=method, mpiTasks=mpi_tasks, + tempdir=mapping_directory, logger=logger, + esmf_parallel_exec=parallel_executable) + + logger.info('Remapping...') + name, ext = os.path.splitext(out_filename) + remap_filename = f'{name}_after_remap{ext}' + remapper.remap_file(inFileName=monotonic_filename, + outFileName=remap_filename, + logger=logger) + + ds = xr.open_dataset(remap_filename) + logger.info('Removing lat/lon vertex variables...') + drop = [var for var in ds if 'nv' in ds[var].dims] + ds = ds.drop_vars(drop) + logger.info('Renaming dimensions and variables...') + rename = dict( + ncol='nCells', + month='Time', + Icb_flux='bergFreshwaterFluxData') + ds = ds.rename(rename) + logger.info('Adding xtime...') + xtime = [] + for time_index in range(ds.sizes['Time']): + time_str = f'0000-{time_index + 1:02d}-15_00:00:00' + xtime.append(time_str) + ds['xtime'] = ('Time', np.array(xtime, 'S64')) + + logger.info('Fix masking...') + field = 'bergFreshwaterFluxData' + # zero out the field where it's currently NaN + ds[field] = ds[field].where(ds[field].notnull(), 0.) 
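+        # (destination cells with no source-grid coverage come out of the
+        # remapping as NaN)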
+ + if land_ice_mask_filename is not None: + logger.info('Masking out regions under ice shelves...') + ds_mask = xr.open_dataset(land_ice_mask_filename) + no_land_ice_mask = 1 - ds_mask.landIceMask + if 'Time' in no_land_ice_mask.dims: + no_land_ice_mask = no_land_ice_mask.isel(Time=0, drop=True) + + # mask only to regions without land ice + ds[field] = ds[field] * no_land_ice_mask + + logger.info(f'Writing to {out_filename}...') + self.write_netcdf(ds, out_filename) + + logger.info('done.') diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_sea_surface_salinity_restoring.py b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_sea_surface_salinity_restoring.py index 1eecf89421..3ff8ad7afe 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_sea_surface_salinity_restoring.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_sea_surface_salinity_restoring.py @@ -3,7 +3,6 @@ import numpy as np import xarray as xr -from mpas_tools.io import write_netcdf from mpas_tools.logging import check_call from pyremap import MpasCellMeshDescriptor @@ -29,8 +28,7 @@ def __init__(self, test_case): The test case this step belongs to """ super().__init__(test_case, - name='remap_sea_surface_salinity_restoring', - ntasks=512, min_tasks=128) + name='remap_sea_surface_salinity_restoring') self.add_input_file( target='woa23_decav_ne300_sss_monthly_extrap.20250114.nc', @@ -42,6 +40,30 @@ def __init__(self, test_case): self.add_output_file(filename='sss.WOA23_monthlyClimatology.nc') + def setup(self): + """ + Set resources from config options + """ + super().setup() + section = self.config['files_for_e3sm'] + self.ntasks = section.getint('remap_sss_ntasks') + self.min_tasks = section.getint('remap_sss_min_tasks') + + def constrain_resources(self, available_resources): + """ + Constrain ``cpus_per_task`` and ``ntasks`` based on the number of + cores available to this step + + Parameters + ---------- + available_resources : dict + The total number of cores available to the step + """ + section = self.config['files_for_e3sm'] + self.ntasks = section.getint('remap_sss_ntasks') + self.min_tasks = section.getint('remap_sss_min_tasks') + super().constrain_resources(available_resources) + def run(self): """ Run this step of the test case @@ -109,7 +131,7 @@ def _create_target_scrip_file(self, target_mesh_filename, mesh_name): ds_out = xr.Dataset() ds_out['expandDist'] = expand_dist - write_netcdf(ds_out, 'expandDist.nc') + self.write_netcdf(ds_out, 'expandDist.nc') descriptor = MpasCellMeshDescriptor( fileName=target_mesh_filename, @@ -210,4 +232,4 @@ def _modify_remapped_sss(self, remap_filename, out_filename): rename = dict(ncol='nCells', SALT='surfaceSalinityMonthlyClimatologyValue') ds = ds.rename(rename) - write_netcdf(ds, out_filename) + self.write_netcdf(ds, out_filename) diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_tidal_mixing.py b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_tidal_mixing.py index 4d1ff82169..36cda101ef 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/remap_tidal_mixing.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/remap_tidal_mixing.py @@ -2,7 +2,6 @@ import pyproj import xarray as xr -from mpas_tools.io import write_netcdf from pyremap import MpasCellMeshDescriptor, ProjectionGridDescriptor, Remapper from compass.io import symlink @@ -68,122 +67,124 @@ def run(self): mesh_short_name = self.mesh_short_name land_ice_mask_filename = 'initial_state.nc' - remap_tidal(in_filename, mesh_filename, 
mesh_short_name,
-                land_ice_mask_filename, remapped_filename, logger=logger,
-                mpi_tasks=ntasks, parallel_executable=parallel_executable)
+        self.remap_tidal(
+            in_filename, mesh_filename, mesh_short_name,
+            land_ice_mask_filename, remapped_filename, logger=logger,
+            mpi_tasks=ntasks, parallel_executable=parallel_executable)

         symlink(
             os.path.abspath(remapped_filename),
             f'{self.ocean_inputdata_dir}/{dest_filename}')

+    def remap_tidal(
+            self, in_filename, mesh_filename, mesh_name,
+            land_ice_mask_filename, out_filename, logger,
+            mapping_directory='.', method='bilinear', renormalize=0.01,
+            mpi_tasks=1, parallel_executable=None):
+        """
+        Remap the RMS tidal velocity from the CATS model
+        (https://www.usap-dc.org/view/dataset/601235) to the current MPAS mesh

-def remap_tidal(in_filename, mesh_filename, mesh_name, land_ice_mask_filename,
-                out_filename, logger, mapping_directory='.', method='bilinear',
-                renormalize=0.01, mpi_tasks=1, parallel_executable=None):
-    """
-    Remap the RMS tidal velocity from the CATS model
-    (https://www.usap-dc.org/view/dataset/601235) to the current MPAS mesh
-
-    Parameters
-    ----------
-    in_filename : str
-        The original CATS tidal friction velocity file
+        Parameters
+        ----------
+        in_filename : str
+            The original CATS tidal friction velocity file

-    mesh_filename : str
-        The MPAS mesh
+        mesh_filename : str
+            The MPAS mesh

-    mesh_name : str
-        The name of the mesh (e.g. oEC60to30wISC), used in the name of the
-        mapping file
+        mesh_name : str
+            The name of the mesh (e.g. oEC60to30wISC), used in the name of the
+            mapping file

-    land_ice_mask_filename : str
-        A file containing the variable ``landIceMask`` on the MPAS mesh
+        land_ice_mask_filename : str
+            A file containing the variable ``landIceMask`` on the MPAS mesh

-    out_filename : str
-        An output file to write the remapped climatology of SSS to
+        out_filename : str
+            An output file to write the remapped RMS tidal velocity to

-    logger : logging.Logger
-        A logger for output from the step
+        logger : logging.Logger
+            A logger for output from the step

-    mapping_directory : str
-        The directory where the mapping file should be stored (if it is to be
-        computed) or where it already exists (if not)
+        mapping_directory : str
+            The directory where the mapping file should be stored (if it is to
+            be computed) or where it already exists (if not)

-    method : {'bilinear', 'neareststod', 'conserve'}, optional
-        The method of interpolation used, see documentation for
-        `ESMF_RegridWeightGen` for details.
+        method : {'bilinear', 'neareststod', 'conserve'}, optional
+            The method of interpolation used, see documentation for
+            `ESMF_RegridWeightGen` for details.

-    renormalize : float, optional
-        A threshold to use to renormalize the data
+        renormalize : float, optional
+            A threshold to use to renormalize the data

-    mpi_tasks : int, optional
-        The number of MPI tasks to use to compute the mapping file
+        mpi_tasks : int, optional
+            The number of MPI tasks to use to compute the mapping file

-    parallel_executable : {'srun', 'mpirun'}, optional
-        The name of the parallel executable to use to launch ESMF tools.
-        But default, 'mpirun' from the conda environment is used
-    """
+        parallel_executable : {'srun', 'mpirun'}, optional
+            The name of the parallel executable to use to launch ESMF tools.
+            By default, 'mpirun' from the conda environment is used
+        """

-    logger.info('Creating the source grid descriptor...')
-    projection_in = pyproj.Proj('+proj=stere +lat_ts=-71.0 +lat_0=-90 '
-                                '+lon_0=-70.0 +k_0=1.0 +x_0=0.0 +y_0=0.0 '
-                                '+ellps=WGS84')
-    in_grid_name = 'S71W70_CATS_ustar'
+        logger.info('Creating the source grid descriptor...')
+        projection_in = pyproj.Proj('+proj=stere +lat_ts=-71.0 +lat_0=-90 '
+                                    '+lon_0=-70.0 +k_0=1.0 +x_0=0.0 +y_0=0.0 '
+                                    '+ellps=WGS84')
+        in_grid_name = 'S71W70_CATS_ustar'

-    src_descriptor = ProjectionGridDescriptor.read(
-        projection=projection_in, fileName=in_filename, meshName=in_grid_name,
-        xVarName='x', yVarName='y')
-    src_mesh_name = src_descriptor.meshName
+        src_descriptor = ProjectionGridDescriptor.read(
+            projection=projection_in, fileName=in_filename,
+            meshName=in_grid_name, xVarName='x', yVarName='y')
+        src_mesh_name = src_descriptor.meshName

-    dst_descriptor = MpasCellMeshDescriptor(mesh_filename, mesh_name)
+        dst_descriptor = MpasCellMeshDescriptor(mesh_filename, mesh_name)

-    mapping_filename = \
-        f'{mapping_directory}/map_{src_mesh_name}_to_{mesh_name}_{method}.nc'
+        mapping_filename = \
+            f'{mapping_directory}/map_{src_mesh_name}_to_{mesh_name}_{method}.nc'  # noqa: E501

-    logger.info(f'Creating the mapping file {mapping_filename}...')
-    remapper = Remapper(src_descriptor, dst_descriptor, mapping_filename)
+        logger.info(f'Creating the mapping file {mapping_filename}...')
+        remapper = Remapper(src_descriptor, dst_descriptor, mapping_filename)

-    remapper.build_mapping_file(method=method, mpiTasks=mpi_tasks,
-                                tempdir=mapping_directory, logger=logger,
-                                esmf_parallel_exec=parallel_executable)
-    logger.info('done.')
+        remapper.build_mapping_file(method=method, mpiTasks=mpi_tasks,
+                                    tempdir=mapping_directory, logger=logger,
+                                    esmf_parallel_exec=parallel_executable)
+        logger.info('done.')

-    field = 'velocityTidalRMS'
+        field = 'velocityTidalRMS'

-    logger.info('Remapping...')
-    name, ext = os.path.splitext(out_filename)
-    remap_filename = f'{name}_after_remap{ext}'
-    remapper.remap_file(inFileName=in_filename, outFileName=remap_filename,
-                        logger=logger)
+        logger.info('Remapping...')
+        name, ext = os.path.splitext(out_filename)
+        remap_filename = f'{name}_after_remap{ext}'
+        remapper.remap_file(inFileName=in_filename, outFileName=remap_filename,
+                            logger=logger)

-    ds_mask = xr.open_dataset(land_ice_mask_filename)
-    land_ice_mask = ds_mask.landIceMask
+        ds_mask = xr.open_dataset(land_ice_mask_filename)
+        land_ice_mask = ds_mask.landIceMask

-    logger.info('Renormalize and masking to ice-shelf cavities...')
-    ds = xr.Dataset()
-    with xr.open_dataset(mesh_filename) as ds_mesh:
-        ds['lonCell'] = ds_mesh.lonCell
-        ds['latCell'] = ds_mesh.latCell
+        logger.info('Renormalizing and masking to ice-shelf cavities...')
+        ds = xr.Dataset()
+        with xr.open_dataset(mesh_filename) as ds_mesh:
+            ds['lonCell'] = ds_mesh.lonCell
+            ds['latCell'] = ds_mesh.latCell

-    with xr.open_dataset(remap_filename) as ds_remap:
-        logger.info('Renaming dimensions and variables...')
-        rename = dict(ncol='nCells')
-        ds_remap = ds_remap.rename(rename)
-        ds_remap = ds_remap.drop_vars(['x', 'y'])
+        with xr.open_dataset(remap_filename) as ds_remap:
+            logger.info('Renaming dimensions and variables...')
+            rename = dict(ncol='nCells')
+            ds_remap = ds_remap.rename(rename)
+            ds_remap = ds_remap.drop_vars(['x', 'y'])

-        # renormalize
-        mask = ds_remap.mask > renormalize
-        ds[field] = (ds_remap[field] / ds_remap.mask).where(mask, 0.)
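+            # ds_remap.mask is the remapped valid-source fraction; dividing
+            # by it renormalizes partially covered cells, while cells at or
+            # below the `renormalize` threshold are zeroed out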
+ # renormalize + mask = ds_remap.mask > renormalize + ds[field] = (ds_remap[field] / ds_remap.mask).where(mask, 0.) - # mask only to regions with land ice - ds[field] = land_ice_mask * ds[field] + # mask only to regions with land ice + ds[field] = land_ice_mask * ds[field] - # drop Time dimension - if 'Time' in ds[field].dims: - ds[field] = ds[field].isel(Time=0) + # drop Time dimension + if 'Time' in ds[field].dims: + ds[field] = ds[field].isel(Time=0) - ds[field].attrs['units'] = 'm^2 s^-2' + ds[field].attrs['units'] = 'm^2 s^-2' - write_netcdf(ds, out_filename) + self.write_netcdf(ds, out_filename) - logger.info('done.') + logger.info('done.') diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_graph_partition.py b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_graph_partition.py index cf208482d8..f41c33de40 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_graph_partition.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_graph_partition.py @@ -3,7 +3,6 @@ import numpy as np import xarray as xr -from mpas_tools.io import write_netcdf from mpas_tools.logging import check_call from pyremap import MpasCellMeshDescriptor, Remapper @@ -73,7 +72,7 @@ def run(self): # to add it again if 'cullCell' in ds: ds = ds.drop_vars(['cullCell']) - write_netcdf(ds, 'mesh.nc') + self.write_netcdf(ds, 'mesh.nc') mesh_filename = 'mesh.nc' max_cells_per_core = config.getint('files_for_e3sm', diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_initial_condition.py b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_initial_condition.py index d740af3e97..ab7e905c4b 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_initial_condition.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_initial_condition.py @@ -1,7 +1,6 @@ import os import xarray -from mpas_tools.io import write_netcdf from compass.io import symlink from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -47,7 +46,7 @@ def run(self): with xarray.open_dataset('restart.nc') as ds: ds.load() ds = ds[keep_vars] - write_netcdf(ds, dest_filename) + self.write_netcdf(ds, dest_filename) symlink(os.path.abspath(dest_filename), f'{self.seaice_inputdata_dir}/{dest_filename}') diff --git a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_mesh.py b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_mesh.py index 8adf73a085..500521ace3 100644 --- a/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_mesh.py +++ b/compass/ocean/tests/global_ocean/files_for_e3sm/seaice_mesh.py @@ -1,7 +1,6 @@ import os import xarray as xr -from mpas_tools.io import write_netcdf from compass.io import symlink from compass.ocean.tests.global_ocean.files_for_e3sm.files_for_e3sm_step import ( # noqa: E501 @@ -43,7 +42,7 @@ def run(self): keep_vars = self.mesh_vars ds = ds[keep_vars] ds.load() - write_netcdf(ds, dest_filename) + self.write_netcdf(ds, dest_filename) symlink(os.path.abspath(dest_filename), f'{self.seaice_mesh_dir}/{dest_filename}') diff --git a/compass/ocean/tests/global_ocean/global_ocean.cfg b/compass/ocean/tests/global_ocean/global_ocean.cfg index e892386537..662e534d94 100644 --- a/compass/ocean/tests/global_ocean/global_ocean.cfg +++ b/compass/ocean/tests/global_ocean/global_ocean.cfg @@ -218,3 +218,10 @@ sss_smoothing_min_lat = 70 # the maximum smoothing distance (meters) sss_smoothing_max_dist = 1000e3 + +# whether to convert to CDF5 format +convert_to_cdf5 = False + +# resources for remapping SSS 
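+# (high-resolution meshes override these in their mesh config files, e.g.
+# rrs6to18.cfg below)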
+remap_sss_ntasks = 512
+remap_sss_min_tasks = 128
diff --git a/compass/ocean/tests/global_ocean/init/remap_ice_shelf_melt.py b/compass/ocean/tests/global_ocean/init/remap_ice_shelf_melt.py
index fc010135d3..8842fde07a 100644
--- a/compass/ocean/tests/global_ocean/init/remap_ice_shelf_melt.py
+++ b/compass/ocean/tests/global_ocean/init/remap_ice_shelf_melt.py
@@ -1,3 +1,6 @@
+import os
+import subprocess
+
 import h5py
 import numpy as np
 import pyproj
@@ -84,17 +87,20 @@ def run(self):
         map_culled_to_base_filename = 'map_culled_to_base.nc'
         mesh_name = self.mesh.mesh_name

+        convert_to_cdf5 = config.getboolean(
+            'files_for_e3sm', 'convert_to_cdf5')
+
         remap_paolo(
             in_filename, base_mesh_filename, culled_mesh_filename,
             mesh_name, land_ice_mask_filename, out_filename,
-            logger=logger, mpi_tasks=ntasks,
+            logger=logger, convert_to_cdf5=convert_to_cdf5, mpi_tasks=ntasks,
             parallel_executable=parallel_executable,
             map_culled_to_base_filename=map_culled_to_base_filename)


 def remap_paolo(in_filename, base_mesh_filename, culled_mesh_filename,
                 mesh_name, land_ice_mask_filename, out_filename, logger,
-                mapping_directory='.', method='conserve',
+                convert_to_cdf5, mapping_directory='.', method='conserve',
                 renormalization_threshold=None, mpi_tasks=1,
                 parallel_executable=None,
                 map_culled_to_base_filename=None):
@@ -127,6 +133,9 @@ def remap_paolo(in_filename, base_mesh_filename, culled_mesh_filename,
     logger : logging.Logger
         A logger for output from the step

+    convert_to_cdf5 : bool
+        Whether to convert the output files to CDF5 format
+
     mapping_directory : str
         The directory where the mapping file should be stored (if it is to be
         computed) or where it already exists (if not)
@@ -235,7 +244,7 @@ def remap_paolo(in_filename, base_mesh_filename, culled_mesh_filename,
     sphere_flux = (ds.dataLandIceFreshwaterFlux * sphere_area).sum().values
     heat_flux = (ds.dataLandIceHeatFlux * sphere_area).sum().values

-    logger.info(f'Area of a cell (m^2): {planar_area[0,0]:.1f}')
+    logger.info(f'Area of a cell (m^2): {planar_area[0, 0]:.1f}')
     logger.info(f'Total flux on plane (kg/s): {planar_flux:.1f}')
     logger.info(f'Total flux on sphere (kg/s): {sphere_flux:.1f}')
     logger.info(f'Total heat flux on sphere (W): {heat_flux:.1f}')
@@ -282,7 +291,8 @@ def remap_paolo(in_filename, base_mesh_filename, culled_mesh_filename,
         _land_ice_mask_on_base_mesh(
             base_mesh_filename=base_mesh_filename,
             land_ice_mask_filename=land_ice_mask_filename,
-            map_culled_to_base_filename=map_culled_to_base_filename)
+            map_culled_to_base_filename=map_culled_to_base_filename,
+            convert_to_cdf5=convert_to_cdf5)
         ds_mask = xr.open_dataset('land_ice_mask_on_base.nc')
         mask = ds_mask.landIceFloatingMask
@@ -292,13 +302,14 @@ def remap_paolo(in_filename, base_mesh_filename, culled_mesh_filename,
     write_netcdf(ds_remap, 'ismf_remapped_to_base.nc')

     # deal with melting beyond the land-ice mask
-    _reroute_missing_flux(base_mesh_filename, map_culled_to_base_filename,
-                          out_filename, logger)
+    _reroute_missing_flux(
+        base_mesh_filename, map_culled_to_base_filename, out_filename, logger,
+        convert_to_cdf5)


 def remap_adusumilli(in_filename, base_mesh_filename, culled_mesh_filename,
                      mesh_name, land_ice_mask_filename, out_filename, logger,
-                     mapping_directory='.', method='conserve',
+                     convert_to_cdf5, mapping_directory='.', method='conserve',
                      renormalization_threshold=None, mpi_tasks=1,
                      parallel_executable=None,
                      map_culled_to_base_filename=None):
@@ -331,6 +342,9 @@ def remap_adusumilli(in_filename, base_mesh_filename, culled_mesh_filename,
     logger : logging.Logger
         A logger for output from the step

+    convert_to_cdf5 : bool
+        Whether to convert the output files to CDF5 format
+
     mapping_directory : str
         The directory where the mapping file should be stored (if it is to be
         computed) or where it already exists (if not)
@@ -419,21 +433,24 @@ def remap_adusumilli(in_filename, base_mesh_filename, culled_mesh_filename,
     if map_culled_to_base_filename is None:
         map_culled_to_base_filename = 'map_culled_to_base.nc'
-        write_map_culled_to_base(base_mesh_filename=base_mesh_filename,
-                                 culled_mesh_filename=culled_mesh_filename,
-                                 out_filename=map_culled_to_base_filename)
+        write_map_culled_to_base(
+            base_mesh_filename=base_mesh_filename,
+            culled_mesh_filename=culled_mesh_filename,
+            out_filename=map_culled_to_base_filename,
+            convert_to_cdf5=convert_to_cdf5)

     _land_ice_mask_on_base_mesh(
         base_mesh_filename=base_mesh_filename,
         land_ice_mask_filename=land_ice_mask_filename,
-        map_culled_to_base_filename=map_culled_to_base_filename)
+        map_culled_to_base_filename=map_culled_to_base_filename,
+        convert_to_cdf5=convert_to_cdf5)

     ds_mask = xr.open_dataset('land_ice_mask_on_base.nc')
     mask = ds_mask.landIceFloatingMask

     ds_remap['landIceFloatingMask'] = mask
     ds_remap.attrs.pop('history')

-    write_netcdf(ds_remap, 'ismf_remapped_to_base.nc')
+    _write_netcdf(ds_remap, 'ismf_remapped_to_base.nc', convert_to_cdf5)

     # deal with melting beyond the land-ice mask
     _reroute_missing_flux(base_mesh_filename, map_culled_to_base_filename,
@@ -441,7 +458,7 @@ def remap_adusumilli(in_filename, base_mesh_filename, culled_mesh_filename,

 def _land_ice_mask_on_base_mesh(base_mesh_filename, land_ice_mask_filename,
-                                map_culled_to_base_filename):
+                                map_culled_to_base_filename, convert_to_cdf5):
     """ Map the land-ice mask back to the base mesh """
     ds_map = xr.open_dataset(map_culled_to_base_filename)
@@ -457,11 +474,11 @@ def _land_ice_mask_on_base_mesh(base_mesh_filename, land_ice_mask_filename,
     ds_base_mask = xr.Dataset()
     ds_base_mask['landIceFloatingMask'] = ('nCells', base_land_ice_mask)

-    write_netcdf(ds_base_mask, 'land_ice_mask_on_base.nc')
+    _write_netcdf(ds_base_mask, 'land_ice_mask_on_base.nc', convert_to_cdf5)


 def _reroute_missing_flux(base_mesh_filename, map_culled_to_base_filename,
-                          out_filename, logger):
+                          out_filename, logger, convert_to_cdf5):
     """ For each flux cell not within an ice shelf, find the closest ice-shelf cell """
@@ -501,7 +518,7 @@ def _reroute_missing_flux(base_mesh_filename, map_culled_to_base_filename,
     logger.info(f'Total flux (kg/s): {fwf_total:.1f}')
     ds_to_route = xr.Dataset()
     ds_to_route['rerouteMask'] = reroute_mask
-    write_netcdf(ds_to_route, 'route_mask.nc')
+    _write_netcdf(ds_to_route, 'route_mask.nc', convert_to_cdf5)

     reroute_xyz = base_xyz[fluxes_to_reroute.values, :]
@@ -535,9 +552,28 @@ def _reroute_missing_flux(base_mesh_filename, map_culled_to_base_filename,
     ds_out[field] = ('nCells', hf_rerouted)
     ds_out[field] = ds_out[field].expand_dims(dim='Time', axis=0)
     ds_out[field].attrs['units'] = 'W m^-2'
-    write_netcdf(ds_out, out_filename)
+    _write_netcdf(ds_out, out_filename, convert_to_cdf5)

     fwf_total = (ds_out.dataLandIceFreshwaterFlux *
                  area.isel(nCells=map_culled_to_base)).sum().values
     logger.info(f'Total after rerouting (kg/s): {fwf_total:.1f}')
     logger.info('')
+
+
+def _write_netcdf(ds, filename, convert_to_cdf5, **kwargs):
+    """
+    Write an xarray dataset to a NetCDF file, possibly converting to CDF5
+    format
+    """
+
+    if convert_to_cdf5:
+        name, ext = os.path.splitext(filename)
+        write_filename = f'{name}_before_cdf5{ext}'
+    else:
+        write_filename = filename
+
+    write_netcdf(ds, write_filename, **kwargs)
+
+    if convert_to_cdf5:
+        args = ['ncks', '-O', '-5', write_filename, filename]
+        subprocess.check_call(args)
diff --git a/compass/ocean/tests/global_ocean/mesh/rrs6to18/rrs6to18.cfg b/compass/ocean/tests/global_ocean/mesh/rrs6to18/rrs6to18.cfg
index c0278b3c0a..8d1a99e93d 100644
--- a/compass/ocean/tests/global_ocean/mesh/rrs6to18/rrs6to18.cfg
+++ b/compass/ocean/tests/global_ocean/mesh/rrs6to18/rrs6to18.cfg
@@ -95,3 +95,10 @@ engine = netcdf4
 max_cells_per_core = 30000
 # We're seeing gpmetis failures for more than 750,000 tasks so we'll stay under
 min_cells_per_core = 6
+
+# whether to convert to CDF5 format
+convert_to_cdf5 = True
+
+# resources for remapping SSS
+remap_sss_ntasks = 4096
+remap_sss_min_tasks = 2048
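
Note: every write path in this change funnels through the same
write-then-convert pattern, either the new write_netcdf method on
FilesForE3SMStep or the module-level _write_netcdf helper above. As a
minimal, standalone sketch of that pattern (assuming only that NCO's ncks
is on the PATH; the name write_then_convert is illustrative, not part of
this change):

    import os
    import subprocess

    from mpas_tools.io import write_netcdf


    def write_then_convert(ds, filename, convert_to_cdf5=False, **kwargs):
        """Write ds as NetCDF, then optionally rewrite it in CDF5 format."""
        if convert_to_cdf5:
            # write under a temporary name so ncks can produce filename
            name, ext = os.path.splitext(filename)
            write_filename = f'{name}_before_cdf5{ext}'
        else:
            write_filename = filename

        write_netcdf(ds, write_filename, **kwargs)

        if convert_to_cdf5:
            # -5 selects the CDF5 (64-bit data) NetCDF format and -O
            # overwrites the output file if it already exists
            subprocess.check_call(
                ['ncks', '-O', '-5', write_filename, filename])

The conversion is opt-in via the new convert_to_cdf5 option in the
[files_for_e3sm] config section: False by default in global_ocean.cfg and
True for the RRS 6-to-18 km mesh in rrs6to18.cfg.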