Skip to content

Commit 71ea7cf

Browse files
Merge pull request #943 from trhille/make_major_data_interpolation_optional
Add optional data interpolation for all meshes. Add data interpolation option for all meshes. Interpolate data from high-resolution BedMachine and MEaSUREs data sets if interpolate_data = True and data paths are provided in .cfg file. Otherwise interpolation will be skipped. To preserve previous behavior, interpolation will be performed by default for antarctica and greenland, but not for regional meshes. This includes a significant refactor to reduce redundant code between all the different mesh_gen cases.
2 parents b9f4969 + 0905c41 commit 71ea7cf

35 files changed

Lines changed: 909 additions & 112 deletions

File tree

compass/landice/mesh.py

Lines changed: 270 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33
import sys
44
import time
5+
import uuid
56
from shutil import copyfile
67

78
import jigsawpy
@@ -14,6 +15,7 @@
1415
from mpas_tools.mesh.conversion import convert, cull
1516
from mpas_tools.mesh.creation import build_planar_mesh
1617
from mpas_tools.mesh.creation.sort_mesh import sort_mesh
18+
from mpas_tools.scrip.from_mpas import scrip_from_mpas
1719
from netCDF4 import Dataset
1820
from scipy.interpolate import NearestNDInterpolator, interpn
1921

@@ -636,13 +638,10 @@ def build_cell_width(self, section_name, gridded_dataset,
636638

637639
f.close()
638640

639-
# Get bounds defined by user, or use bound of gridded dataset
640-
bnds = [np.min(x1), np.max(x1), np.min(y1), np.max(y1)]
641-
bnds_options = ['x_min', 'x_max', 'y_min', 'y_max']
642-
for index, option in enumerate(bnds_options):
643-
bnd = section.get(option)
644-
if bnd != 'None':
645-
bnds[index] = float(bnd)
641+
# Get bounds defined by user, or use bounds from the gridded dataset.
642+
bnds = get_mesh_config_bounding_box(
643+
section,
644+
default_bounds=[np.min(x1), np.max(x1), np.min(y1), np.max(y1)])
646645

647646
geom_points, geom_edges = set_rectangular_geom_points_and_edges(*bnds)
648647

@@ -1191,3 +1190,267 @@ def clean_up_after_interp(fname):
11911190
data.variables['observedSurfaceVelocityUncertainty'][:] == 0.0)
11921191
data.variables['observedSurfaceVelocityUncertainty'][0, mask[0, :]] = 1.0
11931192
data.close()
1193+
1194+
1195+
def get_optional_interp_datasets(section, logger):
1196+
"""
1197+
Determine whether optional interpolation inputs are configured.
1198+
1199+
Parameters
1200+
----------
1201+
section : configparser.SectionProxy
1202+
Config section containing optional interpolation options
1203+
1204+
logger : logging.Logger
1205+
Logger for status messages
1206+
1207+
Returns
1208+
-------
1209+
bedmachine_dataset : str or None
1210+
Path to BedMachine dataset if configured, otherwise ``None``
1211+
1212+
measures_dataset : str or None
1213+
Path to MEaSUREs dataset if configured, otherwise ``None``
1214+
"""
1215+
1216+
def _specified(value):
1217+
return value is not None and str(value).strip().lower() not in [
1218+
'', 'none']
1219+
1220+
data_path = section.get('data_path', fallback=None)
1221+
bedmachine_filename = section.get('bedmachine_filename', fallback=None)
1222+
measures_filename = section.get('measures_filename', fallback=None)
1223+
1224+
use_bedmachine_interp = _specified(data_path) and \
1225+
_specified(bedmachine_filename)
1226+
use_measures_interp = _specified(data_path) and \
1227+
_specified(measures_filename)
1228+
1229+
if use_bedmachine_interp:
1230+
bedmachine_dataset = os.path.join(data_path, bedmachine_filename)
1231+
else:
1232+
bedmachine_dataset = None
1233+
logger.info('Skipping BedMachine interpolation because '
1234+
'`data_path` and/or `bedmachine_filename` are '
1235+
'not specified in config.')
1236+
1237+
if use_measures_interp:
1238+
measures_dataset = os.path.join(data_path, measures_filename)
1239+
else:
1240+
measures_dataset = None
1241+
logger.info('Skipping MEaSUREs interpolation because '
1242+
'`data_path` and/or `measures_filename` are '
1243+
'not specified in config.')
1244+
1245+
return bedmachine_dataset, measures_dataset
1246+
1247+
1248+
def get_mesh_config_bounding_box(section, default_bounds=None):
1249+
"""
1250+
Get bounding-box coordinates from a mesh config section.
1251+
1252+
Parameters
1253+
----------
1254+
section : configparser.SectionProxy
1255+
Mesh config section containing ``x_min``, ``x_max``, ``y_min``,
1256+
and ``y_max``
1257+
1258+
default_bounds : list of float, optional
1259+
Default bounds in the form ``[x_min, x_max, y_min, y_max]`` to use
1260+
when config values are missing or set to ``None``
1261+
1262+
Returns
1263+
-------
1264+
bounding_box : list of float
1265+
Bounding box in the form ``[x_min, x_max, y_min, y_max]``
1266+
"""
1267+
1268+
if default_bounds is None:
1269+
default_bounds = [None, None, None, None]
1270+
1271+
def _get_bound(option, default):
1272+
value = section.get(option, fallback=None)
1273+
if value is None or str(value).strip().lower() in ['', 'none']:
1274+
if default is None:
1275+
raise ValueError(
1276+
f'Missing required config option `{option}` and no '
1277+
'default was provided.')
1278+
return float(default)
1279+
return float(value)
1280+
1281+
return [
1282+
_get_bound('x_min', default_bounds[0]),
1283+
_get_bound('x_max', default_bounds[1]),
1284+
_get_bound('y_min', default_bounds[2]),
1285+
_get_bound('y_max', default_bounds[3])]
1286+
1287+
1288+
def subset_gridded_dataset_to_bounds(
1289+
source_dataset, bounding_box, subset_tag, logger):
1290+
"""
1291+
Subset a gridded source dataset to a bounding box.
1292+
1293+
Parameters
1294+
----------
1295+
source_dataset : str
1296+
Path to source gridded dataset
1297+
1298+
bounding_box : list of float
1299+
Bounding box in the form ``[x_min, x_max, y_min, y_max]``
1300+
1301+
subset_tag : str
1302+
Tag to include in the subset filename
1303+
1304+
logger : logging.Logger
1305+
Logger for status messages
1306+
1307+
Returns
1308+
-------
1309+
subset_dataset : str
1310+
Path to subsetted gridded dataset written to the current directory
1311+
"""
1312+
1313+
x_min, x_max, y_min, y_max = bounding_box
1314+
ds = xarray.open_dataset(source_dataset)
1315+
1316+
if 'x1' in ds and 'y1' in ds:
1317+
x_name = 'x1'
1318+
y_name = 'y1'
1319+
elif 'x' in ds and 'y' in ds:
1320+
x_name = 'x'
1321+
y_name = 'y'
1322+
else:
1323+
ds.close()
1324+
raise ValueError(
1325+
f'Could not find x/y coordinates in {source_dataset}. '
1326+
'Expected either x1/y1 or x/y.')
1327+
1328+
subset = ds.where(
1329+
(ds[x_name] >= x_min) & (ds[x_name] <= x_max) &
1330+
(ds[y_name] >= y_min) & (ds[y_name] <= y_max),
1331+
drop=True)
1332+
1333+
# Check for empty subset, handling possible mismatch
1334+
# between variable and dimension names
1335+
x_dim = x_name if x_name in subset.sizes else (
1336+
'x' if 'x' in subset.sizes else None)
1337+
y_dim = y_name if y_name in subset.sizes else (
1338+
'y' if 'y' in subset.sizes else None)
1339+
if x_dim is None or y_dim is None or subset.sizes[x_dim] == 0 or subset.sizes[y_dim] == 0: # noqa
1340+
subset.close()
1341+
ds.close()
1342+
raise ValueError(
1343+
f'Bounding box {bounding_box} produced an empty subset for '
1344+
f'{source_dataset}. Dimension names in subset: '
1345+
f'{list(subset.sizes.keys())}')
1346+
1347+
base = os.path.splitext(os.path.basename(source_dataset))[0]
1348+
unique_id = uuid.uuid4().hex
1349+
subset_dataset = f'{base}_{subset_tag}_{unique_id}_subset.nc'
1350+
logger.info(f'Writing subset dataset: {subset_dataset}')
1351+
subset.to_netcdf(subset_dataset)
1352+
1353+
subset.close()
1354+
ds.close()
1355+
return subset_dataset
1356+
1357+
1358+
def run_optional_interpolation(
1359+
self, mesh_filename, src_proj, parallel_executable, nProcs,
1360+
bedmachine_dataset=None, measures_dataset=None, subset_bounds=None):
1361+
"""
1362+
Run optional interpolation from high-resolution BedMachine and MEaSUREs
1363+
datasets and perform some necessary cleanup. This can require many
1364+
more resources than the rest of the mesh generation process, so it is
1365+
usually desirable to skip this step when prototyping meshes. This step
1366+
is only run if `interpolate_data` is set to True in the config file
1367+
and the necessary dataset paths are provided.
1368+
1369+
Parameters
1370+
----------
1371+
self : compass.step.Step
1372+
Step instance providing logger and context
1373+
1374+
mesh_filename : str
1375+
Destination MALI mesh file to interpolate to
1376+
1377+
src_proj : str
1378+
Source dataset projection for SCRIP generation
1379+
1380+
parallel_executable : str
1381+
Parallel launcher executable (e.g. ``srun``/``mpirun``)
1382+
1383+
nProcs : int or str
1384+
Number of processes for regridding weight generation
1385+
1386+
bedmachine_dataset : str, optional
1387+
BedMachine dataset path; if ``None`` this interpolation is skipped
1388+
1389+
measures_dataset : str, optional
1390+
MEaSUREs dataset path; if ``None`` this interpolation is skipped
1391+
1392+
subset_bounds : list of float, optional
1393+
Optional source-dataset subset bounds in the form
1394+
``[x_min, x_max, y_min, y_max]``. If provided, BedMachine and
1395+
MEaSUREs datasets are subsetted before SCRIP generation and
1396+
interpolation.
1397+
"""
1398+
1399+
logger = self.logger
1400+
do_optional_interp = bedmachine_dataset is not None or \
1401+
measures_dataset is not None
1402+
if not do_optional_interp:
1403+
return
1404+
1405+
if nProcs is None:
1406+
raise ValueError("nProcs must be provided as an int or str")
1407+
nProcs = str(nProcs)
1408+
1409+
subset_files = []
1410+
1411+
try:
1412+
if subset_bounds is not None:
1413+
if bedmachine_dataset is not None:
1414+
bedmachine_dataset = subset_gridded_dataset_to_bounds(
1415+
bedmachine_dataset,
1416+
subset_bounds,
1417+
'bedmachine',
1418+
logger)
1419+
subset_files.append(bedmachine_dataset)
1420+
if measures_dataset is not None:
1421+
measures_dataset = subset_gridded_dataset_to_bounds(
1422+
measures_dataset,
1423+
subset_bounds,
1424+
'measures',
1425+
logger)
1426+
subset_files.append(measures_dataset)
1427+
1428+
logger.info('creating scrip file for destination mesh')
1429+
mesh_base = os.path.splitext(mesh_filename)[0]
1430+
dst_scrip_file = f'{mesh_base}_scrip.nc'
1431+
scrip_from_mpas(mesh_filename, dst_scrip_file)
1432+
1433+
if bedmachine_dataset is not None:
1434+
interp_gridded2mali(self, bedmachine_dataset, dst_scrip_file,
1435+
parallel_executable, nProcs,
1436+
mesh_filename, src_proj, variables='all')
1437+
1438+
if measures_dataset is not None:
1439+
measures_vars = ['observedSurfaceVelocityX',
1440+
'observedSurfaceVelocityY',
1441+
'observedSurfaceVelocityUncertainty']
1442+
interp_gridded2mali(self, measures_dataset, dst_scrip_file,
1443+
parallel_executable, nProcs,
1444+
mesh_filename, src_proj,
1445+
variables=measures_vars)
1446+
1447+
clean_up_after_interp(mesh_filename)
1448+
finally:
1449+
for subset_file in subset_files:
1450+
if os.path.exists(subset_file):
1451+
logger.info(f'Removing subset dataset: {subset_file}')
1452+
try:
1453+
os.remove(subset_file)
1454+
except OSError as exc:
1455+
logger.warning('Could not remove subset dataset '
1456+
f'{subset_file}: {exc}')

0 commit comments

Comments
 (0)