diff --git a/examples/storm-surge/ike/test_ike.py b/examples/storm-surge/ike/test_ike.py index 55b91538f..0d5d9d6cf 100644 --- a/examples/storm-surge/ike/test_ike.py +++ b/examples/storm-surge/ike/test_ike.py @@ -17,7 +17,7 @@ ), pytest.param( {"num_cells": [29 * 4, 24 * 4], - "amr_levels_max": 6, + "amr_levels_max": 4, "num_output_times": 16}, id="fine", marks=pytest.mark.slow, diff --git a/examples/storm-surge/isaac/Makefile b/examples/storm-surge/isaac/Makefile index cd46dc5b3..707c82690 100644 --- a/examples/storm-surge/isaac/Makefile +++ b/examples/storm-surge/isaac/Makefile @@ -24,7 +24,7 @@ PLOTDIR = _plots # Directory for plots # FFLAGS += -DNETCDF -I$(NETCDF4_DIR)/include -L$(NETCDF4_DIR)/lib # LFLAGS += -I$(NETCDF4_DIR)/include -L$(NETCDF4_DIR)/lib -lnetcdff FFLAGS += -DNETCDF $(NETCDF_FFLAGS) -LFLAGS += $(NETCDF_LFLAGS) +LFLAGS += $(FFLAGS) $(NETCDF_LFLAGS) # --------------------------------- # package sources for this program: diff --git a/examples/storm-surge/isaac/setplot.py b/examples/storm-surge/isaac/setplot.py index 8fe715b15..29c5872ba 100644 --- a/examples/storm-surge/isaac/setplot.py +++ b/examples/storm-surge/isaac/setplot.py @@ -13,6 +13,7 @@ import clawpack.amrclaw.data as amrclaw import clawpack.geoclaw.data as geodata import clawpack.geoclaw.util as geoutil +import clawpack.geoclaw.surge.storm as stormtools import clawpack.geoclaw.surge.plot as surgeplot try: @@ -42,7 +43,8 @@ def setplot(plotdata=None): friction_data = geodata.FrictionData() friction_data.read(os.path.join(plotdata.outdir, 'friction.data')) - # Load storm track + # Load storm data + storm = stormtools.Storm(surge_data.storm_file, file_format="data") track = surgeplot.track_data(os.path.join(plotdata.outdir, 'fort.track')) # Set afteraxes function @@ -173,17 +175,19 @@ def friction_after_axes(cd): # ======================================================================== # Figures for gauges # ======================================================================== + # Map GeoClaw gauge number to NOAA gauge number and location/name + gauge_mapping = {1: [8761724, "Grand Isle, LA"], + 2: [8760922, 'Pilots Station East, SW Pass, LA']} + + # Storm Timepoints + landfall_time = np.datetime64(datetime.datetime(2012, 8, 29, 0)) + begin_date = datetime.datetime(2012, 8, 27) + end_date = datetime.datetime(2012, 8, 31) + def plot_observed(current_data): """Fetch and plot gauge data for gauges used.""" - # Map GeoClaw gauge number to NOAA gauge number and location/name - gauge_mapping = {1: [8761724, "Grand Isle, LA"], - 2: [8760922, 'Pilots Station East, SW Pass, LA']} - station_id, station_name = gauge_mapping[current_data.gaugesoln.id] - landfall_time = np.datetime64(datetime.datetime(2012, 8, 29, 0)) - begin_date = datetime.datetime(2012, 8, 27) - end_date = datetime.datetime(2012, 8, 31) # Fetch data if needed date_time, water_level, tide = geoutil.fetch_noaa_tide_data(station_id, @@ -227,6 +231,49 @@ def plot_observed(current_data): plotitem = plotaxes.new_plotitem(plot_type='1d_plot') plotitem.plot_var = surgeplot.gauge_dry_regions plotitem.kwargs = {"color":'lightcoral', "linewidth":5} + + + # Plot wind and pressure at each gauge + def storm_gauge_afteraxes(cd, storm): + # Map GeoClaw gauge number to NOAA gauge number and location/name + station_id, station_name = gauge_mapping[cd.gaugesoln.id] + + # Convert to seconds relative to landfall + # t = (cd.gaugesoln.t - landfall_time) / np.timedelta64(1, 's') + t = cd.gaugesoln.t + t /= (24 * 60**2) + + w_ax = plt.gca() + color ="tab:blue" + w_ax.plot(t, surgeplot.gauge_wind(cd), label="wind", color=color) + w_ax.set_ylabel("Wind (m/s)", color=color) + w_ax.set_ylim(wind_limits) + w_ax.tick_params(axis='y', labelcolor=color) + + P_ax = w_ax.twinx() + color = "tab:red" + P_ax.set_ylabel("Pressure (mbar)", color=color) + P_ax.plot(t, surgeplot.gauge_pressure(cd) * 1e-2, label="pressure", color=color) + P_ax.set_ylim(pressure_limits) + P_ax.tick_params(axis='y', labelcolor=color) + + w_ax.set_title(f"Storm Fileds at {station_name}") + + plt.gcf().tight_layout() + + plotfigure = plotdata.new_plotfigure(name='Gauge Wind', figno=301, + type='each_gauge') + plotfigure.show = True + plotfigure.clf_each_gauge = True + plotfigure.kwargs = {"figsize": [6.4 * 1.2, + 4.8 * 1.0]} + + plotaxes = plotfigure.new_plotaxes() + plotaxes.time_scale = 1 / (24 * 60**2) + plotaxes.grid = True + plotaxes.xlimits = [-2, 1.5] + plotaxes.time_label = "Days relative to 2012-12-26 00:00 UTC" + plotaxes.afteraxes = lambda cd: storm_gauge_afteraxes(cd, storm) # Gauge Location Plot def gauge_location_afteraxes(cd): diff --git a/examples/storm-surge/isaac/setrun.py b/examples/storm-surge/isaac/setrun.py index 11d8ed79f..632bc94ae 100644 --- a/examples/storm-surge/isaac/setrun.py +++ b/examples/storm-surge/isaac/setrun.py @@ -322,6 +322,9 @@ def setrun(claw_pkg='geoclaw'): rundata.gaugedata.gauges.append([2, -89.95826667, 29.26460167, rundata.clawdata.t0, rundata.clawdata.tfinal]) + + # Record the wind and pressure fields at the gauges + rundata.gaugedata.aux_out_fields = [4, 5, 6] # ------------------------------------------------------------------ # GeoClaw specific parameters: @@ -409,34 +412,6 @@ def setgeo(rundata): data.storm_specification_type = 'holland80' # data.storm_specification_type = 'data' data.storm_file = (Path() / 'isaac.storm').resolve() - - isaac = Storm() - atcf_path = (Path(__file__).parent / "bal092012.dat").resolve() - isaac.read(path=atcf_path, file_format="ATCF") - # Calculate landfall time - Need to specify as the file above does not - # include this info (~2345 UTC - 6:45 p.m. CDT - on August 28) - isaac.time_offset = np.datetime64("2012-08-29T00:00") - - if data.storm_specification_type == 'holland80': - isaac.write(data.storm_file, file_format='geoclaw') - - elif data.storm_specification_type == "data": - # ASCII - isaac.file_format = 'NWS12' - isaac.file_paths = [] - isaac.file_paths.append((Path() / "isaac.PRE").resolve()) - isaac.file_paths.append((Path() / "isaac.WIN").resolve()) - - # NetCDF - # isaac.file_format = "NWS13" - # isaac.file_paths.append((Path() / "isaac.nc").resolve()) - # # For NetCDF we need to set this to the epoch or things do not work yet - # isaac.time_offset = np.datetime64("1970-01-01") - - isaac.write(data.storm_file, file_format='data') - - else: - raise ValueError("Invalid storm specification type.") # ======================= # Set Variable Friction @@ -463,6 +438,49 @@ def setgeo(rundata): # ---------------------- +def write_storm_file(rundata): + """Write the storm file configured in rundata.surge_data. + + Separated from setgeo so that importing/calling setrun() does not write + files as a side effect (important for test runners that call setrun() and + then substitute their own storm file). + """ + data = rundata.surge_data + + isaac = Storm() + atcf_path = (Path(__file__).parent / "bal092012.dat").resolve() + isaac.read(path=atcf_path, file_format="ATCF") + # Calculate landfall time - Need to specify as the file above does not + # include this info (~2345 UTC - 6:45 p.m. CDT - on August 28) + isaac.time_offset = np.datetime64("2012-08-29T00:00") + + if data.storm_specification_type == 'holland80': + isaac.write(data.storm_file, file_format='geoclaw') + + elif data.storm_specification_type == "data": + # ASCII + isaac.file_format = 'NWS12' + isaac.file_paths = [ + (Path(__file__).parent / "isaac.PRE").resolve(), + (Path(__file__).parent / "isaac.WIN").resolve(), + ] + + # NetCDF + # isaac.file_format = "NWS13" + # isaac.file_paths.append((Path() / "isaac.nc").resolve()) + # # For NetCDF we need to set this to the epoch or things do not work yet + # isaac.time_offset = np.datetime64("1970-01-01") + + # Stretch (>1) or compress (<1) the storm time axis relative to the + # first time step. 1.0 = no change; 2.0 = storm moves twice as slow. + isaac.storm_time_scale = 1.0 + + isaac.write(data.storm_file, file_format='data') + + else: + raise ValueError("Invalid storm specification type.") + + if __name__ == '__main__': # Set up run-time parameters and write all data files. import sys @@ -471,4 +489,5 @@ def setgeo(rundata): else: rundata = setrun() + write_storm_file(rundata) rundata.write() diff --git a/examples/storm-surge/isaac/test_isaac.py b/examples/storm-surge/isaac/test_isaac.py index acf2989ec..d3e38b88b 100644 --- a/examples/storm-surge/isaac/test_isaac.py +++ b/examples/storm-surge/isaac/test_isaac.py @@ -2,6 +2,8 @@ from __future__ import annotations +import re +import sys from pathlib import Path from datetime import datetime import pytest @@ -11,6 +13,20 @@ import clawpack.geoclaw.test as test from clawpack.geoclaw.surge.storm import Storm +# --------------------------------------------------------------------------- +# Import shared NetCDF file generators from tests/test_storm.py. +# No Python package __init__.py exists in tests/, so we insert the directory +# into sys.path rather than using a standard package import. +# --------------------------------------------------------------------------- +_tests_dir = Path(__file__).parents[3] / "tests" +if str(_tests_dir) not in sys.path: + sys.path.insert(0, str(_tests_dir)) +try: + from test_storm import create_era5_storm_file, create_nws13_storm_file +except ImportError: + create_era5_storm_file = None + create_nws13_storm_file = None + # Generic helper functions def days2seconds(t): """Convert days to seconds.""" @@ -108,6 +124,352 @@ def write_owi_wind( _write_owi_values(fobj, v_field) +def _read_owi_all_timesteps(pre_path: Path, win_path: Path): + """Read all time steps from committed OWI PRE and WIN files. + + There is no Python OWI reader in the GeoClaw codebase (the Fortran + ``read_OWI_ASCII`` routine in ``data_storm_module.f90`` is the only + reader). This function uses minimal line-by-line parsing of the + fixed-format OWI ASCII files. + + The OWI format stores data with longitude varying fastest (inner loop = + longitude). The flat data array for one time step is reshaped to + ``(nlat, nlon)`` in standard numpy ``[lat, lon]`` indexing. + + Returns + ------- + pressure_mb : ndarray, shape (nt, nlat, nlon) + Sea-level pressure in mb (millibar). + u_wind : ndarray, shape (nt, nlat, nlon) + Eastward wind speed in m/s. + v_wind : ndarray, shape (nt, nlat, nlon) + Northward wind speed in m/s. + lon : ndarray, shape (nlon,) + Longitude grid in degrees. First value = SWLon + DX (matching the + 1-based grid indexing used by the Fortran OWI reader). + lat : ndarray, shape (nlat,) + Latitude grid in degrees. First value = SWLat + DY. + times : list of datetime + Datetime objects for each time step (parsed from DT= field). + """ + + def _parse_snapshot_header(line): + """Extract grid parameters and datetime from an OWI snapshot header.""" + nlat = int(re.search(r'iLat=\s*(\d+)', line).group(1)) + nlon = int(re.search(r'iLong=\s*(\d+)', line).group(1)) + # OWI snapshot headers are fixed-width with NO spaces between fields, + # e.g. "DX=0.2500DY=0.2500SWLat=...". Use digit/decimal-only patterns + # so the match stops at the next field keyword, not at whitespace. + dx = float(re.search(r'DX=([\d.]+)', line).group(1)) + dy = float(re.search(r'DY=([\d.]+)', line).group(1)) + swlat = float(re.search(r'SWLat=\s*([-\d.]+)', line).group(1)) + swlon = float(re.search(r'SWLon=\s*([-\d.]+)', line).group(1)) + dt_str = re.search(r'DT=(\d{12})', line).group(1) + dt = datetime.strptime(dt_str, '%Y%m%d%H%M') + return nlat, nlon, dx, dy, swlat, swlon, dt + + def _read_n_values(fobj, n): + """Read exactly n whitespace-separated floats from an OWI file.""" + vals = [] + while len(vals) < n: + vals.extend(float(x) for x in fobj.readline().split()) + return np.array(vals[:n]) + + # --- Read pressure (all time steps) ------------------------------------ + # First pass: determine grid dimensions from the first snapshot header. + with open(pre_path) as f: + f.readline() # master header + header = f.readline() + nlat, nlon, dx, dy, swlat, swlon, _ = _parse_snapshot_header(header) + n_per_step = nlat * nlon + + # The Fortran OWI reader computes coordinates as 1-based: + # longitude(j) = swlon + j * dx, j = 1..mx + # so the first grid point is swlon+dx, not swlon. + lon = swlon + np.arange(1, nlon + 1) * dx + lat = swlat + np.arange(1, nlat + 1) * dy + + pre_timesteps = [] + times = [] + with open(pre_path) as f: + f.readline() # master header + while True: + header = f.readline() + if not header.strip(): + break + _, _, _, _, _, _, dt = _parse_snapshot_header(header) + times.append(dt) + flat = _read_n_values(f, n_per_step) + # OWI: lon varies fastest -> reshape as (nlat, nlon) + pre_timesteps.append(flat.reshape(nlat, nlon)) + + # --- Read wind (all time steps) ---------------------------------------- + u_timesteps = [] + v_timesteps = [] + with open(win_path) as f: + f.readline() # master header + for _ in times: + f.readline() # snapshot header + flat_u = _read_n_values(f, n_per_step) + flat_v = _read_n_values(f, n_per_step) + u_timesteps.append(flat_u.reshape(nlat, nlon)) + v_timesteps.append(flat_v.reshape(nlat, nlon)) + + pressure_mb = np.stack(pre_timesteps, axis=0) # (nt, nlat, nlon) + u_wind = np.stack(u_timesteps, axis=0) + v_wind = np.stack(v_timesteps, axis=0) + + return pressure_mb, u_wind, v_wind, lon, lat, times + + +def _make_isaac_netcdf(fmt: str, tmp_path: Path, test_path: Path) -> Path: + """Generate a NetCDF met-forcing file from the committed OWI Isaac files. + + Reads all time steps from ``isaac.PRE`` and ``isaac.WIN`` using minimal + OWI line-parsing (no Python OWI reader exists in the codebase; flagged + above in ``_read_owi_all_timesteps``), converts units, writes a NetCDF + file to ``tmp_path``, and writes the corresponding ``.storm`` descriptor + via ``Storm.write(file_format='data')``. + + Both ERA5 and NWS13 variants write pressure in **Pa** (as required by the + Fortran NetCDF reader, which has no unit conversion). The ERA5 variant + also stores wind in m/s; the NWS13 file uses the same values but with + NWS13 variable/dimension names (``uwnd``, ``vwnd``, ``press``). + + Parameters + ---------- + fmt : str + One of ``"netcdf_era5"`` or ``"netcdf_nws13"``. + tmp_path : Path + Directory for generated files. + test_path : Path + Path to the Isaac example directory (for finding committed OWI files). + + Returns + ------- + storm_path : Path + Path to the written ``.storm`` descriptor. + """ + if create_era5_storm_file is None or create_nws13_storm_file is None: + pytest.skip("NetCDF file generators not importable from test_storm.py") + + pytest.importorskip("netCDF4") + + pre_path = test_path / "isaac.PRE" + win_path = test_path / "isaac.WIN" + + pressure_mb, u_wind, v_wind, lon, lat, times = _read_owi_all_timesteps( + pre_path, win_path + ) + + # Convert OWI mb pressure to Pa (the Fortran OWI reader multiplies by + # 100; the NetCDF reader reads raw values, so we must pre-convert). + pressure_pa = pressure_mb * 100.0 + + # Convert datetime list to integer UNIX seconds (Fortran uses UNIX epoch + # 1970-01-01 via seconds_from_epoch in utility_module.f90). + unix_epoch = np.datetime64("1970-01-01T00:00:00", "s") + times_unix = np.array( + [ + int((np.datetime64(dt) - unix_epoch) / np.timedelta64(1, "s")) + for dt in times + ], + dtype=np.int64, + ) + + # lon/lat already adjusted for 1-based OWI grid offset in + # _read_owi_all_timesteps. + + time_offset = np.datetime64("2012-08-29T00:00:00") + + isaac = Storm() + isaac.time_offset = time_offset + + if fmt == "netcdf_era5": + nc_path = tmp_path / "isaac_era5.nc" + # create_era5_storm_file uses "seconds since 2012-08-29" as the time + # epoch (see time_epoch_str in test_storm.py). Convert Unix seconds + # to seconds relative to that epoch so xarray decodes them correctly + # and MetInterrogator computes the right nc_time_offset. + time_offset_s = int( + (time_offset - unix_epoch) / np.timedelta64(1, "s") + ) + create_era5_storm_file( + nc_path, + pressure_fields=pressure_pa, + u_fields=u_wind, + v_fields=v_wind, + times=times_unix - time_offset_s, + lon=lon, + lat=lat, + ) + isaac.file_format = "netcdf" + isaac.file_paths = [nc_path] + storm_path = tmp_path / "isaac_era5.storm" + isaac.write( + storm_path, + file_format="data", + dim_mapping={"t": "valid_time"}, + ) + + elif fmt == "netcdf_nws13": + nc_path = tmp_path / "isaac_nws13.nc" + # NWS13 file: same Pa pressure (Fortran no-conversion path), + # NWS13 variable names. The "mb" units attribute in + # create_nws13_storm_file is overridden by passing Pa values; + # the Fortran uses only the variable name, not the units attribute. + create_nws13_storm_file( + nc_path, + pressure_fields=pressure_pa, + u_fields=u_wind, + v_fields=v_wind, + times=times_unix, + lon=lon, + lat=lat, + ) + isaac.file_format = "nws13" + isaac.file_paths = [nc_path] + storm_path = tmp_path / "isaac_nws13.storm" + isaac.write( + storm_path, + file_format="data", + var_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + + else: + raise ValueError(f"Unknown fmt={fmt!r}") + + return storm_path + + +def _check_netcdf_storm_descriptor(generated_path: Path, fmt: str) -> None: + """Validate structure of a NetCDF data-storm descriptor file. + + Checks that ``generated_path`` contains the correct format number (2), + the expected coordinate names in the ``&file_info`` block, the expected + variable/role pairs in ``&variable_info`` blocks, and exactly one file + path entry. + + This is a structural validation against known-good expected values for + each format, not a comparison against a committed regression file (which + would contain machine-specific absolute paths). + + Parameters + ---------- + generated_path : Path + Path to the ``.storm`` descriptor written by ``Storm.write``. + fmt : str + One of ``"netcdf_era5"`` or ``"netcdf_nws13"``. + """ + text = generated_path.read_text() + lines = [ln.rstrip() for ln in text.splitlines()] + + # Locate the format line (contains the integer file format number). + format_line = next( + ln for ln in lines if ln.startswith("2 ") or ln.strip().startswith("2") + and "# File format" in ln + ) + assert "2" in format_line, ( + f"Expected file format 2 in descriptor, got: {format_line!r}" + ) + + # Locate the "# Format Data Information" section. + try: + fmt_info_idx = next( + i for i, ln in enumerate(lines) + if "# Format Data Information" in ln + ) + except StopIteration: + raise AssertionError( + f"'# Format Data Information' section not found in {generated_path}" + ) + + # Parse &file_info block for coordinate names. + file_info: dict = {} + in_file_info = False + for ln in lines[fmt_info_idx:]: + stripped = ln.strip() + if stripped == "&file_info": + in_file_info = True + continue + if in_file_info: + if stripped == "/": + break + if "=" in stripped: + key, _, val = stripped.partition("=") + file_info[key.strip()] = val.strip() + + # Parse &variable_info blocks for role→var_name mapping. + var_info: dict = {} + for ln in lines[fmt_info_idx:]: + stripped = ln.strip() + if stripped.startswith("&variable_info"): + kv = dict(re.findall(r'(\w+)=(\S+)', stripped)) + if "geoclaw_role" in kv and "var_name" in kv: + var_info[kv["geoclaw_role"]] = kv["var_name"].rstrip("/").strip() + + if fmt == "netcdf_era5": + assert file_info.get("lon_name") == "longitude", ( + f"ERA5 lon_name wrong: {file_info.get('lon_name')!r}" + ) + assert file_info.get("lat_name") == "latitude", ( + f"ERA5 lat_name wrong: {file_info.get('lat_name')!r}" + ) + assert file_info.get("time_name") == "valid_time", ( + f"ERA5 time_name wrong: {file_info.get('time_name')!r}" + ) + assert var_info.get("wind_u") == "u10", ( + f"ERA5 wind_u var wrong: {var_info.get('wind_u')!r}" + ) + assert var_info.get("wind_v") == "v10", ( + f"ERA5 wind_v var wrong: {var_info.get('wind_v')!r}" + ) + assert var_info.get("pressure") == "msl", ( + f"ERA5 pressure var wrong: {var_info.get('pressure')!r}" + ) + elif fmt == "netcdf_nws13": + assert file_info.get("lon_name") == "lon", ( + f"NWS13 lon_name wrong: {file_info.get('lon_name')!r}" + ) + assert file_info.get("lat_name") == "lat", ( + f"NWS13 lat_name wrong: {file_info.get('lat_name')!r}" + ) + assert file_info.get("time_name") == "time", ( + f"NWS13 time_name wrong: {file_info.get('time_name')!r}" + ) + assert var_info.get("wind_u") == "uwnd", ( + f"NWS13 wind_u var wrong: {var_info.get('wind_u')!r}" + ) + assert var_info.get("wind_v") == "vwnd", ( + f"NWS13 wind_v var wrong: {var_info.get('wind_v')!r}" + ) + assert var_info.get("pressure") == "press", ( + f"NWS13 pressure var wrong: {var_info.get('pressure')!r}" + ) + else: + raise ValueError(f"Unknown fmt={fmt!r}") + + # There should be exactly one file path entry. + try: + paths_idx = next( + i for i, ln in enumerate(lines) if "# File paths" in ln + ) + except StopIteration: + raise AssertionError( + f"'# File paths' section not found in {generated_path}" + ) + path_entries = [ + ln for ln in lines[paths_idx + 1:] if ln.strip() + ] + assert len(path_entries) == 1, ( + f"Expected 1 file path, got {len(path_entries)}: {path_entries}" + ) + + def _check_geoclaw_storm_descriptor(generated_path: Path, regression_path: Path) -> None: """Compare two GeoClaw storm files semantically using Storm readers.""" generated = Storm(path=generated_path, file_format="geoclaw") @@ -139,25 +501,60 @@ def _check_data_storm_descriptor(generated_path: Path, regression_path: Path) -> ] -# @pytest.mark.slow - @pytest.mark.regression @pytest.mark.storm @pytest.mark.parametrize( "data_file_format", - ["holland80", "owi_ascii"], + [ + "holland80", + "owi_ascii", + pytest.param( + "netcdf_era5", + marks=[pytest.mark.netcdf], + ), + pytest.param( + "netcdf_nws13", + marks=[pytest.mark.netcdf], + ), + ], ) def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: - """Regression test for Isaac storm surge using several storm-input modes.""" + """Regression test for Isaac storm surge using several storm-input modes. + + The ``netcdf_era5`` and ``netcdf_nws13`` variants generate NetCDF met- + forcing files from the committed OWI ASCII files (``isaac.PRE`` / + ``isaac.WIN``) and verify that GeoClaw produces gauge output identical to + the ``owi_ascii`` baseline. The two variants differ only in variable and + dimension naming conventions: + + ``netcdf_era5`` + CF-1.7 ERA5-style file: dims ``valid_time / latitude / longitude``, + vars ``u10 / v10 / msl``, lon in [0, 360], lat S-to-N. + ERA5 is a common source of met forcing for GeoClaw storm surge + simulations. + + ``netcdf_nws13`` + OWI NWS13-style file: dims ``time / lat / lon``, vars + ``uwnd / vwnd / press``. Exercises the explicit ``user_mapping`` + path in ``util.get_netcdf_names`` for non-default variable names. + + Both NetCDF variants store pressure in Pa (after converting from the OWI + mb values) because the Fortran NetCDF reader has no unit conversion. + The gauge regression data from ``owi_ascii`` is reused since the + underlying forcing is identical once unit-converted. + """ runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) runner.set_data() - # TODO: Decide on time range for Isaac test; this is currently set to match - # the Isaac test, but may need to be adjusted. + # TODO: Decide on time range for Isaac test, currently is just a half day runner.rundata.clawdata.t0 = days2seconds(-1) runner.rundata.clawdata.tfinal = days2seconds(-0.5) runner.rundata.clawdata.num_output_times = 1 + # TODO: May also want to change number of levels used, currently matches the + # example data but may be shortened for testing purposes. + runner.rundata.amrdata.amr_levels_max = 2 + surge_data = runner.rundata.surge_data surge_data.storm_file = tmp_path / "isaac.storm" @@ -166,45 +563,29 @@ def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: isaac = Storm(path=atcf_path, file_format="ATCF") isaac.time_offset = np.datetime64("2012-08-29") - + if data_file_format == "holland80": surge_data.storm_specification_type = "holland80" isaac.write(surge_data.storm_file, file_format="geoclaw", verbose=True) + elif data_file_format == "owi_ascii": surge_data.storm_specification_type = "data" isaac.file_format = "NWS12" - isaac.file_paths = [runner.test_path / "isaac.PRE", + isaac.file_paths = [runner.test_path / "isaac.PRE", runner.test_path / "isaac.WIN"] - isaac.write(surge_data.storm_file, file_format="data") - # TODO: Generate pressure and wind fields from the ATCF data, requires # storm field generation, which is not yet supported. - # write_owi_pressure(isaac.file_paths[0], - # pressure_fields=[isaac.pressure_field], - # times=[isaac.time_offset.astype(datetime)], - # lon=isaac.lon, - # lat=isaac.lat, - # ) - # write_owi_wind(isaac.file_paths[1], - # u_fields=[isaac.u_wind_field], - # v_fields=[isaac.v_wind_field], - # times=[isaac.time_offset.astype(datetime)], - # lon=isaac.lon, - # lat=isaac.lat, - # ) - # elif data_file_format == "owi_netcdf": - # surge_data.storm_specification_type = "data" - # isaac.data_file_format = "NWS13" - # isaac.file_paths = [tmp_path / "isaac.nc"] - # isaac.write(surge_data.storm_file, file_format="data") - # TODO: Add test for generic NetCDF formatted storms (should be a super set - # of NWS13) - # elif data_file_format == "netcdf": - # surge_data.storm_specification_type = "data" - # isaac.data_file_format ="netcdf" - # isaac.file_paths = [tmp_path / "isaac.nc"] - # isaac.write(surge_data.storm_file, file_format="data") + + elif data_file_format in ("netcdf_era5", "netcdf_nws13"): + # Generate NetCDF file + descriptor from committed OWI data. + surge_data.storm_file = _make_isaac_netcdf( + data_file_format, tmp_path, runner.test_path + ) + surge_data.storm_specification_type = "data" + # Validate descriptor structure before running. + _check_netcdf_storm_descriptor(surge_data.storm_file, data_file_format) + else: raise ValueError(f"Unsupported data_file_format={data_file_format}") @@ -219,17 +600,24 @@ def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: _check_geoclaw_storm_descriptor(surge_data.storm_file, regression_storm_file) elif data_file_format == "owi_ascii": _check_data_storm_descriptor(surge_data.storm_file, regression_storm_file) - else: - raise ValueError(f"Unsupported data_file_format={data_file_format}") + # For netcdf variants, structural validation already done above via + # _check_netcdf_storm_descriptor; no committed regression .storm file. # Run geoclaw runner.build_executable() runner.run_code() - # Gauge checks - may need to be format specific for interpolation - # differences - runner.check_gauge(gauge_id=1, regression_path=check_path, save=save) - runner.check_gauge(gauge_id=2, regression_path=check_path, save=save) + # Gauge checks. + # The netcdf variants use the owi_ascii regression gauge data because the + # underlying forcing (from the same committed OWI files, unit-converted) + # is identical. If the simulation results differ from the owi_ascii + # baseline, stop and investigate before generating new reference files. + if data_file_format in ("netcdf_era5", "netcdf_nws13"): + gauge_regression_path = runner.test_path / "regression_data" / "owi_ascii" + else: + gauge_regression_path = check_path + runner.check_gauge(gauge_id=1, regression_path=gauge_regression_path, save=save) + runner.check_gauge(gauge_id=2, regression_path=gauge_regression_path, save=save) if __name__ == "__main__": diff --git a/examples/tsunami/bowl-slosh-netcdf/Makefile b/examples/tsunami/bowl-slosh-netcdf/Makefile index 7d99edd32..f78a9068f 100644 --- a/examples/tsunami/bowl-slosh-netcdf/Makefile +++ b/examples/tsunami/bowl-slosh-netcdf/Makefile @@ -26,7 +26,7 @@ ifeq ($(HAVE_NETCDF), no) $(error This example requires NetCDF support, but none was found.) endif FFLAGS += -DNETCDF $(NETCDF_FFLAGS) -LFLAGS += $(NETCDF_LFLAGS) +LFLAGS += $(NETCDF_LFLAGS) $(FFLAGS) # --------------------------------- # package sources for this program: diff --git a/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py b/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py index a6a2036f1..662d210fb 100644 --- a/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py +++ b/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py @@ -1,9 +1,6 @@ """Pytest regression test for the GeoClaw bowl-slosh NetCDF example.""" from pathlib import Path -import os -import shutil -import subprocess import numpy as np import pytest @@ -11,14 +8,22 @@ import clawpack.geoclaw.test as test import clawpack.geoclaw.topotools as topotools - -def _make_bowl_netcdf_topography(output_dir: Path) -> None: +def _make_bowl_netcdf_topography(output_dir: Path, variant: str = "standard") -> None: """ Create the NetCDF topography input used by the example. - + We are directly creating the NetCDF file here rather than relying on the example's maketopo.py to test out a slightly generic NetCDF writing approach that doesn't rely on the topotools write method.™ + + Parameters + ---------- + output_dir : Path + Directory where the NetCDF file is written. + variant : str + Coordinate layout to produce. ``"standard"`` (default) preserves the + original behavior and writes ``bowl.nc``. All other variants write a + file named after the variant (see ``_VARIANT_FILENAMES``). """ netCDF4 = pytest.importorskip("netCDF4") @@ -30,32 +35,107 @@ def _make_bowl_netcdf_topography(output_dir: Path) -> None: topo.x = np.linspace(-3.1, 3.1, 310) topo.y = np.linspace(-3.5, 2.5, 300) - # Intentionally create latitude first to exercise dimension discovery. - with netCDF4.Dataset(output_dir / "bowl.nc", "w") as out: - out.createDimension("lat", len(topo.y)) - out.createDimension("lon", len(topo.x)) - - latitudes = out.createVariable("lat", "f8", ("lat",)) - longitudes = out.createVariable("lon", "f8", ("lon",)) - elevations = out.createVariable("elevation", "f8", ("lat", "lon")) - - latitudes[:] = topo.y - longitudes[:] = topo.x - elevations[:] = topo.Z + filename = "bowl.nc" + + if variant == "standard": + # Intentionally create latitude first to exercise dimension discovery. + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y + longitudes[:] = topo.x + elevations[:] = topo.Z + + elif variant == "dim_order": + # Data variable stored as (lon, lat) instead of (lat, lon); exercises + # dimension-order discovery. + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lon", "lat")) + + latitudes[:] = topo.y + longitudes[:] = topo.x + elevations[:] = topo.Z.T # transpose to match (lon, lat) + + elif variant == "cf_compliant": + # Standard (lat, lon) layout with full CF attributes added via + # CFNormalizer; exercises CF-attribute-based coordinate discovery. + from clawpack.geoclaw.netcdf_utils import CFNormalizer + import xarray as xr + + ds = xr.Dataset( + {"elevation": (["lat", "lon"], topo.Z)}, + coords={"lat": topo.y, "lon": topo.x}, + ) + ds_norm = CFNormalizer(ds).normalize() + ds_norm.to_netcdf( + output_dir / filename, + encoding={"elevation": {"_FillValue": 9.96921e+36}}, + ) + + elif variant == "cropped": + # Topography domain is exactly 0.5 degrees larger than the simulation + # extent on each side: x in [-2.5, 2.5], y in [-3.5, 2.5]. + # (Simulation domain after test setup: x in [-2, 2], y in [-3, 2].) + topo_crop = topotools.Topography(topo_func=topo_func) + topo_crop.x = np.linspace(-2.5, 2.5, 250) + topo_crop.y = np.linspace(-3.5, 2.5, 300) + + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo_crop.y)) + out.createDimension("lon", len(topo_crop.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo_crop.y + longitudes[:] = topo_crop.x + elevations[:] = topo_crop.Z + + else: + raise ValueError(f"Unknown variant '{variant}'.") @pytest.mark.regression @pytest.mark.netcdf -def test_bowl_slosh_netcdf(tmp_path: Path, save: bool) -> None: - """Regression test for bowl-slosh using NetCDF topography input.""" +@pytest.mark.parametrize("variant", ["standard", "dim_order", "cf_compliant", "cropped"]) +def test_bowl_slosh_netcdf(tmp_path: Path, save: bool, variant: str) -> None: + """ + Parametrized regression test exercising non-standard NetCDF coordinate layouts. + + Each variant writes a topography file with a different coordinate convention + while encoding the same parabolic-bowl bathymetry as test_bowl_slosh_netcdf. + Because the physical bathymetry is identical, all variants must produce gauge + output that matches the existing reference data. + + Variants + -------- + standard Intentionally create latitude first to exercise dimension + discovery; otherwise, a pretty standard layout. + dim_order Data variable stored as (lon, lat) instead of (lat, lon); + exercises dimension-order discovery. + cf_compliant Standard (lat, lon) layout with full CF attributes added via + CFNormalizer (standard_name, axis, units, _FillValue); exercises + CF-attribute-based coordinate discovery. + cropped Topography domain is exactly 0.5 degrees larger than the + simulation extent on each side (x in [-2.5, 2.5], + y in [-3.5, 2.5]); exercises minimal-margin domain coverage. + """ pytest.importorskip("netCDF4") - example_dir = Path(__file__).parent - runner = test.GeoClawTestRunner(tmp_path, test_path=example_dir) - - # Generate topography - _make_bowl_netcdf_topography(tmp_path) + _make_bowl_netcdf_topography(tmp_path, variant=variant) + runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) runner.set_data() runner.rundata.clawdata.lower[1] = -3.0 runner.rundata.clawdata.num_output_times = 1 diff --git a/examples/tsunami/chile2010/maketopo.py b/examples/tsunami/chile2010/maketopo.py index c77ec4dfa..8e40d6aab 100644 --- a/examples/tsunami/chile2010/maketopo.py +++ b/examples/tsunami/chile2010/maketopo.py @@ -22,14 +22,14 @@ # Scratch directory for storing topo and dtopo files: scratch_dir = os.path.join(CLAW, 'geoclaw', 'scratch') -def get_topo(makeplots=False): +def get_topo(path=scratch_dir, makeplots=False): """ Retrieve the topo file from the GeoClaw repository. """ from clawpack.geoclaw import topotools topo_fname = 'etopo10min120W60W60S0S.asc' url = 'http://depts.washington.edu/clawpack/geoclaw/topo/etopo/' + topo_fname - clawpack.clawutil.data.get_remote_file(url, output_dir=scratch_dir, + clawpack.clawutil.data.get_remote_file(url, output_dir=path, file_name=topo_fname, verbose=True) if makeplots: @@ -40,9 +40,11 @@ def get_topo(makeplots=False): plt.savefig(fname) print("Created ",fname) + return os.path.join(scratch_dir, topo_fname) + -def make_dtopo(makeplots=False): +def make_dtopo(path=scratch_dir, makeplots=False): """ Create dtopo data file for deformation of sea floor due to earthquake. Uses the Okada model with fault parameters and mesh specified below. @@ -50,7 +52,7 @@ def make_dtopo(makeplots=False): from clawpack.geoclaw import dtopotools import numpy - dtopo_fname = os.path.join(scratch_dir, "dtopo_usgs100227.tt3") + dtopo_fname = os.path.join(path, "dtopo_usgs100227.tt3") # Specify subfault parameters for this simple fault model consisting # of a single subfault: @@ -107,6 +109,8 @@ def make_dtopo(makeplots=False): plt.savefig(fname) print("Created ",fname) + return dtopo_fname + if __name__=='__main__': get_topo(False) diff --git a/examples/tsunami/chile2010/regression_data/gauge32412.txt b/examples/tsunami/chile2010/regression_data/gauge32412.txt new file mode 100644 index 000000000..2381dcaec --- /dev/null +++ b/examples/tsunami/chile2010/regression_data/gauge32412.txt @@ -0,0 +1,101 @@ +# gauge_id= 32412 location=( -0.8639200000E+02 -0.1797500000E+02 ) num_var= 4 +# Stationary gauge +# level, time, q[ 1 2 3], eta, aux[] +# file format ascii, time series follow in this file + 01 0.0000000E+00 0.4427352E+04 0.0000000E+00 0.0000000E+00 0.0000000E+00 + 01 0.2000000E+00 0.4427352E+04 -0.2390115E-14 0.2735758E-14 0.0000000E+00 + 01 0.4000000E+00 0.4427352E+04 -0.4778627E-14 0.5471769E-14 0.0000000E+00 + 01 0.6000000E+00 0.4427352E+04 -0.7165536E-14 0.8208034E-14 0.0000000E+00 + 01 0.8000000E+00 0.4427352E+04 -0.9550844E-14 0.1094455E-13 0.0000000E+00 + 01 0.1000000E+01 0.4427352E+04 -0.1193455E-13 0.1368133E-13 0.0000000E+00 + 01 0.1200000E+01 0.4427352E+04 -0.1431666E-13 0.1641835E-13 0.0000000E+00 + 01 0.1400000E+01 0.4427352E+04 -0.1669716E-13 0.1915563E-13 0.0000000E+00 + 01 0.3627168E+03 0.4427352E+04 -0.4339780E-11 0.5001933E-11 0.0000000E+00 + 01 0.7240336E+03 0.4427352E+04 -0.6298506E-11 0.1047074E-10 0.0000000E+00 + 01 0.1107879E+04 0.4427352E+04 -0.7260152E-11 0.1603970E-10 0.0000000E+00 + 01 0.1491724E+04 0.4427352E+04 -0.7763718E-11 0.2117563E-10 0.0000000E+00 + 01 0.1800000E+04 0.4427352E+04 -0.8034422E-11 0.2536037E-10 0.0000000E+00 + 01 0.2183845E+04 0.4427352E+04 0.3010818E-09 -0.2892667E-09 -0.2728484E-11 + 01 0.2567691E+04 0.4427352E+04 0.2559190E-08 -0.3083881E-08 -0.2182787E-10 + 01 0.2951536E+04 0.4427352E+04 0.3092790E-07 -0.2723332E-07 -0.2492015E-09 + 01 0.3335381E+04 0.4427352E+04 0.3073843E-07 -0.1131321E-07 0.6184564E-10 + 01 0.3600000E+04 0.4427352E+04 -0.4938416E-06 0.4885115E-06 0.5172296E-08 + 01 0.3983845E+04 0.4427352E+04 -0.4582646E-05 0.4403764E-05 0.3769401E-07 + 01 0.4367691E+04 0.4427352E+04 -0.2040033E-04 0.2044269E-04 0.1588132E-06 + 01 0.4751536E+04 0.4427352E+04 -0.4856143E-04 0.4672530E-04 0.3472078E-06 + 01 0.5135381E+04 0.4427352E+04 -0.3796539E-04 0.1522113E-04 -0.1528770E-07 + 01 0.5400000E+04 0.4427352E+04 0.1474904E-03 -0.1289545E-03 -0.1481011E-05 + 01 0.5783845E+04 0.4427352E+04 0.1166619E-02 -0.1095318E-02 -0.9535837E-05 + 01 0.6167691E+04 0.4427352E+04 0.3750700E-02 -0.3739158E-02 -0.2865544E-04 + 01 0.6551536E+04 0.4427352E+04 0.8655129E-02 -0.9474239E-02 -0.6508556E-04 + 01 0.6935381E+04 0.4427352E+04 0.1552649E-01 -0.1792501E-01 -0.1162010E-03 + 01 0.7200000E+04 0.4427352E+04 0.2152798E-01 -0.2516421E-01 -0.1602286E-03 + 01 0.7583845E+04 0.4427352E+04 0.2864741E-01 -0.2873269E-01 -0.1782480E-03 + 01 0.7967691E+04 0.4427352E+04 0.1488251E-01 -0.1484347E-02 0.3202844E-04 + 01 0.8351536E+04 0.4427353E+04 -0.7601952E-01 0.1279742E+00 0.9418253E-03 + 01 0.8735381E+04 0.4427355E+04 -0.3490622E+00 0.4905172E+00 0.3322598E-02 + 01 0.9000000E+04 0.4427358E+04 -0.7230538E+00 0.9760776E+00 0.6320155E-02 + 01 0.9383845E+04 0.4427365E+04 -0.1599329E+01 0.2121093E+01 0.1322233E-01 + 01 0.9767691E+04 0.4427375E+04 -0.2866216E+01 0.3801185E+01 0.2304002E-01 + 01 0.1015154E+05 0.4427387E+04 -0.4418212E+01 0.5893127E+01 0.3491224E-01 + 01 0.1053538E+05 0.4427399E+04 -0.6000644E+01 0.8149366E+01 0.4712919E-01 + 01 0.1080000E+05 0.4427407E+04 -0.6964159E+01 0.9628966E+01 0.5483370E-01 + 01 0.1118385E+05 0.4427416E+04 -0.8006512E+01 0.1145495E+02 0.6354060E-01 + 01 0.1156769E+05 0.4427421E+04 -0.8571755E+01 0.1272590E+02 0.6870498E-01 + 01 0.1195154E+05 0.4427422E+04 -0.8693416E+01 0.1336695E+02 0.7018643E-01 + 01 0.1233538E+05 0.4427420E+04 -0.8336861E+01 0.1336248E+02 0.6780721E-01 + 01 0.1260000E+05 0.4427416E+04 -0.7859248E+01 0.1295779E+02 0.6407369E-01 + 01 0.1298385E+05 0.4427408E+04 -0.6877429E+01 0.1175821E+02 0.5575264E-01 + 01 0.1336769E+05 0.4427396E+04 -0.5539940E+01 0.9894587E+01 0.4433368E-01 + 01 0.1375154E+05 0.4427382E+04 -0.3983030E+01 0.7432767E+01 0.3042898E-01 + 01 0.1413538E+05 0.4427367E+04 -0.2364429E+01 0.4540549E+01 0.1542519E-01 + 01 0.1440000E+05 0.4427357E+04 -0.1330441E+01 0.2452426E+01 0.5240889E-02 + 01 0.1478385E+05 0.4427344E+04 -0.1188210E+00 -0.3866864E+00 -0.7784205E-02 + 01 0.1516769E+05 0.4427334E+04 0.6520561E+00 -0.2854280E+01 -0.1795286E-01 + 01 0.1555154E+05 0.4427327E+04 0.1026629E+01 -0.4801948E+01 -0.2481501E-01 + 01 0.1593538E+05 0.4427324E+04 0.9731481E+00 -0.6153433E+01 -0.2809571E-01 + 01 0.1620000E+05 0.4427324E+04 0.7295747E+00 -0.6727638E+01 -0.2846905E-01 + 01 0.1658385E+05 0.4427325E+04 0.1533091E+00 -0.7151808E+01 -0.2687526E-01 + 01 0.1696769E+05 0.4427329E+04 -0.5895360E+00 -0.7174949E+01 -0.2341673E-01 + 01 0.1735153E+05 0.4427333E+04 -0.1365270E+01 -0.6918137E+01 -0.1900047E-01 + 01 0.1773538E+05 0.4427338E+04 -0.2061471E+01 -0.6456181E+01 -0.1429476E-01 + 01 0.1800000E+05 0.4427341E+04 -0.2451184E+01 -0.6041314E+01 -0.1116029E-01 + 01 0.1838384E+05 0.4427345E+04 -0.2873480E+01 -0.5309670E+01 -0.6961804E-02 + 01 0.1876768E+05 0.4427349E+04 -0.3110123E+01 -0.4445844E+01 -0.3398177E-02 + 01 0.1915152E+05 0.4427352E+04 -0.3152141E+01 -0.3479122E+01 -0.5241309E-03 + 01 0.1953536E+05 0.4427353E+04 -0.2993134E+01 -0.2486105E+01 0.1351050E-02 + 01 0.1980000E+05 0.4427354E+04 -0.2795176E+01 -0.1837147E+01 0.2012341E-02 + 01 0.2018384E+05 0.4427354E+04 -0.2321637E+01 -0.1004576E+01 0.1671369E-02 + 01 0.2056768E+05 0.4427352E+04 -0.1595358E+01 -0.3447845E+00 -0.3655244E-03 + 01 0.2095152E+05 0.4427348E+04 -0.6300869E+00 0.1345721E+00 -0.3948899E-02 + 01 0.2133537E+05 0.4427344E+04 0.4198216E+00 0.4697812E+00 -0.8328412E-02 + 01 0.2160000E+05 0.4427341E+04 0.1111855E+01 0.6364739E+00 -0.1140507E-01 + 01 0.2198385E+05 0.4427337E+04 0.2005934E+01 0.8160777E+00 -0.1554026E-01 + 01 0.2236769E+05 0.4427333E+04 0.2720605E+01 0.9497388E+00 -0.1895001E-01 + 01 0.2275153E+05 0.4427331E+04 0.3247472E+01 0.1061895E+01 -0.2152909E-01 + 01 0.2313538E+05 0.4427329E+04 0.3595387E+01 0.1146651E+01 -0.2328385E-01 + 01 0.2340000E+05 0.4427328E+04 0.3717978E+01 0.1181580E+01 -0.2393969E-01 + 01 0.2378384E+05 0.4427328E+04 0.3797370E+01 0.1190063E+01 -0.2432030E-01 + 01 0.2416769E+05 0.4427328E+04 0.3755513E+01 0.1138848E+01 -0.2396434E-01 + 01 0.2455153E+05 0.4427329E+04 0.3605192E+01 0.1033698E+01 -0.2296864E-01 + 01 0.2493538E+05 0.4427331E+04 0.3351138E+01 0.8732046E+00 -0.2136197E-01 + 01 0.2520000E+05 0.4427332E+04 0.3093646E+01 0.7285912E+00 -0.1977159E-01 + 01 0.2558384E+05 0.4427335E+04 0.2593677E+01 0.4668083E+00 -0.1678351E-01 + 01 0.2596769E+05 0.4427339E+04 0.1985026E+01 0.1591721E+00 -0.1322473E-01 + 01 0.2635153E+05 0.4427342E+04 0.1368104E+01 -0.1594560E+00 -0.9628789E-02 + 01 0.2673538E+05 0.4427346E+04 0.8116776E+00 -0.4786212E+00 -0.6347562E-02 + 01 0.2700000E+05 0.4427348E+04 0.4840644E+00 -0.6861564E+00 -0.4425335E-02 + 01 0.2738384E+05 0.4427350E+04 0.1022280E+00 -0.9589704E+00 -0.2156191E-02 + 01 0.2776769E+05 0.4427352E+04 -0.1777967E+00 -0.1191364E+01 -0.4908643E-03 + 01 0.2815153E+05 0.4427353E+04 -0.3656266E+00 -0.1382266E+01 0.6208400E-03 + 01 0.2853538E+05 0.4427353E+04 -0.4772250E+00 -0.1526913E+01 0.1288716E-02 + 01 0.2880000E+05 0.4427354E+04 -0.5167400E+00 -0.1598649E+01 0.1530585E-02 + 01 0.2918385E+05 0.4427354E+04 -0.5330856E+00 -0.1664146E+01 0.1675426E-02 + 01 0.2956769E+05 0.4427354E+04 -0.5075236E+00 -0.1673957E+01 0.1603983E-02 + 01 0.2995154E+05 0.4427353E+04 -0.4438678E+00 -0.1627665E+01 0.1328935E-02 + 01 0.3033538E+05 0.4427353E+04 -0.3423572E+00 -0.1522600E+01 0.8208316E-03 + 01 0.3060000E+05 0.4427352E+04 -0.2521980E+00 -0.1410519E+01 0.3320302E-03 + 01 0.3098385E+05 0.4427352E+04 -0.8979741E-01 -0.1199891E+01 -0.5531937E-03 + 01 0.3136769E+05 0.4427351E+04 0.9566841E-01 -0.9400675E+00 -0.1525651E-02 + 01 0.3175154E+05 0.4427350E+04 0.2778961E+00 -0.6457966E+00 -0.2439594E-02 + 01 0.3213538E+05 0.4427349E+04 0.4282149E+00 -0.3351473E+00 -0.3173087E-02 diff --git a/examples/tsunami/chile2010/test_chile2010.py b/examples/tsunami/chile2010/test_chile2010.py new file mode 100644 index 000000000..3397c538e --- /dev/null +++ b/examples/tsunami/chile2010/test_chile2010.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python +""" +Chile 2010 tsunami regression test + +Also tests some variants on lat-lon topography reading, including several +NetCDF topography file formats with non-standard coordinate conventions. +""" + +import warnings +from pathlib import Path + +import pytest + +import clawpack.geoclaw.test as test +import clawpack.geoclaw.topotools as topotools +import clawpack.geoclaw.dtopotools as dtopotools +import clawpack.clawutil.util as clawutil + +def _get_topography(runner, variant=0): + """Get topography and read""" + + # Ensure that the topography file is available + maketopo = clawutil.fullpath_import(runner.test_path / "maketopo.py", + module_name="maketopo") + topo_path = maketopo.get_topo() + dtopo_path = maketopo.make_dtopo() + + # Read in original topography data and write out in format expected by test + topo = topotools.Topography() + topo.topo_type = 2 + topo.path = topo_path + topo.read() + + # Read in dtopography data + dtopo = dtopotools.DTopography() + dtopo.dtopo_type = 3 + dtopo.path = dtopo_path + dtopo.read() + + return topo, dtopo + + +def _write_netcdf_variant(topo, variant, output_path, crop_bounds=None): + """Write topography as a NetCDF file in the requested variant format. + + Parameters + ---------- + topo : topotools.Topography + Topography object with x, y, Z already populated. + variant : str + One of ``"netcdf_topotools"``, ``"lat_flip"``, or ``"lon_360"``. + output_path : Path + Destination file path for the NetCDF output. + crop_bounds : tuple or None + (lon_min, lon_max, lat_min, lat_max) in domain coordinates. When + provided, ``topo_entries()`` is called to compute the correct + ``lon_offset`` for the descriptor; the returned list has the same + ``[ttype, path, meta]`` element format used by ``topo_data.topofiles``. + + Variants + -------- + netcdf_topotools + Write via ``topotools.Topography.write(topo_type=4)``, then run the + result through CFNormalizer. If CFNormalizer has to fix anything + (e.g. rename coordinate variables), a UserWarning is emitted so the + gap between topotools output and strict CF compliance is visible. + lat_flip + Write NetCDF manually (netCDF4) with latitude stored N-to-S + (descending), which is non-standard but common in observational data. + Elevation rows are flipped accordingly so the physical bathymetry is + identical to the standard variant. + lon_360 + Write NetCDF manually (netCDF4) with longitude in [0, 360] rather than + [-180, 180]. The physical bathymetry is otherwise identical. + """ + if variant == "netcdf_topotools": + import netCDF4 # guaranteed available; test already called importorskip + import xarray as xr + from clawpack.geoclaw.netcdf_utils import CFNormalizer + + topo.write(str(output_path), topo_type=4) + + # Open the just-written file and run it through CFNormalizer to check + # whether the output is already fully CF-compliant. + with xr.open_dataset(output_path) as ds_before: + coords_before = set(ds_before.coords) + attrs_before = {name: dict(ds_before[name].attrs) + for name in list(ds_before.coords) + list(ds_before.data_vars)} + + ds_after = CFNormalizer(ds_before).normalize() + + coords_after = set(ds_after.coords) + attrs_after = {name: dict(ds_after[name].attrs) + for name in list(ds_after.coords) + list(ds_after.data_vars)} + + changed = coords_before != coords_after + if not changed: + # Check whether any attrs changed on variables that survived renaming + for name in coords_before & coords_after: + if attrs_before.get(name) != attrs_after.get(name): + changed = True + break + if not changed: + for name in ds_after.data_vars: + if attrs_before.get(name) != attrs_after.get(name): + changed = True + break + + if changed: + renamed = coords_before - coords_after + warnings.warn( + f"CFNormalizer modified the topotools NetCDF output " + f"('{output_path.name}'). " + f"Coordinate variables renamed or attributes added: " + f"{renamed or '(attr-only change)'}. " + "The topotools.write topo_type=4 path is not fully " + "CF-compliant; consider updating it to match CF standard " + "variable names.", + UserWarning, + stacklevel=2, + ) + + elif variant == "lat_flip": + import netCDF4 # guaranteed available; test already called importorskip + + # Write latitude N-to-S (descending) — a common non-standard convention. + # Flip Z rows so each row still corresponds to the correct latitude. + with netCDF4.Dataset(output_path, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y[::-1] # descending: N → S + longitudes[:] = topo.x + elevations[:] = topo.Z[::-1, :] # flip rows to match reversed lat + + elif variant == "lon_360": + import netCDF4 # guaranteed available; test already called importorskip + + # Write longitude in [0, 360] convention instead of [-180, 180]. + with netCDF4.Dataset(output_path, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y + longitudes[:] = topo.x % 360 # map [-180, 180] → [0, 360] + elevations[:] = topo.Z + + else: + raise ValueError(f"Unknown NetCDF variant '{variant}'.") + + # Interrogate the written file to auto-detect lat_order, dim_order, etc. + # When crop_bounds (domain coordinates) are given, topo_entries() is used + # so that the correct lon_offset is computed and written into the descriptor + # block that Fortran's read_netcdf_descriptor needs. + from clawpack.geoclaw.netcdf_utils import TopoInterrogator + with TopoInterrogator(output_path, 'elevation', crop_bounds=crop_bounds) as intr: + if crop_bounds is not None: + return intr.topo_entries() + else: + return [[4, output_path, intr.interrogate_topo()]] + +CASES = [pytest.param("standard", id="standard"), + pytest.param("netcdf_topotools", id="netcdf_topotools", marks=pytest.mark.netcdf), + pytest.param("lat_flip", id="lat_flip", marks=pytest.mark.netcdf), + pytest.param("lon_360", id="lon_360", marks=pytest.mark.netcdf)] + +@pytest.mark.regression +@pytest.mark.remote +@pytest.mark.tsunami +@pytest.mark.parametrize("variant", CASES) +def test_chile2010(tmp_path: Path, save: bool, variant: dict): + """Chile 2010 tsunami regression test for GeoClaw. + + Parametrized over topography file format. Because every variant encodes + the same physical bathymetry, all variants must reproduce the existing + reference gauge output at gauge 32412 within the same tolerances. + + Variants + -------- + standard + ASCII topo type 2 read directly from the downloaded file. This is + the baseline behavior and is identical to the pre-parametrization test. + netcdf_topotools + The downloaded ASCII topo is re-written as NetCDF via + ``topotools.Topography.write(topo_type=4)``. CFNormalizer is run over + the result to surface any CF-compliance gaps in the write path. + lat_flip + NetCDF written manually (netCDF4) with latitude stored N-to-S + (descending) rather than the usual S-to-N (ascending) order. + lon_360 + NetCDF written manually (netCDF4) with longitude in the [0, 360] + convention rather than [-180, 180]. + """ + if variant != "standard": + pytest.importorskip("netCDF4") + + runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) + + # Topography + topo, dtopo = _get_topography(runner) + + # Setup data for test + runner.set_data() + # runner.rundata.clawdata.num_output_times = 1 + runner.rundata.amrdata.amr_levels_max = 1 + + if variant == "standard": + runner.rundata.topo_data.topofiles = [[2, topo.path]] + else: + nc_path = tmp_path / f"chile_topo_{variant}.nc" + clawdata = runner.rundata.clawdata + # lon_360 needs domain crop bounds so topo_entries() can compute the + # correct lon_offset (-360.0) for the descriptor. Other variants use + # interrogate_topo() (lon_offset=0.0 is correct for [-180,180] files). + crop = (clawdata.lower[0], clawdata.upper[0], + clawdata.lower[1], clawdata.upper[1]) if variant == "lon_360" else None + entries = _write_netcdf_variant(topo, variant, nc_path, crop_bounds=crop) + runner.rundata.topo_data.topofiles = entries + + runner.write_data() + + # Build and run test + if variant != "standard": + print(f"Testing variant '{variant}' with topography file: {nc_path.name}") + runner.build_executable(FFLAGS="-DNETCDF $(NETCDF_FFLAGS)", + LFLAGS="$(NETCDF_LFLAGS)") + else: + print(f"Testing standard variant with topography file: {topo.path}") + runner.build_executable() + runner.run_code() + + # lon_360 coordinates undergo +360 then -360 shifts that are not a + # perfect floating-point round-trip for non-round values, so allow a + # slightly relaxed tolerance (still much smaller than 1% physically). + tol = {"atol": 1e-2} if variant == "lon_360" else {} + runner.check_gauge(save=save, gauge_id=32412, **tol) + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__])) diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 278328d73..8c94a6149 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -47,6 +47,7 @@ module data_storm_module ! Run-time storm modification settings real(kind=8) :: wind_scaling, pressure_scaling + real(kind=8) :: storm_time_scale ! Ramping settings integer :: window_type @@ -94,9 +95,17 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) integer :: nc_fid, num_dims, num_vars, var_id character(len=16) :: name, dim_names(3), var_names(3) + ! NetCDF descriptor parsing (format 2) + real(kind=8) :: nc_time_offset + integer :: lon_convention + character(len=512) :: nc_line, nc_key, nc_val + integer :: nc_eq_pos, nc_in_file_info + integer :: nc_vn_start, nc_vn_end, nc_role_start, nc_role_end + if (.not.module_setup) then ! Open data file - print *,'Reading storm data file ', storm_data_path + print *,'Reading storm data file ', trim(storm_data_path) + print * open(unit=data_unit, file=storm_data_path, status='old', & action='read', iostat=io_status) if (io_status /= 0) then @@ -125,7 +134,16 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) else read(data_unit, *) end if - read(data_unit, *) + + ! storm_time_scale stretches (>1) or compresses (<1) the time axis + ! to simulate slower or faster storm translation speeds. + ! A blank line here means a legacy file without the parameter; default to 1.0. + read(data_unit, *, iostat=io_status) storm_time_scale + if (io_status /= 0) then + storm_time_scale = 1.0d0 + write(log_unit, *) "Warning: storm_time_scale not in data file, defaulting to 1.0" + end if + write(log_unit, "('storm_time_scale = ',d16.8)") storm_time_scale if (DEBUG) then print "('time_offset = ',a)", storm%time_offset @@ -138,17 +156,99 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) case(1) ! ASCII/OWI read(data_unit, *) read(data_unit, *) - case(2) ! NetCDF - read(data_unit, *) - read(data_unit, *) dim_names - read(data_unit, *) var_names - read(data_unit, *) - write(log_unit, "('dims = ',a)") dim_names - write(log_unit, "('vars = ',a)") var_names + case(2) ! NetCDF - &file_info / &variable_info namelist format + read(data_unit, *) ! "# Format Data Information" + nc_time_offset = 0.0d0 + lon_convention = 180 + nc_in_file_info = 0 + dim_names = '' + var_names = '' + + do + read(data_unit, '(a)', iostat=io_status) nc_line + if (io_status /= 0) exit + nc_line = adjustl(nc_line) + + ! Stop at "# File paths" — put it back for post-case read + if (nc_line(1:12) == '# File paths') then + backspace(data_unit) + exit + end if + + ! Start of &file_info block + if (nc_line(1:10) == '&file_info') then + nc_in_file_info = 1 + cycle + end if + + ! End of block (standalone /) + if (trim(nc_line) == '/') then + nc_in_file_info = 0 + cycle + end if + + ! &variable_info inline block + ! Format: &variable_info var_name=u10 geoclaw_role=wind_u / + if (nc_line(1:14) == '&variable_info') then + nc_vn_start = index(nc_line, 'var_name=') + 9 + nc_vn_end = nc_vn_start + do while (nc_vn_end <= len_trim(nc_line) .and. & + nc_line(nc_vn_end:nc_vn_end) /= ' ' .and. & + nc_line(nc_vn_end:nc_vn_end) /= '/') + nc_vn_end = nc_vn_end + 1 + end do + nc_val = nc_line(nc_vn_start:nc_vn_end-1) + + nc_role_start = index(nc_line, 'geoclaw_role=') + 13 + nc_role_end = nc_role_start + do while (nc_role_end <= len_trim(nc_line) .and. & + nc_line(nc_role_end:nc_role_end) /= ' ' .and. & + nc_line(nc_role_end:nc_role_end) /= '/') + nc_role_end = nc_role_end + 1 + end do + nc_key = nc_line(nc_role_start:nc_role_end-1) + + select case(trim(nc_key)) + case('wind_u') + var_names(1) = trim(nc_val) + case('wind_v') + var_names(2) = trim(nc_val) + case('pressure') + var_names(3) = trim(nc_val) + end select + cycle + end if + + ! key = value lines inside &file_info + if (nc_in_file_info == 1) then + nc_eq_pos = index(nc_line, '=') + if (nc_eq_pos > 0) then + nc_key = adjustl(trim(nc_line(1:nc_eq_pos-1))) + nc_val = adjustl(trim(nc_line(nc_eq_pos+1:))) + select case(trim(nc_key)) + case('lon_name') + dim_names(1) = trim(nc_val) + case('lat_name') + dim_names(2) = trim(nc_val) + case('time_name') + dim_names(3) = trim(nc_val) + case('lon_convention') + read(nc_val, *) lon_convention + case('time_offset') + read(nc_val, *) nc_time_offset + end select + end if + end if + end do + + write(log_unit, "('dims = ',3(1x,a))") dim_names + write(log_unit, "('vars = ',3(1x,a))") var_names + write(log_unit, "('lon_convention = ',i4)") lon_convention + write(log_unit, "('nc_time_offset = ',d16.8)") nc_time_offset if (DEBUG) then - print "('dims = ',a)", dim_names - print "('vars = ',a)", var_names + print *, "dims = ", dim_names + print *, "vars = ", var_names end if case default print *, "Storm data format", file_format," not available." @@ -199,8 +299,9 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) allocate(storm%time(mt)) ! Fill out variable data/info - print *, "Reading pressure file ", storm%paths(1) - print *, "Reading wind file ", storm%paths(2) + print *, "Reading pressure file ", trim(storm%paths(1)) + print *, "Reading wind file ", trim(storm%paths(2)) + print * call read_OWI_ASCII(storm%paths(2), storm%paths(1), & mx, my, mt, sw_lat, sw_lon, dy, dx, & storm, seconds_from_epoch(time(:, 1))) @@ -209,11 +310,14 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) #ifdef NETCDF ! Open file and get file ID ! :TODO: Only read in times that are between t0 and tfinal - print *, "Reading storm NetCDF file ", storm%paths(1) + print *, "Reading storm NetCDF file ", trim(storm%paths(1)) + print * call check_netcdf_error(nf90_open(storm%paths(1), nf90_nowrite, nc_fid)) ! Check dim/var number call check_netcdf_error(nf90_inquire(nc_fid, num_dims, num_vars)) - if (num_dims /= 3 .and. num_vars /= 3) then + ! Check to make sure the mimimal number of dimansions and + ! variables are there to read in the data + if (num_dims < 3 .or. num_vars < 3) then print *, "Invalid number of dimensions/variables." print *, " num_dims = ", num_dims print *, " num_vars = ", num_vars @@ -252,17 +356,50 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) call check_netcdf_error(nf90_inq_varid(nc_fid, dim_names(3), var_ID)) call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%time)) - ! Convert time to seconds from offset - storm%time(:) = storm%time(:) - seconds_from_epoch(time(:, 1)) - - ! Wind - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(1), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_u)) - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(2), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_v)) - ! ! Pressure - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(3), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%pressure)) + ! Normalize [0,360] longitude to [-180,180] + if (lon_convention == 360) then + where (storm%longitude > 180.0d0) + storm%longitude = storm%longitude - 360.0d0 + end where + end if + + ! nc_time_offset = (first_file_time - storm_offset) in s. + ! Subtracting raw[0] handles files where time is stored as + ! absolute seconds (e.g. Unix epoch) rather than relative. + storm%time(:) = storm%time(:) - storm%time(1) & + + nint(nc_time_offset) + + ! Stretch (>1) or compress (<1) the time axis relative to + ! the first time step (anchor). No-op when scale is 1.0. + if (storm_time_scale /= 1.0d0) then + storm%time(:) = storm%time(1) & + + nint(storm_time_scale & + * dble(storm%time(:) - storm%time(1))) + end if + + ! Wind (optional — zero-fill if not in descriptor) + if (len_trim(var_names(1)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(1)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_u)) + else + write(log_unit, *) "Warning: wind_u not in descriptor, setting to zero." + storm%wind_u = 0.0d0 + end if + if (len_trim(var_names(2)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(2)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_v)) + else + write(log_unit, *) "Warning: wind_v not in descriptor, setting to zero." + storm%wind_v = 0.0d0 + end if + ! Pressure + if (len_trim(var_names(3)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(3)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%pressure)) + else + write(log_unit, *) "Warning: pressure not in descriptor, setting to zero." + storm%pressure = 0.0d0 + end if ! Close file to stop corrupting the netcdf files call check_netcdf_error(nf90_close(nc_fid)) diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index 808c8f07a..91659f402 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -39,6 +39,20 @@ module topo_module ! NetCDF4 support real(kind=4), parameter :: CONVENTION_REQUIREMENT = 1.0 + ! NetCDF descriptor fields — one slot per topo file, populated from the + ! key=value block in topo.data by read_netcdf_descriptor (type 4 only). + character(len=64), allocatable :: nc_var_name(:) ! topo variable name + character(len=64), allocatable :: nc_lon_name(:) ! longitude coord name + character(len=64), allocatable :: nc_lat_name(:) ! latitude coord name + character(len=32), allocatable :: nc_lat_order(:) ! 'N_to_S' or 'S_to_N' + character(len=32), allocatable :: nc_dim_order(:) ! e.g. 'lat,lon' + character(len=8), allocatable :: nc_fill_action(:) ! 'abort' or 'warn' + real(kind=8), allocatable :: nc_lon_offset(:) ! scalar added to file coords: x_domain = x_file + offset + real(kind=8), allocatable :: nc_fill_value(:) ! fill/nodata sentinel + real(kind=8), allocatable :: nc_crop_bounds(:,:) ! (4, n): lon0,lon1,lat0,lat1 + logical, allocatable :: nc_has_fill(:) ! fill_value was specified + logical, allocatable :: nc_has_crop(:) ! crop_bounds was specified + ! dtopo variables ! Work array real(kind=8), allocatable :: dtopowork(:) @@ -153,12 +167,39 @@ subroutine read_topo_settings(restart,file_name) allocate(topoID(mtopofiles),topotime(mtopofiles),topo0save(mtopofiles)) allocate(i0topo0(mtopofiles),topo0ID(mtopofiles)) allocate(areatopo(mtopofiles)) + + ! NetCDF descriptor arrays — allocated for all files; only + ! filled in for type 4 entries by read_netcdf_descriptor. + allocate(nc_var_name(mtopofiles), nc_lon_name(mtopofiles)) + allocate(nc_lat_name(mtopofiles), nc_lat_order(mtopofiles)) + allocate(nc_dim_order(mtopofiles), nc_fill_action(mtopofiles)) + allocate(nc_lon_offset(mtopofiles), nc_fill_value(mtopofiles)) + allocate(nc_crop_bounds(4, mtopofiles)) + allocate(nc_has_fill(mtopofiles), nc_has_crop(mtopofiles)) + nc_var_name = '' + nc_lon_name = '' + nc_lat_name = '' + nc_lat_order = 'S_to_N' + nc_dim_order = 'lat,lon' + nc_fill_action = 'abort' + nc_lon_offset = 0.0d0 + nc_fill_value = 0.0d0 + nc_crop_bounds = 0.0d0 + nc_has_fill = .false. + nc_has_crop = .false. + mtopofiles = mtopofiles - num_dtopo ! decrement after allocates do i=1,mtopofiles read(iunit,*) topofname(i) read(iunit,*) itopotype(i) + ! For NetCDF files, read the optional key=value descriptor + ! block that follows the topo_type line. + if (abs(itopotype(i)) == 4) then + call read_netcdf_descriptor(iunit, i) + end if + write(GEO_PARM_UNIT,*) ' ' write(GEO_PARM_UNIT,*) ' ',topofname(i) write(GEO_PARM_UNIT,*) ' itopotype = ', itopotype(i) @@ -168,7 +209,7 @@ subroutine read_topo_settings(restart,file_name) endif call read_topo_header(topofname(i),itopotype(i),mxtopo(i), & mytopo(i),xlowtopo(i),ylowtopo(i),xhitopo(i),yhitopo(i), & - dxtopo(i),dytopo(i)) + dxtopo(i),dytopo(i), i) topoID(i) = i mtopo(i) = int(mxtopo(i), 8) * int(mytopo(i), 8) areatopo(i) = dxtopo(i)*dytopo(i) @@ -212,7 +253,7 @@ subroutine read_topo_settings(restart,file_name) topoID(i) = i topotime(i) = -huge(1.0) call read_topo_file(mxtopo(i),mytopo(i),itopotype(i),topofname(i), & - xlowtopo(i),ylowtopo(i),topowork(i0topo(i):i0topo(i)+mtopo(i)-1)) + xlowtopo(i),ylowtopo(i),topowork(i0topo(i):i0topo(i)+mtopo(i)-1), i) ! set topo0save(i) = 1 if this topo file intersects any ! dtopo file. This approach to setting topo0save is changed from ! v5.4.1, where it only checked if some dtopo point lies within the @@ -486,7 +527,7 @@ end subroutine set_topo_for_dtopo ! Read topo file. ! ======================================================================== - subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) + subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) #ifdef NETCDF use netcdf @@ -502,6 +543,7 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) character(len=512), intent(in) :: fname real(kind=8), intent(inout) :: topo(1:int(mx, 8)*int(my, 8)) real(kind=8), intent(in) :: xll,yll + integer, intent(in), optional :: topo_idx ! Locals integer, parameter :: iunit = 19, miss_unit = 17 @@ -515,17 +557,18 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) ! NetCDF Support character(len=64) :: direction, x_dim_name, x_var_name, y_dim_name, & y_var_name, z_var_name, var_name - ! character(len=1) :: axis_string real(kind=8), allocatable :: nc_buffer(:, :), xlocs(:), ylocs(:) integer(kind=4) :: x_var_id, y_var_id, z_var_id, x_dim_id, y_dim_id integer(kind=4) :: xstart(1), ystart(1), mx_tot, my_tot integer(kind=4) :: ios, nc_file, dim_ids(2), num_dims, & var_type, num_vars, num_dims_tot, z_dim_ids(2) + logical :: lat_south_to_north mtot = int(mx, 8) * int(my, 8) print *, ' ' - print *, 'Reading topography file ', fname + print *, 'Reading topography file ', trim(fname) + print * select case(abs(topo_type)) ! ASCII file with x,y,z values on each line. @@ -625,108 +668,180 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) ! NetCDF case(4) #ifdef NETCDF - ! Open file + ! Open file call check_netcdf_error(nf90_open(fname, nf90_nowrite, nc_file)) - - ! Get number of dimensions and vars - call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, & - num_vars)) - - ! get x and y dimension info - call get_dim_info(nc_file, num_dims_tot, x_dim_id, x_dim_name, & - mx_tot, y_dim_id, y_dim_name, my_tot) - - allocate(xlocs(mx_tot),ylocs(my_tot)) - - call check_netcdf_error(nf90_get_var(nc_file, x_dim_id, xlocs, start=(/ 1 /), count=(/ mx_tot /))) - call check_netcdf_error(nf90_get_var(nc_file, y_dim_id, ylocs, start=(/ 1 /), count=(/ my_tot /))) - xstart = minloc(xlocs, mask=(xlocs.eq.xll)) - ystart = minloc(ylocs, mask=(ylocs.eq.yll)) - deallocate(xlocs,ylocs) - - z_var_id = -1 - do n=1, num_vars - call check_netcdf_error(nf90_inquire_variable(nc_file, n, var_name, var_type, num_dims, dim_ids)) - - ! Assume dim names are same as var ids - if (var_name == x_dim_name) then - x_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, x_var_name, x_var_id)) - else if (var_name == y_dim_name) then - y_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, y_var_name, y_var_id)) - else if (num_dims == 2) then - z_var_name = var_name - z_dim_ids = dim_ids - call check_netcdf_error(nf90_inq_varid(nc_file, z_var_name, z_var_id)) + + call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, num_vars)) + + ! ---------------------------------------------------------------- + ! Dimension discovery: descriptor or fallback heuristic. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_lon_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lon_name(topo_idx)), x_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + x_dim_id, x_dim_name, mx_tot)) + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lat_name(topo_idx)), y_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + y_dim_id, y_dim_name, my_tot)) + else + call get_dim_info(nc_file, num_dims_tot, x_dim_id, & + x_dim_name, mx_tot, y_dim_id, y_dim_name, my_tot) + end if + + ! Dimension IDs and variable IDs are separate ID spaces in NetCDF. + ! Look up the coordinate variable IDs by name so nf90_get_var + ! receives the correct IDs regardless of creation order in the file. + call check_netcdf_error(nf90_inq_varid(nc_file, trim(x_dim_name), x_var_id)) + call check_netcdf_error(nf90_inq_varid(nc_file, trim(y_dim_name), y_var_id)) + + allocate(xlocs(mx_tot), ylocs(my_tot)) + call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, & + start=(/ 1 /), count=(/ mx_tot /))) + call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & + start=(/ 1 /), count=(/ my_tot /))) + + ! Apply lon_offset to convert file coordinates to domain + ! coordinates, so xstart matches the domain-coord xll that + ! was computed by read_topo_header. + if (present(topo_idx)) then + if (nc_lon_offset(topo_idx) /= 0.0d0) then + xlocs = xlocs + nc_lon_offset(topo_idx) end if + end if - end do - if (z_var_id == -1) then - stop "Unable to find topography data!" + xstart = minloc(xlocs, mask=(xlocs == xll)) + lat_south_to_north = (ylocs(1) < ylocs(my_tot)) + if (lat_south_to_north) then + ! S-to-N: southern boundary (yll) is at a low index. + ystart = minloc(ylocs, mask=(ylocs == yll)) + else + ! N-to-S: the read must start at the northern boundary (yhi), + ! which sits at a low index in the descending array. + ! Reconstruct yhi from yll, my, and the absolute grid spacing. + y = yll + (my - 1) * abs(ylocs(2) - ylocs(1)) + ystart = minloc(ylocs, & + mask=(abs(ylocs - y) < 0.5d0 * abs(ylocs(2) - ylocs(1)))) + end if + deallocate(xlocs, ylocs) + + ! ---------------------------------------------------------------- + ! Find the topo data variable: descriptor or first 2-D variable. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_var_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_file, & + trim(nc_var_name(topo_idx)), z_var_id)) + call check_netcdf_error(nf90_inquire_variable(nc_file, & + z_var_id, var_name, var_type, num_dims, z_dim_ids)) + else + z_var_id = -1 + do n = 1, num_vars + call check_netcdf_error(nf90_inquire_variable(nc_file, & + n, var_name, var_type, num_dims, dim_ids)) + if (var_name /= x_dim_name .and. & + var_name /= y_dim_name .and. num_dims == 2) then + z_var_id = n + z_dim_ids = dim_ids + exit + end if + end do + if (z_var_id == -1) then + print *, "ERROR: Unable to find topo variable in ", trim(fname) + stop + end if end if - ! Load in data - ! TODO: Provide striding into data if need be - - ! only try to access data if theres overlap with the domain + ! ---------------------------------------------------------------- + ! Load data into topo array (only if region overlaps domain). + ! ---------------------------------------------------------------- if ((mx > 0) .and. (my > 0)) then if (z_dim_ids(1) == x_dim_id) then allocate(nc_buffer(mx, my)) else if (z_dim_ids(1) == y_dim_id) then allocate(nc_buffer(my, mx)) - else - stop " NetCDF z variable has dimensions that don't align with x and y" + else + print *, "ERROR: NetCDF z variable dimensions do not align with x/y" + stop end if - call check_netcdf_error(nf90_get_var(nc_file, z_var_id, nc_buffer, & - start = (/ xstart(1), ystart(1) /), & - count = (/ mx, my /))) + if (z_dim_ids(1) == x_dim_id) then + ! Variable stored (lon, lat): start/count already lon-first. + call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & + nc_buffer, start=(/ xstart(1), ystart(1) /), & + count=(/ mx, my /))) + else + ! Variable stored (lat, lon): swap start and count to match + ! the actual dimension order of the variable. + call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & + nc_buffer, start=(/ ystart(1), xstart(1) /), & + count=(/ my, mx /))) + end if - ! check for lon/lat ordering of z variable + ! Transpose into GeoClaw topo array (N-to-S row order). + ! For S-to-N files ystart points to the southern end of the + ! domain, so nc_buffer row 1 = south, row my = north; reverse + ! with (my - j) to store north first. + ! For N-to-S files ystart points to the northern end, so + ! nc_buffer row 1 = north already; copy in order with (j + 1). if (z_dim_ids(1) == x_dim_id) then - do j = 0, int(my, 8) - 1 - topo(j * int(mx, 8) + 1:(j + 1) * int(mx, 8)) = nc_buffer(:, my - j) - end do - else if (z_dim_ids(1) == y_dim_id) then - do j = 0, int(my, 8) - 1 - topo(j * int(mx, 8) + 1:(j + 1) * int(mx, 8)) = nc_buffer(my - j, :) - end do + if (lat_south_to_north) then + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, my - j) + end do + else + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, j + 1) + end do + end if + else + if (lat_south_to_north) then + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(my - j, :) + end do + else + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(j + 1, :) + end do + end if end if deallocate(nc_buffer) - ! do j=1, my - ! i = i - 1 - ! topo((i - 1) * mx + 1:) - ! print *, mx, my - ! do j=1, my - ! row_index = row_index - 1 - ! print *, row_index - ! print *, (row_index-1)*mx + 1, (row_index-1)*mx + mx - ! ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! ! topo)) - ! ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! ! topo((row_index-1)*mx + 1:(row_index-1)*mx + mx), & - ! ! start = (/ 1, j /), count=(/ mx, 1 /))) - ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! nc_buffer, start = (/ 1, j /), count=(/ mx, 1 /))) - ! topo((row_index - 1) * mx + 1:(row_index - 1) * mx + mx) = & - ! nc_buffer - ! end do - - ! Check if the topography was defined positive down and flip the - ! sign if need be. Just in case this is true but topo_type < 0 - ! we do not do anything here on this to avoid doing it twice. + ! ---------------------------------------------------------------- + ! Fill-value check (descriptor only). + ! Python has already verified no fill values in the crop region, + ! but we keep this as a safety net for edge-case mismatches. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. nc_has_fill(topo_idx)) then + do i = 1, mtot + if (abs(topo(i) - nc_fill_value(topo_idx)) < & + 1.0d-6 * max(abs(nc_fill_value(topo_idx)), 1.0d0)) then + if (trim(nc_fill_action(topo_idx)) == 'abort') then + print *, "ERROR: Fill value found in topo file ", & + trim(fname), " at index ", i + print *, " fill_value = ", nc_fill_value(topo_idx) + print *, " Set fill_action = warn to replace with topo_missing." + stop + else + topo(i) = topo_missing + end if + end if + end do + end if + + ! Honour 'positive = down' attribute (flip sign for depth-positive files). + ! Only flip when topo_type > 0 to avoid double-flip for negative types. ios = nf90_get_att(nc_file, z_var_id, 'positive', direction) - call check_netcdf_error(nf90_close(nc_file)) if (ios == NF90_NOERR) then - if (to_lower(direction) == "down") then - if (topo_type < 0) then - topo = -topo - endif + if (to_lower(direction) == "down" .and. topo_type > 0) then + topo(1:mtot) = -topo(1:mtot) end if end if end if + + call check_netcdf_error(nf90_close(nc_file)) #else print *, "ERROR: NetCDF library was not included in this build" print *, " of GeoClaw." @@ -783,7 +898,7 @@ end subroutine read_topo_file ! - xll,yll,xhi,yhi - (float) Lower and upper coordinates for grid ! - dx,dy - (float) Spatial resolution of grid ! ======================================================================== - subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) + subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy,topo_idx) #ifdef NETCDF use netcdf #endif @@ -798,6 +913,7 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) integer, intent(in) :: topo_type integer, intent(out) :: mx, my real(kind=8), intent(out) :: xll, yll, xhi, yhi, dx, dy + integer, intent(in), optional :: topo_idx ! Local integer, parameter :: iunit = 19 @@ -810,9 +926,6 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) logical :: xll_registered, yll_registered ! NetCDF Support - ! character(len=1) :: axis_string - ! character(len=6) :: convention_string - ! integer(kind=4) :: convention_version integer(kind=4) :: nc_file real(kind=8), allocatable :: xlocs(:),ylocs(:) logical, allocatable :: x_in_dom(:),y_in_dom(:) @@ -928,116 +1041,118 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) case(4) #ifdef NETCDF - ! Open file + ! Open file call check_netcdf_error(nf90_open(fname, nf90_nowrite, nc_file)) - ! NetCDF4 GEBCO topography, should conform to CF metadata - ! standard - ! ios = nf90_get_att(nc_file, NF90_GLOBAL, 'Conventions', convention_string) - ! if (verbose) then - ! print *, convention_string - ! end if - ! if (ios /= NF90_NOERR .or. convention_string(1:3) /= "CF-") then - ! print *, "Topography file does not conform to the CF" - ! print *, "conventions and meta-data. Please see the" - ! print *, "information at " - ! print *, "" - ! print *, " cfconventions.org" - ! print *, "" - ! print *, "to find out more info." - ! stop - ! end if - - ! call parse_values(convention_string(4:6), num_values, convention_version) - ! if (convention_version(1) < CONVENTION_REQUIREMENT) then - ! print *, "Topography file conforms to a CF version that" - ! print *, "is too old (", convention_version, "). " - ! print *, "Please refer to" - ! print *, "" - ! print *, " cfconventions.org" - ! print *, "" - ! print *, "to find out more info." - ! stop - ! end if - - ! get x and y dimension info - call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, & - num_vars)) - call get_dim_info(nc_file, num_dims_tot, x_dim_id, x_dim_name, & - mx, y_dim_id, y_dim_name, my) - - ! allocate vector to hold lon and lat vals and vector for var ids - allocate(xlocs(mx),ylocs(my),x_in_dom(mx),y_in_dom(my),var_ids(num_vars)) - - if (verbose) then - print *, "Names = (", x_dim_name, ", ", y_dim_name, ")" - print *, "Size = (", mx, ", ", my, ")" - end if - - ! Read in variables - call check_netcdf_error(nf90_inq_varids(nc_file, num_vars, var_ids)) - if (verbose) then - print *, "n, var_name, var_type, num_dims, dim_ids" + ! ---------------------------------------------------------------- + ! Discover spatial dimensions. + ! When a descriptor was provided (topo_idx present, lon_name set) + ! look up dimension IDs by name directly. Otherwise fall back to + ! the name-heuristic in get_dim_info. + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, num_vars)) + + if (present(topo_idx) .and. & + len_trim(nc_lon_name(topo_idx)) > 0) then + ! Descriptor path: exact name look-up + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lon_name(topo_idx)), x_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + x_dim_id, x_dim_name, mx)) + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lat_name(topo_idx)), y_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + y_dim_id, y_dim_name, my)) + else + ! Fallback: auto-discover by name matching + call get_dim_info(nc_file, num_dims_tot, x_dim_id, & + x_dim_name, mx, y_dim_id, y_dim_name, my) end if - do n=1, num_vars - call check_netcdf_error(nf90_inquire_variable(nc_file, n, var_name, var_type, num_dims, dim_ids)) - if (verbose) then - print *, n, ": ", var_name, "|", var_type, "|", num_dims, "|", dim_ids - end if - - ! Assume dim names are same as var ids - if (var_name == x_dim_name) then - x_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, x_var_name, x_var_id)) - ! x_var_id = n - ! x_var_name = var_name - else if (var_name == y_dim_name) then - y_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, y_var_name, y_var_id)) - ! y_var_id = n - ! y_var_name = var_name - ! Assume that this is the topography data - else if (num_dims == 2) then - z_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, z_var_name, z_var_id)) - ! z_var_id = n - ! z_var_name = var_name - else - if (verbose) then - print *, "Not using var_id ", n + allocate(xlocs(mx), ylocs(my), x_in_dom(mx), y_in_dom(my)) + + ! ---------------------------------------------------------------- + ! Look up coordinate variable IDs (CF convention: var name == dim name). + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_inq_varid(nc_file, trim(x_dim_name), x_var_id)) + call check_netcdf_error(nf90_inq_varid(nc_file, trim(y_dim_name), y_var_id)) + + ! ---------------------------------------------------------------- + ! Find the topo data variable. + ! Descriptor path: look up by name. Fallback: first 2-D variable + ! that is not a coordinate variable. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_var_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_file, & + trim(nc_var_name(topo_idx)), z_var_id)) + else + z_var_id = -1 + do n = 1, num_vars + call check_netcdf_error(nf90_inquire_variable(nc_file, & + n, var_name, var_type, num_dims, dim_ids)) + if (var_name /= x_dim_name .and. & + var_name /= y_dim_name .and. num_dims == 2) then + z_var_id = n + exit end if + end do + if (z_var_id == -1) then + print *, "ERROR: Cannot find a 2-D topo variable in ", trim(fname) + print *, " Specify var_name in the topo descriptor block." + stop end if + end if - end do + ! ---------------------------------------------------------------- + ! Read coordinate arrays. + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, & + start=(/ 1 /), count=(/ mx /))) + call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & + start=(/ 1 /), count=(/ my /))) + + dx = abs(xlocs(2) - xlocs(1)) + dy = abs(ylocs(2) - ylocs(1)) + + ! ---------------------------------------------------------------- + ! Select the subset of the file to load. + ! Descriptor path: use explicit crop_bounds (in FILE coordinates). + ! Fallback: clip to the AMR domain (with ghost-cell buffer). + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. nc_has_crop(topo_idx)) then + ! crop_bounds are in FILE coordinates; compare against + ! file-coord xlocs before applying lon_offset. + x_in_dom = (xlocs >= nc_crop_bounds(1, topo_idx)) .and. & + (xlocs <= nc_crop_bounds(2, topo_idx)) + y_in_dom = (ylocs >= nc_crop_bounds(3, topo_idx)) .and. & + (ylocs <= nc_crop_bounds(4, topo_idx)) + else + x_in_dom = (xlocs > (xlower - dx - hxposs(1)*nghost)) .and. & + (xlocs < (xupper + dx + hxposs(1)*nghost)) + y_in_dom = (ylocs > (ylower - dy - hyposs(1)*nghost)) .and. & + (ylocs < (yupper + dy + hyposs(1)*nghost)) + end if - if (verbose) then - print *, "x_var_name, x_var_id = ", x_var_name, x_var_id - print *, "y_var_name, y_var_id = ", y_var_name, y_var_id - print *, "z_var_name, z_var_id = ", z_var_name, z_var_id + ! Apply lon_offset AFTER crop_bounds selection: converts file + ! coordinates to domain coordinates. crop_bounds above were + ! compared against file-coord xlocs; xll/xhi below will be in + ! domain coordinates as GeoClaw expects. + if (present(topo_idx)) then + if (nc_lon_offset(topo_idx) /= 0.0d0) then + xlocs = xlocs + nc_lon_offset(topo_idx) + end if end if - call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, start=(/ 1 /), count=(/ mx /))) - call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, start=(/ 1 /), count=(/ my /))) - - dx = xlocs(2) - xlocs(1) - dy = ylocs(2) - ylocs(1) - - ! find which locs are within domain (with a dx/dy buffer around domain + ghost cells) - x_in_dom = (xlocs.gt.(xlower-dx-hxposs(1)*nghost)) .and. (xlocs.lt.(xupper+dx+hxposs(1)*nghost)) - y_in_dom = (ylocs.gt.(ylower-dy-hyposs(1)*nghost)) .and. (ylocs.lt.(yupper+dy+hyposs(1)*nghost)) - xll = minval(xlocs, mask=x_in_dom) yll = minval(ylocs, mask=y_in_dom) xhi = maxval(xlocs, mask=x_in_dom) yhi = maxval(ylocs, mask=y_in_dom) - - ! adjust mx, my - mx = count(x_in_dom) - my = count(y_in_dom) + mx = count(x_in_dom) + my = count(y_in_dom) call check_netcdf_error(nf90_close(nc_file)) - deallocate(xlocs,ylocs,x_in_dom,y_in_dom) + deallocate(xlocs, ylocs, x_in_dom, y_in_dom) #else print *, "ERROR: NetCDF library was not included in this build" print *, " of GeoClaw." @@ -1383,207 +1498,204 @@ subroutine read_dtopo_header(fname,topo_type,mx,my,mt,xlow,ylow,t0,xhi, & end subroutine read_dtopo_header - - -recursive subroutine topoarea(x1,x2,y1,y2,m,area) + recursive subroutine topoarea(x1,x2,y1,y2,m,area) - ! Compute the area of overlap of topo with the rectangle (x1,x2) x (y1,y2) - ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) - ! (coarse to fine). + ! Compute the area of overlap of topo with the rectangle (x1,x2) x (y1,y2) + ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) + ! (coarse to fine). - ! The main call to this subroutine has corners of a physical domain for - ! the rectangle and m = 1 in order to compute the area of overlap of - ! domain by all topo arrays. Used to check inputs and insure topo - ! covers domain. + ! The main call to this subroutine has corners of a physical domain for + ! the rectangle and m = 1 in order to compute the area of overlap of + ! domain by all topo arrays. Used to check inputs and insure topo + ! covers domain. - ! The recursive strategy is to first compute the area using only topo - ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then apply corrections due to adding topo array mtopoorder(m). - - ! Corrections are needed if the new topo array intersects the grid cell. - ! Let the intersection be (x1m,x2m) x (y1m,y2m). - ! Two corrections are needed, first to subtract out the area over - ! the rectangle (x1m,x2m) x (y1m,y2m) computed using - ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then adding in the area over this same region using - ! topo array mtopoorder(m). + ! The recursive strategy is to first compute the area using only topo + ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then apply corrections due to adding topo array mtopoorder(m). + + ! Corrections are needed if the new topo array intersects the grid cell. + ! Let the intersection be (x1m,x2m) x (y1m,y2m). + ! Two corrections are needed, first to subtract out the area over + ! the rectangle (x1m,x2m) x (y1m,y2m) computed using + ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then adding in the area over this same region using + ! topo array mtopoorder(m). - ! Based on the recursive routine rectintegral that integrates - ! topo over grid cells using a similar strategy. + ! Based on the recursive routine rectintegral that integrates + ! topo over grid cells using a similar strategy. - implicit none + implicit none - ! arguments - real (kind=8), intent(in) :: x1,x2,y1,y2 - integer, intent(in) :: m - real (kind=8), intent(out) :: area + ! arguments + real (kind=8), intent(in) :: x1,x2,y1,y2 + integer, intent(in) :: m + real (kind=8), intent(out) :: area - ! local - real(kind=8) :: xmlo,xmhi,ymlo,ymhi,x1m,x2m, & - y1m,y2m, area1,area2,area_m - integer :: mfid, indicator - integer(kind=8) :: i0 - real(kind=8), external :: topointegral + ! local + real(kind=8) :: xmlo,xmhi,ymlo,ymhi,x1m,x2m, & + y1m,y2m, area1,area2,area_m + integer :: mfid, indicator + integer(kind=8) :: i0 + real(kind=8), external :: topointegral - mfid = mtopoorder(m) - i0=i0topo(mfid) + mfid = mtopoorder(m) + i0=i0topo(mfid) - if (m == mtopofiles) then - ! innermost step of recursion reaches this point. - ! only using coarsest topo grid -- compute directly... - call intersection(indicator,area,xmlo,xmhi, & - ymlo,ymhi, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + if (m == mtopofiles) then + ! innermost step of recursion reaches this point. + ! only using coarsest topo grid -- compute directly... + call intersection(indicator,area,xmlo,xmhi, & + ymlo,ymhi, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) - else - ! recursive call to compute area using one fewer topo grids: - call topoarea(x1,x2,y1,y2,m+1,area1) + else + ! recursive call to compute area using one fewer topo grids: + call topoarea(x1,x2,y1,y2,m+1,area1) - ! region of intersection of cell with new topo grid: - call intersection(indicator,area_m,x1m,x2m, & - y1m,y2m, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + ! region of intersection of cell with new topo grid: + call intersection(indicator,area_m,x1m,x2m, & + y1m,y2m, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (area_m > 0) then + + ! correction to subtract out from previous set of topo grids: + call topoarea(x1m,x2m,y1m,y2m,m+1,area2) - if (area_m > 0) then - - ! correction to subtract out from previous set of topo grids: - call topoarea(x1m,x2m,y1m,y2m,m+1,area2) - - ! adjust integral due to corrections for new topo grid: - area = area1 - area2 + area_m - else - area = area1 + ! adjust integral due to corrections for new topo grid: + area = area1 - area2 + area_m + else + area = area1 + endif endif - endif -end subroutine topoarea + end subroutine topoarea - -recursive subroutine rectintegral(x1,x2,y1,y2,m,integral) + recursive subroutine rectintegral(x1,x2,y1,y2,m,integral) - ! Compute the integral of topo over the rectangle (x1,x2) x (y1,y2) - ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) - ! (coarse to fine). + ! Compute the integral of topo over the rectangle (x1,x2) x (y1,y2) + ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) + ! (coarse to fine). - ! The main call to this subroutine has corners of a grid cell for the - ! rectangle and m = 1 in order to compute the integral over the cell - ! using all topo arrays. + ! The main call to this subroutine has corners of a grid cell for the + ! rectangle and m = 1 in order to compute the integral over the cell + ! using all topo arrays. - ! The recursive strategy is to first compute the integral using only topo - ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then apply corrections due to adding topo array mtopoorder(m). - - ! Corrections are needed if the new topo array intersects the grid cell. - ! Let the intersection be (x1m,x2m) x (y1m,y2m). - ! Two corrections are needed, first to subtract out the integral over - ! the rectangle (x1m,x2m) x (y1m,y2m) computed using - ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then adding in the integral over this same region using - ! topo array mtopoorder(m). + ! The recursive strategy is to first compute the integral using only topo + ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then apply corrections due to adding topo array mtopoorder(m). + + ! Corrections are needed if the new topo array intersects the grid cell. + ! Let the intersection be (x1m,x2m) x (y1m,y2m). + ! Two corrections are needed, first to subtract out the integral over + ! the rectangle (x1m,x2m) x (y1m,y2m) computed using + ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then adding in the integral over this same region using + ! topo array mtopoorder(m). - ! Note that the function topointegral returns the integral over the - ! rectangle based on a single topo array, and that routine calls - ! bilinearintegral. + ! Note that the function topointegral returns the integral over the + ! rectangle based on a single topo array, and that routine calls + ! bilinearintegral. - implicit none + implicit none + + ! arguments + real (kind=8), intent(in) :: x1,x2,y1,y2 + integer, intent(in) :: m + real (kind=8), intent(out) :: integral + + ! local + real(kind=8) :: xmlo,xmhi,ymlo,ymhi,area,x1m,x2m, & + y1m,y2m, int1,int2,int3 + integer :: mfid, indicator + integer(kind=8) :: i0 + real(kind=8), external :: topointegral + + + mfid = mtopoorder(m) + i0=i0topo(mfid) + + if (m == mtopofiles) then + ! innermost step of recursion reaches this point. + ! only using coarsest topo grid -- compute directly... + call intersection(indicator,area,xmlo,xmhi, & + ymlo,ymhi, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (indicator.eq.1) then + ! cell overlaps the file + ! integrate surface over intersection of grid and cell + integral = topointegral( xmlo,xmhi,ymlo, & + ymhi,xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & + dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) + else + integral = 0.d0 + endif + + else + ! recursive call to compute area using one fewer topo grids: + call rectintegral(x1,x2,y1,y2,m+1,int1) - ! arguments - real (kind=8), intent(in) :: x1,x2,y1,y2 - integer, intent(in) :: m - real (kind=8), intent(out) :: integral - - ! local - real(kind=8) :: xmlo,xmhi,ymlo,ymhi,area,x1m,x2m, & - y1m,y2m, int1,int2,int3 - integer :: mfid, indicator - integer(kind=8) :: i0 - real(kind=8), external :: topointegral - - - mfid = mtopoorder(m) - i0=i0topo(mfid) - - if (m == mtopofiles) then - ! innermost step of recursion reaches this point. - ! only using coarsest topo grid -- compute directly... - call intersection(indicator,area,xmlo,xmhi, & - ymlo,ymhi, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) - - if (indicator.eq.1) then - ! cell overlaps the file - ! integrate surface over intersection of grid and cell - integral = topointegral( xmlo,xmhi,ymlo, & - ymhi,xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & - dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - else - integral = 0.d0 - endif - - else - ! recursive call to compute area using one fewer topo grids: - call rectintegral(x1,x2,y1,y2,m+1,int1) - - ! region of intersection of cell with new topo grid: - call intersection(indicator,area,x1m,x2m, & - y1m,y2m, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + ! region of intersection of cell with new topo grid: + call intersection(indicator,area,x1m,x2m, & + y1m,y2m, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (area > 0) then + + ! correction to subtract out from previous set of topo grids: + call rectintegral(x1m,x2m,y1m,y2m,m+1,int2) - if (area > 0) then + ! correction to add in for new topo grid: + int3 = topointegral(x1m,x2m, y1m,y2m, & + xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & + dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - ! correction to subtract out from previous set of topo grids: - call rectintegral(x1m,x2m,y1m,y2m,m+1,int2) - - ! correction to add in for new topo grid: - int3 = topointegral(x1m,x2m, y1m,y2m, & - xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & - dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - - ! adjust integral due to corrections for new topo grid: - integral = int1 - int2 + int3 - else - integral = int1 + ! adjust integral due to corrections for new topo grid: + integral = int1 - int2 + int3 + else + integral = int1 + endif endif - endif -end subroutine rectintegral + end subroutine rectintegral -subroutine intersection(indicator,area,xintlo,xinthi, & - yintlo,yinthi,x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi) + subroutine intersection(indicator,area,xintlo,xinthi, & + yintlo,yinthi,x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi) - ! find the intersection of two rectangles, return the intersection - ! and it's area, and indicator =1 - ! if there is no intersection, indicator =0 + ! find the intersection of two rectangles, return the intersection + ! and it's area, and indicator =1 + ! if there is no intersection, indicator =0 - implicit none + implicit none - integer, intent(out) :: indicator + integer, intent(out) :: indicator - real(kind=8), intent(in) :: x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi - real(kind=8), intent(out) :: area,xintlo,xinthi,yintlo,yinthi + real(kind=8), intent(in) :: x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi + real(kind=8), intent(out) :: area,xintlo,xinthi,yintlo,yinthi - xintlo=dmax1(x1lo,x2lo) - xinthi=dmin1(x1hi,x2hi) - yintlo=dmax1(y1lo,y2lo) - yinthi=dmin1(y1hi,y2hi) + xintlo=dmax1(x1lo,x2lo) + xinthi=dmin1(x1hi,x2hi) + yintlo=dmax1(y1lo,y2lo) + yinthi=dmin1(y1hi,y2hi) - if (xinthi.gt.xintlo.and.yinthi.gt.yintlo) then - area = (xinthi-xintlo)*(yinthi-yintlo) - indicator = 1 - else - area = 0.d0 - indicator = 0 - endif + if (xinthi.gt.xintlo.and.yinthi.gt.yintlo) then + area = (xinthi-xintlo)*(yinthi-yintlo) + indicator = 1 + else + area = 0.d0 + indicator = 0 + endif -end subroutine intersection + end subroutine intersection ! :TODO: Use the utility module's version of these #ifdef NETCDF @@ -1602,7 +1714,103 @@ subroutine check_netcdf_error(ios) end if end subroutine check_netcdf_error +#endif + ! ======================================================================== + ! subroutine read_netcdf_descriptor(iunit, idx) + ! + ! Read the optional key=value block that follows the topo_type line in + ! topo.data for a type-4 (NetCDF) entry. Lines are consumed until either + ! a blank line or EOF is reached. Parsed values are stored in the module- + ! level nc_* arrays at position idx. + ! + ! Format (Python DescriptorWriter output): + ! var_name = z + ! lon_name = longitude + ! lat_name = latitude + ! lon_offset = 0.0 + ! lat_order = N_to_S + ! dim_order = lat,lon + ! fill_value = -9999.0 + ! fill_action = abort + ! crop_bounds = -180.0 180.0 -90.0 90.0 + ! + ! Unknown keys produce a warning and are skipped. + ! ======================================================================== + subroutine read_netcdf_descriptor(iunit, idx) + + use geoclaw_module, only: GEO_PARM_UNIT + + implicit none + + integer, intent(in) :: iunit, idx + + character(len=512) :: line, key, val + integer :: eq_pos, comment_pos, ios + + do + read(iunit, '(a)', iostat=ios) line + if (ios /= 0) exit ! EOF + + ! Strip inline comment (! character) + comment_pos = index(line, '!') + if (comment_pos > 0) line = line(1:comment_pos-1) + line = adjustl(trim(line)) + + ! Blank line terminates the descriptor block + if (len_trim(line) == 0) exit + + ! Skip lines without an '=' sign + eq_pos = index(line, '=') + if (eq_pos == 0) cycle + + key = adjustl(trim(line(1:eq_pos-1))) + val = adjustl(trim(line(eq_pos+1:))) + + select case (trim(key)) + case ('var_name') + nc_var_name(idx) = trim(val) + case ('lon_name') + nc_lon_name(idx) = trim(val) + case ('lat_name') + nc_lat_name(idx) = trim(val) + case ('lon_offset') + read(val, *) nc_lon_offset(idx) + case ('lat_order') + nc_lat_order(idx) = trim(val) + case ('dim_order') + nc_dim_order(idx) = trim(val) + case ('fill_value') + read(val, *) nc_fill_value(idx) + nc_has_fill(idx) = .true. + case ('fill_action') + nc_fill_action(idx) = trim(val) + case ('crop_bounds') + read(val, *) nc_crop_bounds(1, idx), nc_crop_bounds(2, idx), & + nc_crop_bounds(3, idx), nc_crop_bounds(4, idx) + nc_has_crop(idx) = .true. + case default + write(GEO_PARM_UNIT, *) 'WARNING: Unknown NetCDF descriptor key: ', & + trim(key) + end select + end do + + ! Log what was parsed + write(GEO_PARM_UNIT, *) ' NetCDF descriptor for topo file ', idx, ':' + write(GEO_PARM_UNIT, *) ' var_name = ', trim(nc_var_name(idx)) + write(GEO_PARM_UNIT, *) ' lon_name = ', trim(nc_lon_name(idx)) + write(GEO_PARM_UNIT, *) ' lat_name = ', trim(nc_lat_name(idx)) + write(GEO_PARM_UNIT, *) ' lon_offset = ', nc_lon_offset(idx) + write(GEO_PARM_UNIT, *) ' lat_order = ', trim(nc_lat_order(idx)) + write(GEO_PARM_UNIT, *) ' fill_action = ', trim(nc_fill_action(idx)) + if (nc_has_fill(idx)) & + write(GEO_PARM_UNIT, *) ' fill_value = ', nc_fill_value(idx) + if (nc_has_crop(idx)) & + write(GEO_PARM_UNIT, *) ' crop_bounds = ', nc_crop_bounds(:, idx) + + end subroutine read_netcdf_descriptor + +#ifdef NETCDF subroutine get_dim_info(nc_file, ndims, x_dim_id, x_dim_name, mx, & y_dim_id, y_dim_name, my) use netcdf @@ -1630,18 +1838,18 @@ subroutine get_dim_info(nc_file, ndims, x_dim_id, x_dim_name, mx, & end subroutine get_dim_info #endif -function Upper(s1) RESULT (s2) - CHARACTER(*) :: s1 - CHARACTER(LEN(s1)) :: s2 - CHARACTER :: ch - INTEGER,PARAMETER :: DUC = ICHAR('A') - ICHAR('a') - INTEGER :: i - - DO i = 1,LEN(s1) - ch = s1(i:i) - IF (ch >= 'a'.AND.ch <= 'z') ch = CHAR(ICHAR(ch)+DUC) - s2(i:i) = ch - END DO -END function Upper + function Upper(s1) RESULT (s2) + CHARACTER(*) :: s1 + CHARACTER(LEN(s1)) :: s2 + CHARACTER :: ch + INTEGER,PARAMETER :: DUC = ICHAR('A') - ICHAR('a') + INTEGER :: i + + DO i = 1,LEN(s1) + ch = s1(i:i) + IF (ch >= 'a'.AND.ch <= 'z') ch = CHAR(ICHAR(ch)+DUC) + s2(i:i) = ch + END DO + END function Upper end module topo_module diff --git a/src/python/geoclaw/data.py b/src/python/geoclaw/data.py index 4ad0668fb..e30df379e 100755 --- a/src/python/geoclaw/data.py +++ b/src/python/geoclaw/data.py @@ -200,14 +200,23 @@ def write(self,data_source='setrun.py', out_file='topo.data'): tfile = [tfile[0], tfile[-1]] # drop minlevel,maxlevel,t1,t2 elif len(tfile) == 2: pass # now expect only topo_type, filename + elif len(tfile) == 3: + pass # [topo_type, filename, TopoMetadata] for NetCDF type 4 else: raise ValueError('Unexpected len(tfile) = %i' % len(tfile)) # if path is relative in setrun, assume it's relative to the # same directory that out_file comes from - fname = os.path.abspath(os.path.join(os.path.dirname(out_file),tfile[-1])) + fname = os.path.abspath(os.path.join(os.path.dirname(out_file),tfile[1])) self._out_file.write("\n'%s' \n " % fname) self._out_file.write("%3i # topo_type\n" % tfile[0]) + + # For type-4 (NetCDF) entries that carry a TopoMetadata object, + # write the descriptor key=value block consumed by + # read_netcdf_descriptor in topo_module.f90. + if tfile[0] == 4 and len(tfile) == 3 and tfile[2] is not None: + from clawpack.geoclaw.netcdf_utils import DescriptorWriter + DescriptorWriter.write_topo_descriptor(self._out_file, tfile[2]) elif self.test_topography == 1: self.data_write(name='topo_location',description='(Bathymetry jump location)') self.data_write(name='topo_left',description='(Depth to left of bathy_location)') diff --git a/src/python/geoclaw/meson.build b/src/python/geoclaw/meson.build index 3a835d77d..0509994be 100644 --- a/src/python/geoclaw/meson.build +++ b/src/python/geoclaw/meson.build @@ -13,6 +13,7 @@ python_sources = { 'kmltools.py', 'marching_front.py', 'most2geoclaw.py', + 'netcdf_utils.py', 'nonuniform_grid_tools.py', 'okada.py', 'plotfg.py', diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py new file mode 100644 index 000000000..362e44140 --- /dev/null +++ b/src/python/geoclaw/netcdf_utils.py @@ -0,0 +1,1232 @@ +#!/usr/bin/env python +# encoding: utf-8 +r""" +NetCDF utilities for GeoClaw NetCDF input. + +Provides interrogator classes that inspect NetCDF files for coordinate +metadata, unit consistency, and fill values without loading data arrays. +The interrogators produce metadata dataclasses consumed by DescriptorWriter +(not yet implemented — pending confirmation of Fortran topo.data parser). + +Classes +------- +NetCDFInterrogator + Base class: opens a file, discovers coordinate variables, detects lon/lat + conventions, resolves fill value, validates crop bounds. + +TopoInterrogator(NetCDFInterrogator) + Adds bathymetry-specific checks: fill values within crop region (fatal), + unit verification against GEOCLAW_NETCDF_UNITS['topo']. + +MetInterrogator(NetCDFInterrogator) + Adds met-forcing checks: multi-variable grid/time consistency, unit + verification and conversion to contract units, CF time -> seconds offset, + ensemble dimension detection. + +CFNormalizer + Standalone utility: renames coordinates to CF standard names, adds/fixes + standard_name / axis / units / _FillValue attributes, resolves + _FillValue vs missing_value conflicts. Does not resample or reproject. + +Metadata dataclasses +-------------------- +FileMetadata, TopoMetadata, MetVariableInfo, MetMetadata +""" + +from __future__ import annotations + +import dataclasses +import warnings +from pathlib import Path +from typing import Optional + +import numpy as np +import xarray as xr + +from clawpack.geoclaw.units import GEOCLAW_NETCDF_UNITS, convert as units_convert + + +# --------------------------------------------------------------------------- +# CF unit alias table +# Maps CF-style unit strings to the abbreviations understood by units.convert() +# --------------------------------------------------------------------------- + +# For each contract role, the set of CF unit strings that mean "already correct" +_CONTRACT_UNIT_CF_ALIASES: dict[str, frozenset[str]] = { + 'm': frozenset({'m', 'meter', 'meters', 'metre', 'metres'}), + 'm/s': frozenset({'m/s', 'm s-1', 'meters per second', 'metres per second', + 'meter per second', 'metre per second', 'm s**-1'}), + 'Pa': frozenset({'Pa', 'pa', 'pascal', 'pascals', 'Pascal', 'Pascals', + 'N/m2', 'N m-2', 'kg m-1 s-2'}), + 's': frozenset({'s', 'sec', 'second', 'seconds'}), +} + +# Maps CF unit strings to units.py abbreviations for conversion +_CF_TO_UNITS_PY: dict[str, str] = { + # length + 'm': 'm', 'meter': 'm', 'meters': 'm', 'metre': 'm', 'metres': 'm', + 'cm': 'cm', 'centimeter': 'cm', 'centimeters': 'cm', + 'km': 'km', 'kilometer': 'km', 'kilometers': 'km', + # speed + 'm/s': 'm/s', 'm s-1': 'm/s', 'm s**-1': 'm/s', + 'meters per second': 'm/s', 'metres per second': 'm/s', + 'knots': 'knots', 'kt': 'knots', 'kts': 'knots', + 'km/h': 'km/h', 'km h-1': 'km/h', + 'mph': 'mph', + # pressure + 'Pa': 'Pa', 'pa': 'Pa', 'pascal': 'Pa', 'pascals': 'Pa', + 'hPa': 'hPa', 'hectopascal': 'hPa', 'hectopascals': 'hPa', + 'mbar': 'mbar', 'mb': 'mbar', 'millibar': 'mbar', 'millibars': 'mbar', + # time + 's': 's', 'sec': 's', 'second': 's', 'seconds': 's', + 'min': 'min', 'minute': 'min', 'minutes': 'min', + 'h': 'h', 'hr': 'h', 'hour': 'h', 'hours': 'h', + 'days': 'days', 'day': 'days', +} + + +def _normalize_cf_unit(cf_unit: str) -> Optional[str]: + """Return the units.py abbreviation for *cf_unit*, or None if unknown.""" + return _CF_TO_UNITS_PY.get(cf_unit.strip()) + + +def _unit_matches_contract(cf_unit: str, contract_unit: str) -> bool: + """Return True if *cf_unit* is equivalent to *contract_unit*.""" + aliases = _CONTRACT_UNIT_CF_ALIASES.get(contract_unit, frozenset()) + return cf_unit.strip() in aliases + + +# --------------------------------------------------------------------------- +# Metadata dataclasses +# --------------------------------------------------------------------------- + +@dataclasses.dataclass +class FileMetadata: + """Coordinate and convention metadata for a single NetCDF file.""" + source_file: Path + lon_name: str + lat_name: str + time_name: Optional[str] + lon_convention: int # 180 -> [-180, 180]; 360 -> [0, 360] + lat_order: str # 'N_to_S' or 'S_to_N' + dim_order: list[str] # canonical role names, e.g. ['lat', 'lon'] + fill_value: Optional[float] # resolved from _FillValue / missing_value + crop_bounds: Optional[tuple[float, float, float, float]] # lon0,lon1,lat0,lat1 + + +@dataclasses.dataclass +class TopoMetadata(FileMetadata): + """FileMetadata plus topo-specific fields.""" + var_name: str + source_units: str # units as found in file + fill_action: str # 'abort' (only value for topo; fill = fatal) + lon_offset: float = 0.0 # scalar Fortran adds to file coords: x_domain = x_file + lon_offset + + +@dataclasses.dataclass +class MetVariableInfo: + """Maps one NetCDF variable to its GeoClaw role.""" + var_name: str + geoclaw_role: str # 'wind_u', 'wind_v', 'pressure' + source_units: str # units as found in file + + +@dataclasses.dataclass +class MetMetadata(FileMetadata): + """FileMetadata plus met-forcing-specific fields.""" + variables: list[MetVariableInfo] + time_offset: float # seconds; Fortran adds this to times read from file + fill_action: str # 'abort' or 'warn' + + +# --------------------------------------------------------------------------- +# Base interrogator +# --------------------------------------------------------------------------- + +class NetCDFInterrogator: + """ + Open a NetCDF file and interrogate its coordinate metadata. + + No data arrays are loaded; only coordinate values and variable attributes + are accessed. Dask-lazy chunking is enabled so that any accidental + downstream `.compute()` calls are bounded. + + Parameters + ---------- + path : str or Path + Path to the NetCDF file. + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max) in the same convention as the + file. When provided, bounds are validated against file extent. + """ + + def __init__( + self, + path: str | Path, + crop_bounds: Optional[tuple[float, float, float, float]] = None, + ) -> None: + self.path = Path(path) + self.crop_bounds = crop_bounds + # Activate Dask-lazy chunking if dask is available; fall back to + # netCDF4 native lazy loading so dask is an optional dependency. + try: + import dask # noqa: F401 + chunks: dict | str = {} + except ImportError: + chunks = None + # mask_and_scale=True (default) so fill values appear as NaN. + self.ds: xr.Dataset = xr.open_dataset( + self.path, chunks=chunks, mask_and_scale=True + ) + + # ------------------------------------------------------------------ + # Coordinate discovery + # ------------------------------------------------------------------ + + def _find_coord_name( + self, + axis_letter: str, + std_names: list[str], + fallback_names: list[str], + ) -> Optional[str]: + """Find a coordinate by CF conventions: axis > standard_name > name.""" + # 1. Check axis attribute (highest CF priority) + for name, coord in self.ds.coords.items(): + if coord.attrs.get('axis', '').upper() == axis_letter: + return str(name) + # 2. Check standard_name + for name, coord in self.ds.coords.items(): + if coord.attrs.get('standard_name', '') in std_names: + return str(name) + # 3. Fallback: common names (case-insensitive) + coord_lower = {str(n).lower(): str(n) for n in self.ds.coords} + for fname in fallback_names: + if fname.lower() in coord_lower: + return coord_lower[fname.lower()] + return None + + def _require_coord( + self, + axis_letter: str, + std_names: list[str], + fallback_names: list[str], + label: str, + ) -> str: + name = self._find_coord_name(axis_letter, std_names, fallback_names) + if name is None: + raise ValueError( + f"Cannot find {label} coordinate in '{self.path}'. " + f"Expected axis='{axis_letter}', standard_name in {std_names}, " + f"or a name in {fallback_names}. " + f"Available coordinates: {list(self.ds.coords)}" + ) + return name + + def _find_lon_name(self) -> str: + return self._require_coord( + 'X', + ['longitude'], + ['longitude', 'lon', 'x', 'nav_lon', 'LONGITUDE', 'LON'], + 'longitude', + ) + + def _find_lat_name(self) -> str: + return self._require_coord( + 'Y', + ['latitude'], + ['latitude', 'lat', 'y', 'nav_lat', 'LATITUDE', 'LAT'], + 'latitude', + ) + + def _find_time_name(self) -> Optional[str]: + return self._find_coord_name( + 'T', + ['time'], + ['time', 't', 'TIME', 'valid_time'], + ) + + # ------------------------------------------------------------------ + # Convention detection + # ------------------------------------------------------------------ + + def _detect_lon_convention(self, lon_name: str) -> int: + """Return 180 if longitudes are in [-180, 180], else 360.""" + lon_max = float(self.ds[lon_name].max()) + return 360 if lon_max > 180.0 else 180 + + def _detect_lat_order(self, lat_name: str) -> str: + """Return 'N_to_S' if latitudes decrease, else 'S_to_N'.""" + lat_vals = self.ds[lat_name].values + if lat_vals[0] > lat_vals[-1]: + return 'N_to_S' + return 'S_to_N' + + def _detect_dim_order( + self, + var_name: str, + lon_name: str, + lat_name: str, + time_name: Optional[str], + ) -> list[str]: + """ + Return dimension order for *var_name* using canonical role names + ('lon', 'lat', 'time'). Unknown dims are passed through as-is. + """ + dim_map: dict[str, str] = {lon_name: 'lon', lat_name: 'lat'} + if time_name is not None: + dim_map[time_name] = 'time' + return [dim_map.get(d, d) for d in self.ds[var_name].dims] + + # ------------------------------------------------------------------ + # Fill value + # ------------------------------------------------------------------ + + def _resolve_fill_value(self, var_name: str) -> Optional[float]: + """ + Return fill value with CF-correct precedence: _FillValue > missing_value. + + Note: xarray with mask_and_scale=True has already decoded fill values + to NaN in the data arrays, but the original attribute is preserved. + """ + attrs = self.ds[var_name].attrs + if '_FillValue' in attrs: + return float(attrs['_FillValue']) + if 'missing_value' in attrs: + return float(attrs['missing_value']) + # xarray may have moved _FillValue to encoding dict + enc = self.ds[var_name].encoding + if '_FillValue' in enc: + return float(enc['_FillValue']) + return None + + # ------------------------------------------------------------------ + # Crop bounds validation + # ------------------------------------------------------------------ + + def _validate_crop_bounds( + self, + lon_name: str, + lat_name: str, + crop_bounds: tuple[float, float, float, float], + ) -> None: + """Raise ValueError if crop_bounds exceed file spatial extent.""" + lon_min, lon_max, lat_min, lat_max = crop_bounds + flon_min = float(self.ds[lon_name].min()) + flon_max = float(self.ds[lon_name].max()) + flat_min = float(self.ds[lat_name].min()) + flat_max = float(self.ds[lat_name].max()) + + if lon_min < flon_min or lon_max > flon_max: + raise ValueError( + f"crop_bounds lon [{lon_min}, {lon_max}] exceed file extent " + f"[{flon_min}, {flon_max}] in '{self.path}'." + ) + if lat_min < flat_min or lat_max > flat_max: + raise ValueError( + f"crop_bounds lat [{lat_min}, {lat_max}] exceed file extent " + f"[{flat_min}, {flat_max}] in '{self.path}'." + ) + + # ------------------------------------------------------------------ + # Core interrogation + # ------------------------------------------------------------------ + + def interrogate( + self, + var_name: str, + time_name_override: Optional[str] = None, + ) -> FileMetadata: + """ + Interrogate *var_name* and return a FileMetadata instance. + + Parameters + ---------- + var_name : str + Name of the primary data variable (used to determine dim_order). + time_name_override : str, optional + Force a specific time coordinate name instead of auto-detecting. + """ + if var_name not in self.ds: + raise KeyError( + f"Variable '{var_name}' not found in '{self.path}'. " + f"Available: {list(self.ds.data_vars)}" + ) + + lon_name = self._find_lon_name() + lat_name = self._find_lat_name() + time_name = ( + time_name_override + if time_name_override is not None + else self._find_time_name() + ) + + lon_convention = self._detect_lon_convention(lon_name) + lat_order = self._detect_lat_order(lat_name) + dim_order = self._detect_dim_order(var_name, lon_name, lat_name, time_name) + fill_value = self._resolve_fill_value(var_name) + + if self.crop_bounds is not None: + self._validate_crop_bounds(lon_name, lat_name, self.crop_bounds) + + return FileMetadata( + source_file=self.path, + lon_name=lon_name, + lat_name=lat_name, + time_name=time_name, + lon_convention=lon_convention, + lat_order=lat_order, + dim_order=dim_order, + fill_value=fill_value, + crop_bounds=self.crop_bounds, + ) + + def close(self) -> None: + self.ds.close() + + def __enter__(self) -> 'NetCDFInterrogator': + return self + + def __exit__(self, *args: object) -> None: + self.close() + + +# --------------------------------------------------------------------------- +# Topo interrogator +# --------------------------------------------------------------------------- + +class TopoInterrogator(NetCDFInterrogator): + """ + Interrogate a NetCDF bathymetry/topography file. + + Additional checks beyond the base class: + + * Verifies the data variable's units attribute matches the contract unit + (meters). If units are convertible via units.py, records the + source units in the metadata; Fortran will need a conversion factor. + If units are unrecognised, raises ValueError. + * Checks for fill values (NaN) within the crop region and raises + ValueError — silent NaN in bathymetry is numerically fatal. + + Parameters + ---------- + path : str or Path + Path to the NetCDF topography file. + var_name : str, optional + Name of the elevation/bathymetry variable (e.g. ``'z'``, + ``'elevation'``). If omitted, the variable is auto-detected by + searching for known CF ``standard_name`` values + (``surface_altitude``, ``height_above_mean_sea_level``, …) and + then falling back to common names (``z``, ``elevation``, ``topo``, + …). A ``ValueError`` is raised if no match is found. + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max). + """ + + def __init__( + self, + path: str | Path, + var_name: Optional[str] = None, + crop_bounds: Optional[tuple[float, float, float, float]] = None, + ) -> None: + super().__init__(path, crop_bounds) + self.var_name = var_name + + # ------------------------------------------------------------------ + # Variable auto-detection + # ------------------------------------------------------------------ + + #: CF standard_names that indicate an elevation / bathymetry variable, + #: ordered from most specific to most generic. + _TOPO_CF_STANDARD_NAMES: tuple[str, ...] = ( + 'surface_altitude', + 'height_above_mean_sea_level', + 'height_above_reference_ellipsoid', + 'bedrock_altitude', + 'altitude', + 'height', + 'sea_floor_depth_below_geoid', + ) + + #: Common variable names for elevation / bathymetry data. + _TOPO_FALLBACK_NAMES: tuple[str, ...] = ( + 'z', 'Z', + 'elevation', 'Elevation', 'ELEVATION', + 'topo', 'TOPO', + 'height', 'HEIGHT', + 'altitude', 'ALTITUDE', + 'depth', 'DEPTH', + 'Band1', + 'dem', 'DEM', + 'topography', 'bathymetry', 'bathy', + 'bedrock', + ) + + def _find_topo_var_name(self) -> str: + """ + Auto-detect the elevation/bathymetry variable. + + Search order: + 1. CF ``standard_name`` attribute against known elevation standard names. + 2. Variable name against ``_TOPO_FALLBACK_NAMES`` (case-sensitive). + + Returns the matched variable name. Raises ValueError if nothing is found. + """ + # 1. CF standard_name lookup across all data variables + for std_name in self._TOPO_CF_STANDARD_NAMES: + for var_name, var in self.ds.data_vars.items(): + if var.attrs.get('standard_name', '') == std_name: + return str(var_name) + + # 2. Name-based fallback + available = {str(k) for k in self.ds.data_vars} + for name in self._TOPO_FALLBACK_NAMES: + if name in available: + return name + + raise ValueError( + f"Cannot auto-detect elevation variable in '{self.path}'. " + f"No data variable matched known CF standard_names " + f"{self._TOPO_CF_STANDARD_NAMES} or fallback names " + f"{self._TOPO_FALLBACK_NAMES}. " + f"Pass var_name explicitly. " + f"Available data variables: {sorted(available)}" + ) + + # ------------------------------------------------------------------ + # Unit verification + # ------------------------------------------------------------------ + + def _check_topo_units(self) -> str: + """ + Verify units of *var_name* match GEOCLAW_NETCDF_UNITS['topo'] ('m'). + + Returns the units string found in the file. Raises ValueError if the + units are not recognisable or not convertible to meters. + """ + contract = GEOCLAW_NETCDF_UNITS['topo'] # 'm' + cf_unit = self.ds[self.var_name].attrs.get('units', '') + + if not cf_unit: + warnings.warn( + f"Variable '{self.var_name}' in '{self.path}' has no 'units' " + f"attribute. Assuming '{contract}' (contract unit for topo). " + f"If the data are not in meters, results will be incorrect.", + stacklevel=3, + ) + return contract + + if _unit_matches_contract(cf_unit, contract): + return cf_unit + + # Try to find a conversion path via units.py + canonical = _normalize_cf_unit(cf_unit) + if canonical is None: + raise ValueError( + f"Unrecognised units '{cf_unit}' on variable '{self.var_name}' " + f"in '{self.path}'. Contract requires '{contract}'. " + f"Add the unit string to _CF_TO_UNITS_PY or pre-convert the file." + ) + # Verify units.convert() can handle the conversion (raises if not) + try: + units_convert(1.0, canonical, contract) + except (ValueError, KeyError) as exc: + raise ValueError( + f"Cannot convert units '{cf_unit}' -> '{contract}' for variable " + f"'{self.var_name}' in '{self.path}': {exc}" + ) from exc + + warnings.warn( + f"Variable '{self.var_name}' has units '{cf_unit}'; contract is " + f"'{contract}'. A unit conversion will be needed before Fortran " + f"reads this file. Pre-convert the data or implement unit scaling " + f"in the descriptor.", + stacklevel=3, + ) + return cf_unit + + # ------------------------------------------------------------------ + # Fill value check within crop region + # ------------------------------------------------------------------ + + def _check_fill_in_crop( + self, + lon_name: str, + lat_name: str, + lat_order: str, + ) -> None: + """ + Check for NaN / fill values within the crop region. + + This triggers a bounded Dask computation (scalar boolean result). + Raises ValueError if any fill values are present — silent NaN in + bathymetry would silently corrupt simulation output. + """ + var = self.ds[self.var_name] + + if self.crop_bounds is not None: + lon_min, lon_max, lat_min, lat_max = self.crop_bounds + # For a decreasing lat axis (N_to_S) the slice must be reversed. + lat_slice = ( + slice(lat_max, lat_min) + if lat_order == 'N_to_S' + else slice(lat_min, lat_max) + ) + var = var.sel({ + lon_name: slice(lon_min, lon_max), + lat_name: lat_slice, + }) + + # This .compute() is intentional: we must confirm no fill values exist. + has_fill = bool(var.isnull().any().compute()) + if has_fill: + region_str = ( + f"crop region {self.crop_bounds}" if self.crop_bounds is not None + else "full file extent" + ) + raise ValueError( + f"Fill values (NaN) found in topography variable '{self.var_name}' " + f"within {region_str} of '{self.path}'. " + f"Silent NaN in bathymetry would corrupt simulation results. " + f"Fill holes before using this file." + ) + + # ------------------------------------------------------------------ + # Public interface + # ------------------------------------------------------------------ + + def interrogate_topo(self) -> TopoMetadata: + """ + Fully interrogate the topo file and return a TopoMetadata instance. + """ + if self.var_name is None: + self.var_name = self._find_topo_var_name() + base = self.interrogate(self.var_name) + source_units = self._check_topo_units() + self._check_fill_in_crop(base.lon_name, base.lat_name, base.lat_order) + + return TopoMetadata( + **dataclasses.asdict(base), + var_name=self.var_name, + source_units=source_units, + fill_action='abort', # fill in topo crop region is always fatal + lon_offset=0.0, + ) + + def topo_entries(self) -> list[list]: + """ + Return a list of ready-to-use topo entries for topofiles. + + Each entry is [4, filepath, TopoMetadata]. When no wrapping is needed + this returns a list of one entry. When crop_bounds straddle the file's + lon cut point, returns two entries pointing to the same file with + different lon_offset and crop_bounds values. + + crop_bounds on the interrogator are in domain coordinates; this method + converts them to file coordinates before storing in the returned + metadata. Fortran can then use crop_bounds directly against file + coordinate arrays before applying lon_offset. + """ + + # Interrogate without crop validation: self.crop_bounds is in domain + # coordinates, which may lie outside the file extent. + saved_crop = self.crop_bounds + self.crop_bounds = None + try: + meta = self.interrogate_topo() + finally: + self.crop_bounds = saved_crop + + if saved_crop is None: + return [[4, self.path, dataclasses.replace(meta, lon_offset=0.0)]] + + assert saved_crop is not None # narrowing hint: already returned above + lon_coords = self.ds[meta.lon_name].values + file_lon_min = float(lon_coords.min()) + file_lon_max = float(lon_coords.max()) + if len(lon_coords) > 1: + lon_resolution = float(abs(lon_coords[1] - lon_coords[0])) + else: + lon_resolution = 1e-10 # single-point file, no gap tolerance needed + crop_lon_min, crop_lon_max, crop_lat_min, crop_lat_max = saved_crop + + entries_spec = _compute_lon_entries( + file_lon_min, file_lon_max, crop_lon_min, crop_lon_max, + max_gap=lon_resolution, + ) + + result = [] + for file_crop_min, file_crop_max, lon_offset in entries_spec: + new_meta = dataclasses.replace( + meta, + crop_bounds=(file_crop_min, file_crop_max, crop_lat_min, crop_lat_max), + lon_offset=lon_offset, + ) + result.append([4, self.path, new_meta]) + + return result + + +# --------------------------------------------------------------------------- +# Met interrogator +# --------------------------------------------------------------------------- + +class MetInterrogator(NetCDFInterrogator): + """ + Interrogate a NetCDF meteorological forcing file. + + Additional checks beyond the base class: + + * Verifies that all requested variables share the same spatial grid and + time axis. + * Validates and (if needed) converts units to contract units via units.py. + * Converts CF time to seconds from a user-provided reference offset. + * Detects ensemble/member dimensions and raises if any are non-singleton. + + Parameters + ---------- + path : str or Path + Path to the NetCDF file. + variable_map : dict + Maps GeoClaw role strings to variable names in the file, e.g.:: + + {'wind_u': 'u10', 'wind_v': 'v10', 'pressure': 'msl'} + + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max). + time_reference : str or datetime-like, optional + Reference datetime for the time_offset calculation. The + ``time_offset`` written to the descriptor will be the number of + seconds between *time_reference* and the first time in the file. + Defaults to the Unix epoch (1970-01-01T00:00:00). + fill_action : str, optional + 'abort' or 'warn'. Met files may legitimately have fill values at + the domain edges (Fortran handles edge fill). Default is 'warn'. + """ + + def __init__( + self, + path: str | Path, + variable_map: dict[str, str], + crop_bounds: Optional[tuple[float, float, float, float]] = None, + time_reference: Optional[str] = None, + fill_action: str = 'warn', + ) -> None: + super().__init__(path, crop_bounds) + self.variable_map = variable_map # {geoclaw_role: var_name_in_file} + self.time_reference = time_reference + if fill_action not in ('abort', 'warn'): + raise ValueError(f"fill_action must be 'abort' or 'warn', got '{fill_action}'") + self.fill_action = fill_action + + # ------------------------------------------------------------------ + # Variable grid/time consistency + # ------------------------------------------------------------------ + + def _check_variable_consistency( + self, + lon_name: str, + lat_name: str, + time_name: str, + ) -> None: + """ + Verify all variables share the same spatial grid and time axis. + + Raises ValueError on any mismatch. + """ + ref_role = next(iter(self.variable_map)) + ref_var_name = self.variable_map[ref_role] + ref_dims = set(self.ds[ref_var_name].dims) + + for role, var_name in self.variable_map.items(): + if var_name not in self.ds: + raise KeyError( + f"Variable '{var_name}' (role '{role}') not found in " + f"'{self.path}'. Available: {list(self.ds.data_vars)}" + ) + dims = set(self.ds[var_name].dims) + if dims != ref_dims: + raise ValueError( + f"Variable '{var_name}' (role '{role}') has dims {dims} " + f"but '{ref_var_name}' (role '{ref_role}') has dims {ref_dims}. " + f"All met variables must share the same grid." + ) + # Verify that the expected coordinate dims are actually present + for coord_name, label in [ + (lon_name, 'longitude'), + (lat_name, 'latitude'), + (time_name, 'time'), + ]: + if coord_name not in dims: + raise ValueError( + f"Variable '{var_name}' dims {dims} do not include the " + f"{label} coordinate '{coord_name}'." + ) + + # ------------------------------------------------------------------ + # Ensemble dimension check + # ------------------------------------------------------------------ + + def _check_ensemble_dims( + self, + known_dims: set[str], + ) -> None: + """ + Raise ValueError if any variable has non-singleton extra dimensions. + + Extra dimensions (beyond lon, lat, time) are assumed to be ensemble / + member axes, which GeoClaw does not support. + """ + for role, var_name in self.variable_map.items(): + var = self.ds[var_name] + extra = [d for d in var.dims if d not in known_dims] + for dim in extra: + size = self.ds.sizes[dim] + if size != 1: + raise ValueError( + f"Variable '{var_name}' (role '{role}') has non-singleton " + f"dimension '{dim}' (size {size}). Ensemble / member " + f"dimensions are not supported by GeoClaw. " + f"Select a single member before interrogating." + ) + + # ------------------------------------------------------------------ + # Unit verification / conversion + # ------------------------------------------------------------------ + + def _check_met_units(self) -> list[MetVariableInfo]: + """ + Verify or convert units for each variable to contract units. + + Returns a list of MetVariableInfo with source_units populated. + Raises ValueError for unrecognised or unconvertible units. + """ + result: list[MetVariableInfo] = [] + for role, var_name in self.variable_map.items(): + contract = GEOCLAW_NETCDF_UNITS.get(role) + if contract is None: + # Role not in contract (e.g. future extension) — just record + warnings.warn( + f"No contract unit defined for role '{role}'. " + f"Skipping unit check for '{var_name}'.", + stacklevel=3, + ) + source_units = self.ds[var_name].attrs.get('units', 'unknown') + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=source_units, + )) + continue + + cf_unit = self.ds[var_name].attrs.get('units', '') + if not cf_unit: + warnings.warn( + f"Variable '{var_name}' (role '{role}') has no 'units' " + f"attribute. Assuming contract unit '{contract}'. " + f"Verify the data are actually in '{contract}'.", + stacklevel=3, + ) + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=contract, + )) + continue + + if _unit_matches_contract(cf_unit, contract): + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=cf_unit, + )) + continue + + # Attempt conversion path + canonical = _normalize_cf_unit(cf_unit) + if canonical is None: + raise ValueError( + f"Unrecognised units '{cf_unit}' on variable '{var_name}' " + f"(role '{role}') in '{self.path}'. " + f"Contract requires '{contract}'. " + f"Add the unit to _CF_TO_UNITS_PY or pre-convert the file." + ) + try: + units_convert(1.0, canonical, contract) + except (ValueError, KeyError) as exc: + raise ValueError( + f"Cannot convert units '{cf_unit}' -> '{contract}' for " + f"variable '{var_name}' (role '{role}'): {exc}" + ) from exc + + warnings.warn( + f"Variable '{var_name}' (role '{role}') has units '{cf_unit}'; " + f"contract is '{contract}'. A unit conversion factor must be " + f"applied before Fortran reads this data.", + stacklevel=3, + ) + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=cf_unit, + )) + + return result + + # ------------------------------------------------------------------ + # CF time -> seconds offset + # ------------------------------------------------------------------ + + def _compute_time_offset(self, time_name: str) -> float: + """ + Compute time_offset in seconds. + + time_offset = seconds between *time_reference* and the first time + value in the file (after CF decoding by xarray). + + This reads only the first element of the time coordinate — a minimal + data fetch, not a full array load. + """ + import pandas as pd + + time_coord = self.ds[time_name] + # Access only the first element (.values[0] fetches one chunk element) + t0_raw = time_coord.values[0] + + try: + t0 = pd.Timestamp(t0_raw) + except Exception as exc: + raise ValueError( + f"Cannot convert first time value '{t0_raw}' in '{self.path}' " + f"to a pandas Timestamp. Ensure xarray can decode CF time " + f"(check 'units' and 'calendar' attributes on '{time_name}')." + ) from exc + + if self.time_reference is None: + ref = pd.Timestamp('1970-01-01T00:00:00') # Unix epoch + else: + ref = pd.Timestamp(self.time_reference) + + offset_s = (t0 - ref).total_seconds() + return float(offset_s) + + # ------------------------------------------------------------------ + # Public interface + # ------------------------------------------------------------------ + + def interrogate_met(self) -> MetMetadata: + """ + Fully interrogate the met file and return a MetMetadata instance. + """ + if not self.variable_map: + raise ValueError("variable_map must not be empty.") + + # Use the first variable for base coordinate detection + primary_var = next(iter(self.variable_map.values())) + base = self.interrogate(primary_var) + + if base.time_name is None: + raise ValueError( + f"No time coordinate found in '{self.path}'. " + f"Met forcing requires a time axis." + ) + + known_dims = {base.lon_name, base.lat_name, base.time_name} + self._check_variable_consistency( + base.lon_name, base.lat_name, base.time_name + ) + self._check_ensemble_dims(known_dims) + variable_infos = self._check_met_units() + time_offset = self._compute_time_offset(base.time_name) + + return MetMetadata( + **dataclasses.asdict(base), + variables=variable_infos, + time_offset=time_offset, + fill_action=self.fill_action, + ) + + +# --------------------------------------------------------------------------- +# CF Normalizer +# --------------------------------------------------------------------------- + +class CFNormalizer: + """ + Normalize CF metadata in an xarray Dataset. + + Performs in-memory attribute fixups: + * Renames coordinate variables to CF standard names where unambiguous. + * Adds ``standard_name``, ``axis``, and ``units`` attributes to coordinate + variables if missing. + * Resolves ``_FillValue`` vs ``missing_value`` conflicts (CF precedence: + ``_FillValue`` wins; ``missing_value`` is promoted if ``_FillValue`` + absent; conflict triggers a warning). + + Does NOT resample, reproject, or modify data values. + + Parameters + ---------- + ds : xr.Dataset + Dataset to normalise. A copy is made; the original is not modified. + + Examples + -------- + >>> ds_norm = CFNormalizer(ds).normalize() + """ + + # Alias sets for unambiguous renaming + _LON_ALIASES: frozenset[str] = frozenset( + {'lon', 'longitude', 'x', 'nav_lon', 'LONGITUDE', 'LON', 'Longitude'} + ) + _LAT_ALIASES: frozenset[str] = frozenset( + {'lat', 'latitude', 'y', 'nav_lat', 'LATITUDE', 'LAT', 'Latitude'} + ) + _TIME_ALIASES: frozenset[str] = frozenset( + {'time', 't', 'TIME', 'valid_time', 'Time'} + ) + + def __init__(self, ds: xr.Dataset) -> None: + self.ds = ds.copy() + + def normalize(self) -> xr.Dataset: + """Apply all normalizations and return the modified dataset.""" + self._rename_coords() + self._fix_coord_attrs() + self._resolve_fill_values() + return self.ds + + def _rename_coords(self) -> None: + """Rename coordinate variables to CF standard names.""" + rename_map: dict[str, str] = {} + for name in list(self.ds.coords): + sname = str(name) + if sname in self._LON_ALIASES and sname != 'longitude': + if 'longitude' not in self.ds.coords: + rename_map[sname] = 'longitude' + elif sname in self._LAT_ALIASES and sname != 'latitude': + if 'latitude' not in self.ds.coords: + rename_map[sname] = 'latitude' + elif sname in self._TIME_ALIASES and sname != 'time': + if 'time' not in self.ds.coords: + rename_map[sname] = 'time' + if rename_map: + self.ds = self.ds.rename(rename_map) + + def _fix_coord_attrs(self) -> None: + """Add standard_name, axis, and units to recognized coordinates.""" + _COORD_STANDARDS: dict[str, tuple[str, Optional[str], str]] = { + 'longitude': ('longitude', 'degrees_east', 'X'), + 'latitude': ('latitude', 'degrees_north', 'Y'), + 'time': ('time', None, 'T'), + } + for coord_name, (std_name, units_val, axis) in _COORD_STANDARDS.items(): + if coord_name not in self.ds.coords: + continue + attrs = dict(self.ds[coord_name].attrs) + attrs.setdefault('standard_name', std_name) + attrs.setdefault('axis', axis) + if units_val is not None: + attrs.setdefault('units', units_val) + self.ds[coord_name].attrs = attrs + + def _resolve_fill_values(self) -> None: + """ + Resolve _FillValue / missing_value conflicts for all data variables. + + CF precedence: _FillValue wins. If only missing_value is present it + is promoted to _FillValue. Conflicting values trigger a UserWarning. + """ + for var_name in list(self.ds.data_vars): + attrs = dict(self.ds[var_name].attrs) + fv: Optional[float] = attrs.get('_FillValue') + mv: Optional[float] = attrs.get('missing_value') + + if fv is not None and mv is not None: + if fv != mv: + warnings.warn( + f"Variable '{var_name}' has conflicting " + f"_FillValue={fv} and missing_value={mv}. " + f"Using _FillValue (CF precedence); " + f"missing_value removed.", + UserWarning, + stacklevel=2, + ) + del attrs['missing_value'] + self.ds[var_name].attrs = attrs + + elif mv is not None: + # Promote missing_value -> _FillValue + attrs['_FillValue'] = mv + del attrs['missing_value'] + self.ds[var_name].attrs = attrs + + +# --------------------------------------------------------------------------- +# Longitude entry computation +# --------------------------------------------------------------------------- + +def _compute_lon_entries( + file_lon_min: float, + file_lon_max: float, + domain_lon_min: float, + domain_lon_max: float, + max_gap: float = 1e-10, +) -> list[tuple[float, float, float]]: + """ + Compute (file_crop_min, file_crop_max, lon_offset) tuples needed to cover + [domain_lon_min, domain_lon_max] from a file with lons in + [file_lon_min, file_lon_max]. + + lon_offset is the scalar Fortran adds to file coordinates to produce + domain coordinates: x_domain = x_file + lon_offset. + + Returns 1 tuple if a single offset suffices, 2 tuples if the domain + straddles the file's cut point. + + max_gap controls how much under-coverage is tolerated. The default + (1e-10) is tight enough to catch genuine gaps. Pass the file's grid + spacing to allow for the half-cell gap at the dateline that near-global + files (e.g. GEBCO) have between their last and first longitude columns. + + Raises ValueError if the file cannot cover the requested domain even + with all candidate offsets, or if the remaining uncovered gap exceeds + max_gap. + """ + candidate_offsets = [0.0, 360.0, -360.0] + entries: list[tuple[float, float, float]] = [] + total_coverage = 0.0 + + for offset in candidate_offsets: + shifted_min = file_lon_min + offset + shifted_max = file_lon_max + offset + intersect_min = max(domain_lon_min, shifted_min) + intersect_max = min(domain_lon_max, shifted_max) + width = intersect_max - intersect_min + if width > 1e-10: + file_crop_min = intersect_min - offset + file_crop_max = intersect_max - offset + # Clamp to file extent (guards against floating-point overshoot) + file_crop_min = max(file_crop_min, file_lon_min) + file_crop_max = min(file_crop_max, file_lon_max) + entries.append((file_crop_min, file_crop_max, offset)) + total_coverage += width + + if not entries: + raise ValueError( + f"File longitude range [{file_lon_min}, {file_lon_max}] cannot cover " + f"domain [{domain_lon_min}, {domain_lon_max}] with candidate offsets " + f"{candidate_offsets}." + ) + + # One-sided check: overcoverage is harmless; under-coverage beyond max_gap + # indicates that the file genuinely cannot cover the requested domain. + gap = (domain_lon_max - domain_lon_min) - total_coverage + if gap > max_gap: + raise ValueError( + f"File longitudes [{file_lon_min}, {file_lon_max}] cannot " + f"cover requested domain [{domain_lon_min}, {domain_lon_max}]. " + f"Gap of {gap:.6f} degrees exceeds tolerance {max_gap:.6f}." + ) + + return entries + + +# --------------------------------------------------------------------------- +# Descriptor writer +# --------------------------------------------------------------------------- + +class DescriptorWriter: + """ + Write NetCDF descriptor metadata for GeoClaw input files. + + For topo (type 4) entries the descriptor is a block of ``key = value`` + lines written immediately after the ``topo_type`` line in *topo.data*. + Fortran's ``read_netcdf_descriptor`` parses these lines until it + encounters a blank line. + + Usage — topo:: + + meta = TopoInterrogator(path, var_name='z').interrogate_topo() + with open('topo.data', 'a') as f: + DescriptorWriter.write_topo_descriptor(f, meta) + + Usage — met (storm file):: + + meta = MetInterrogator(path, var_map).interrogate_met() + with open('storm.nc', 'w') as f: + f.write('netcdf\\n') # format header written by caller + DescriptorWriter.write_met_descriptor(f, meta) + """ + + @staticmethod + def write_topo_descriptor(f, meta: 'TopoMetadata') -> None: + """ + Write the key=value descriptor block for one topo file. + + *f* should be positioned immediately after the ``topo_type`` line + has been written. A trailing blank line is written to terminate + the block for the Fortran parser. + + Parameters + ---------- + f : file-like object + Open for writing (text mode). + meta : TopoMetadata + Output of ``TopoInterrogator.interrogate_topo()``. + """ + # Fortran adds lon_offset to all file coordinate values after reading: + # x_domain = x_file + lon_offset + # crop_bounds written here are in FILE coordinates (converted from + # domain coordinates by topo_entries()). + f.write(f"var_name = {meta.var_name}\n") + f.write(f"lon_name = {meta.lon_name}\n") + f.write(f"lat_name = {meta.lat_name}\n") + f.write(f"lon_offset = {meta.lon_offset!r}\n") + f.write(f"lat_order = {meta.lat_order}\n") + f.write(f"dim_order = {','.join(meta.dim_order)}\n") + if meta.fill_value is not None: + f.write(f"fill_value = {meta.fill_value!r}\n") + f.write(f"fill_action = {meta.fill_action}\n") + if meta.crop_bounds is not None: + lon0, lon1, lat0, lat1 = meta.crop_bounds + f.write(f"crop_bounds = {lon0} {lon1} {lat0} {lat1}\n") + f.write("\n") # blank line terminates block for Fortran parser + + @staticmethod + def write_met_descriptor(f, meta: 'MetMetadata') -> None: + """ + Write the ``&file_info`` / ``&variable_info`` namelist-style body + of a GeoClaw NetCDF met/storm descriptor file. + + The caller is responsible for writing the ``netcdf`` format header + line before calling this method. + + Parameters + ---------- + f : file-like object + Open for writing (text mode). + meta : MetMetadata + Output of ``MetInterrogator.interrogate_met()``. + """ + f.write("&file_info\n") + f.write(f" lon_name = {meta.lon_name}\n") + f.write(f" lat_name = {meta.lat_name}\n") + f.write(f" time_name = {meta.time_name}\n") + f.write(f" dim_order = {','.join(meta.dim_order)}\n") + f.write(f" lon_convention = {meta.lon_convention}\n") + f.write(f" lat_order = {meta.lat_order}\n") + if meta.fill_value is not None: + f.write(f" fill_value = {meta.fill_value!r}\n") + f.write(f" fill_action = {meta.fill_action}\n") + f.write(f" time_offset = {meta.time_offset!r}\n") + if meta.crop_bounds is not None: + lon0, lon1, lat0, lat1 = meta.crop_bounds + f.write(f" crop_bounds = {lon0} {lon1} {lat0} {lat1}\n") + f.write("/\n") + for var in meta.variables: + f.write( + f"&variable_info" + f" var_name={var.var_name}" + f" geoclaw_role={var.geoclaw_role}" + f" /\n" + ) diff --git a/src/python/geoclaw/surge/plot.py b/src/python/geoclaw/surge/plot.py index ca7f54d7a..aa3415e99 100644 --- a/src/python/geoclaw/surge/plot.py +++ b/src/python/geoclaw/surge/plot.py @@ -107,25 +107,36 @@ def get_track(self, frame): # ========================================================================== # Gauge functions # ========================================================================== -def gauge_locations(current_data, gaugenos='all'): +def gauge_locations(current_data, gaugenos='all') -> None: gaugetools.plot_gauge_locations(current_data.plotdata, gaugenos=gaugenos, format_string='kx', add_labels=True, xoffset=0.02, yoffset=0.02) -def gauge_dry_regions(cd, dry_tolerance=1e-16): +def gauge_dry_regions(cd, dry_tolerance: float=1e-16): """Masked array of zeros where gauge is dry.""" return np.ma.masked_where(np.abs(cd.gaugesoln.q[0, :]) > dry_tolerance, np.zeros(cd.gaugesoln.q[0, :].shape)) -def gauge_surface(cd, dry_tolerance=1e-16): +def gauge_surface(cd, dry_tolerance: float=1e-16): """Sea surface at gauge masked when dry.""" return np.ma.masked_where(np.abs(cd.gaugesoln.q[0, :]) < dry_tolerance, cd.gaugesoln.q[3, :]) +def gauge_wind(cd, wind_x_idx: int=4, wind_y_idx: int=5): + """Wind speed at gauge""" + return np.sqrt(cd.gaugesoln.q[wind_x_idx, :]**2 + + cd.gaugesoln.q[wind_y_idx, :]**2) + + +def gauge_pressure(cd, pressure_idx: int=6): + """Wind speed at gauge""" + return cd.gaugesoln.q[pressure_idx, :] + + def plot_landfall_gauge(gauge, axes, landfall=0.0, style='b', kwargs={}): """Plot gauge data on the axes provided diff --git a/src/python/geoclaw/surge/storm.py b/src/python/geoclaw/surge/storm.py index 2ae850f56..9d9e5e0fa 100644 --- a/src/python/geoclaw/surge/storm.py +++ b/src/python/geoclaw/surge/storm.py @@ -219,6 +219,7 @@ def __init__(self, path=None, file_format="ATCF", **kwargs): # Run-time modifications for storm self.scaling = [1.0, 1.0] # Scaling of wind and pressure + self.storm_time_scale = 1.0 # >1 slower storm, <1 faster storm # Ramping information - only applies to data storms currently self.window_type = 0 @@ -226,6 +227,17 @@ def __init__(self, path=None, file_format="ATCF", **kwargs): self.window = None # If not provided for data files it # will be set to the file extents + # NetCDF met-forcing metadata (populated by read_data for file_format==2) + self.met_lon_name = None + self.met_lat_name = None + self.met_time_name = None + self.met_lon_convention = None + self.met_lat_order = None + self.met_fill_value = None + self.met_fill_action = None + self.met_time_offset = None + self.met_variable_map = {} + if path is not None: self.read(path, file_format=file_format, **kwargs) @@ -944,19 +956,73 @@ def read_data(self, path: Path, verbose: bool=False): data_file.readline().partition("#")[0].rstrip()) self.ramp_width = float( data_file.readline().partition("#")[0].rstrip()) - data_file.readline() - data_file.readline() - data_file.readline() + data_file.readline() # window + _ts_str = data_file.readline().partition("#")[0].rstrip() + try: + self.storm_time_scale = float(_ts_str) + except ValueError: + # Legacy file: blank line consumed; storm_time_scale absent + self.storm_time_scale = 1.0 + data_file.readline() # "# Format Data Information" - # We do not keep track of any of the format specific information if self.file_format == 1: + # Skip blank line then "# File paths" comment data_file.readline() data_file.readline() elif self.file_format == 2: - data_file.readline() - data_file.readline() - data_file.readline() - data_file.readline() + # Parse the &file_info / &variable_info descriptor produced + # by DescriptorWriter.write_met_descriptor. + file_info = {} + variable_infos = [] + + # Read &file_info block (lines until standalone "/") + line = data_file.readline() + if line.strip().startswith('&file_info'): + while True: + inner = data_file.readline() + if not inner: + break + s = inner.strip() + if s == '/': + break + if '=' in s and not s.startswith('&'): + key, _, val = s.partition('=') + file_info[key.strip()] = val.strip() + + # Read &variable_info lines until "# File paths" terminator + while True: + line = data_file.readline() + if not line: + break + s = line.strip() + if s.startswith('# File paths'): + break + if s.startswith('&variable_info'): + var_info = {} + content = s[len('&variable_info'):].rstrip('/').strip() + for pair in content.split(): + k, _, v = pair.partition('=') + if k and v: + var_info[k] = v + variable_infos.append(var_info) + + # Store fields on Storm object for Fortran to consume + self.met_lon_name = file_info.get('lon_name') + self.met_lat_name = file_info.get('lat_name') + self.met_time_name = file_info.get('time_name') + _conv = file_info.get('lon_convention') + self.met_lon_convention = int(_conv) if _conv is not None else None + self.met_lat_order = file_info.get('lat_order') + _fv = file_info.get('fill_value') + self.met_fill_value = float(_fv) if _fv is not None else None + self.met_fill_action = file_info.get('fill_action', 'warn') + _to = file_info.get('time_offset') + self.met_time_offset = float(_to) if _to is not None else None + self.met_variable_map = { + vi['geoclaw_role']: vi['var_name'] + for vi in variable_infos + if 'geoclaw_role' in vi and 'var_name' in vi + } else: raise TypeError(f"Unknown storm data file format type" + f" '{self.file_format}' provided.") @@ -1269,10 +1335,10 @@ def write_tcvitals(self, path, verbose=False): "implemented yet but is planned for a ", "future release.")) - def write_data(self, path, dim_mapping=None, var_mapping=None, verbose=False): + def write_data(self, path, dim_mapping=None, var_mapping=None, + met_interrogator=None, verbose=False): r""" """ - # Only one format right now _data_file_format_mapping = {'ascii': 1, 'nws12': 1, "owi": 1, 'netcdf': 2, 'nws13': 2} @@ -1327,7 +1393,7 @@ def write_data(self, path, dim_mapping=None, var_mapping=None, verbose=False): data_file.write(f"{str(self.window)[1:-1].ljust(20)} # Window\n") else: data_file.write(f"{str(None).ljust(20)} # Window\n") - data_file.write("\n") + data_file.write(f"{str(self.storm_time_scale).ljust(20)} # Storm time scale (>1 slower, <1 faster)\n") data_file.write("# Format Data Information\n") if file_format == 1: # Check number of file paths @@ -1337,29 +1403,73 @@ def write_data(self, path, dim_mapping=None, var_mapping=None, verbose=False): "resolution provided.") data_file.write("\n") elif file_format == 2: - # Get dimension mapping - _dim_mapping = util.get_netcdf_names(self.file_paths[0], - lookup_type='dim', - user_mapping=dim_mapping, - verbose=verbose) - - # Get variable mapping - _var_mapping = util.get_netcdf_names(self.file_paths[0], - lookup_type='var', - user_mapping=var_mapping, - verbose=verbose) - data_file.write(f"{str(_dim_mapping['x'])} ") - data_file.write(f"{str(_dim_mapping['y'])} ") - data_file.write(f"{str(_dim_mapping['t'])}\n") - data_file.write(f"{str(_var_mapping['wind_u'])} ") - data_file.write(f"{str(_var_mapping['wind_v'])} ") - data_file.write(f"{str(_var_mapping['pressure'])}\n") - data_file.write("\n") + # Variable discovery: get_netcdf_names auto-discovers met + # roles from standard name lists, then any explicit var_mapping + # entries override specific roles. MetInterrogator then applies + # CF-aware coordinate discovery, unit validation, lon-convention + # detection, and fill-value resolution. + # TODO: consolidate get_netcdf_names with MetInterrogator if len(self.file_paths) != 1: raise ValueError(f"Expected 1 path for NetCDF format, " + f"got {len(self.file_paths)}") + from clawpack.geoclaw.netcdf_utils import ( + MetInterrogator, DescriptorWriter) + + if met_interrogator is None: + # Auto-discover met variable names, then apply explicit + # overrides from var_mapping. This lets callers supply + # only the non-standard names (e.g. a non-standard pressure + # field) while standard wind names are still found + # automatically. dim_mapping is handled automatically by + # MetInterrogator via CF axis/standard_name conventions; it + # is accepted for backwards compatibility but not forwarded. + _MET_ROLES = frozenset({'wind_u', 'wind_v', 'pressure'}) + variable_map = { + role: name + for role, name in util.get_netcdf_names( + self.file_paths[0], + lookup_type='var', + verbose=verbose, + ).items() + if role in _MET_ROLES + } + + if var_mapping is not None: + # Validate each user-specified name exists before + # merging to catch typos early with a clear message. + import xarray as xr + with xr.open_dataset(str(self.file_paths[0])) as _ds: + _available = set(_ds.data_vars) | set(_ds.coords) + _bad = {role: name + for role, name in var_mapping.items() + if name not in _available} + if _bad: + raise ValueError( + "The following names in var_mapping were not " + f"found in {self.file_paths[0]}:\n" + + "\n".join( + f" '{role}': '{name}'" + for role, name in _bad.items()) + + f"\nAvailable variables: " + + str(sorted(_available)) + ) + variable_map.update(var_mapping) + + with MetInterrogator(self.file_paths[0], + variable_map=variable_map, + time_reference=self.time_offset) as mi: + meta = mi.interrogate_met() + else: + meta = met_interrogator.interrogate_met() + + # Note: storm.window governs the Fortran forcing application + # domain (with ramp width); MetInterrogator.crop_bounds is a + # read-time spatial subset of the NetCDF file. These are + # conceptually distinct and should remain independent. + DescriptorWriter.write_met_descriptor(data_file, meta) + # Write paths data_file.write("# File paths\n") for path in self.file_paths: diff --git a/src/python/geoclaw/units.py b/src/python/geoclaw/units.py index 7a7bc477e..ed8fbeb04 100644 --- a/src/python/geoclaw/units.py +++ b/src/python/geoclaw/units.py @@ -92,6 +92,31 @@ lambda temp: temp - 273.15] units['temperature'] = {'C': "Celsius", 'F': "Fahrenheit", 'K': "Kelvin"} +# Time +conversion_func['s'] = [lambda t: t, lambda t: t] +conversion_func['min'] = [lambda t: t * 60.0, lambda t: t / 60.0] +conversion_func['minutes'] = conversion_func['min'] +conversion_func['h'] = [lambda t: t * 3600.0, lambda t: t / 3600.0] +conversion_func['hours'] = conversion_func['h'] +conversion_func['days'] = [lambda t: t * 86400.0, lambda t: t / 86400.0] +units['time'] = collections.OrderedDict({'s': 'seconds', 'min': 'minutes', + 'h': 'hours', 'days': 'days'}) + +# Unit contract for GeoClaw NetCDF input. +# These are the units that Fortran assumes when reading NetCDF data. +# Python interrogators must verify and, if necessary, record a conversion +# factor before writing the descriptor so that Fortran receives data in +# these units. +GEOCLAW_NETCDF_UNITS = { + 'topo': 'm', # bathymetry / topography elevation + 'wind_u': 'm/s', # eastward wind component + 'wind_v': 'm/s', # northward wind component + 'pressure': 'Pa', # surface pressure (absolute) + 'time': 's', # time coordinate (seconds from descriptor offset) + # TODO: 'precipitation': ??? (unit TBD) + # TODO: 'friction': ??? (unit TBD) +} + def units_available(): r""" diff --git a/src/python/geoclaw/util.py b/src/python/geoclaw/util.py index c5119cc44..c8dea3a7d 100644 --- a/src/python/geoclaw/util.py +++ b/src/python/geoclaw/util.py @@ -305,7 +305,7 @@ def get_netcdf_names(path, lookup_type='dim', user_mapping=None, verbose=False): import xarray as xr # Open file and construct set of names - data = xr.open_dataset(path) + data = xr.open_dataset(path, decode_timedelta=False) if 'dim' in lookup_type.lower(): _var_mapping = {"x": (False, ["x", "longitude", "lon"]), "y": (False, ["y", "latitude", "lat"]), diff --git a/tests/netcdf/__init__.py b/tests/netcdf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/netcdf/_helpers.py b/tests/netcdf/_helpers.py new file mode 100644 index 000000000..8f61c61c0 --- /dev/null +++ b/tests/netcdf/_helpers.py @@ -0,0 +1,289 @@ +""" +Non-fixture helpers shared across the NetCDF test suite. + +Exported by conftest.py (as fixtures) and importable directly by test files +for use with @pytest.mark.parametrize. +""" +from __future__ import annotations + +from typing import Optional + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + + +# ============================================================ +# Dataset factory functions +# ============================================================ + +def make_topo_dataset( + *, + lon_name: str = "lon", + lat_name: str = "lat", + var_name: str = "z", + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 5, + nlon: int = 5, + lat_direction: str = "S_to_N", # "S_to_N" or "N_to_S" + dim_order: tuple = ("lat", "lon"), + units: str = "m", + has_fill_in_data: bool = False, + lon_axis_attr: bool = False, + lat_axis_attr: bool = False, + var_standard_name: Optional[str] = None, +) -> xr.Dataset: + """ + Build a minimal topo xarray Dataset. + + lon_max > 180 gives a [0, 360] convention file. + lat_direction='N_to_S' gives decreasing latitude values. + has_fill_in_data=True puts a NaN in the data array so + fill-in-crop tests can verify the interrogator rejects the file. + """ + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + + dim_to_coord = {"lat": lat_name, "lon": lon_name} + actual_dims = tuple(dim_to_coord[d] for d in dim_order) + shape = tuple(nlon if d == lon_name else nlat for d in actual_dims) + + data = np.full(shape, -100.0, dtype=np.float32) + if has_fill_in_data: + data.flat[0] = np.nan + + lon_attrs: dict = {} + lat_attrs: dict = {} + if lon_axis_attr: + lon_attrs["axis"] = "X" + if lat_axis_attr: + lat_attrs["axis"] = "Y" + + var_attrs: dict = {} + if units: + var_attrs["units"] = units + if var_standard_name is not None: + var_attrs["standard_name"] = var_standard_name + + coords = { + lon_name: xr.DataArray(lons, dims=[lon_name], attrs=lon_attrs), + lat_name: xr.DataArray(lats, dims=[lat_name], attrs=lat_attrs), + } + da = xr.DataArray(data, dims=actual_dims, coords=coords, attrs=var_attrs) + return xr.Dataset({var_name: da}) + + +def make_met_dataset( + *, + lon_name: str = "lon", + lat_name: str = "lat", + time_name: str = "time", + var_name_map: Optional[dict] = None, # {geoclaw_role: file_var_name} + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 4, + nlon: int = 4, + nt: int = 3, + lat_direction: str = "S_to_N", + dim_order: tuple = ("time", "lat", "lon"), + wind_units: str = "m/s", + pressure_units: str = "Pa", + time_start: str = "2020-01-01", + time_freq: str = "6h", + extra_dim: Optional[tuple] = None, # (dim_name, size) for ensemble + omit_roles: Optional[list] = None, # geoclaw roles to leave out + mismatch_dims: bool = False, # last var gets different dims +) -> xr.Dataset: + """ + Build a minimal met forcing xarray Dataset. + + extra_dim=('member', 1) inserts a singleton ensemble dimension. + extra_dim=('member', 3) inserts a non-singleton ensemble dimension. + omit_roles=['wind_v'] omits that variable (consistency check must fail). + mismatch_dims=True gives the last variable a different dim set. + """ + if var_name_map is None: + var_name_map = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + if omit_roles: + var_name_map = {k: v for k, v in var_name_map.items() + if k not in omit_roles} + + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + times = pd.date_range(time_start, periods=nt, freq=time_freq) + + role_to_coord = {"lat": lat_name, "lon": lon_name, "time": time_name} + actual_dims = [role_to_coord.get(d, d) for d in dim_order] + coord_size = {lon_name: nlon, lat_name: nlat, time_name: nt} + base_shape = tuple(coord_size[d] for d in actual_dims) + + coords: dict = {lon_name: lons, lat_name: lats, time_name: times} + data_vars: dict = {} + var_items = list(var_name_map.items()) + + for i, (role, var_name) in enumerate(var_items): + dims = list(actual_dims) + shape = list(base_shape) + + if extra_dim is not None: + ename, esize = extra_dim + dims = [ename] + dims + shape = [esize] + shape + if ename not in coords: + coords[ename] = np.arange(esize) + + if mismatch_dims and i == len(var_items) - 1: + # Drop time so this variable has incompatible dims + dims = [d for d in dims if d != time_name] + shape = [s for d, s in zip(actual_dims, base_shape) + if d != time_name] + + fill_val = 101325.0 if role == "pressure" else 5.0 + data = np.full(shape, fill_val, dtype=np.float32) + + if role in ("wind_u", "wind_v"): + attrs = {"units": wind_units} + elif role == "pressure": + attrs = {"units": pressure_units} + else: + attrs = {} + + data_vars[var_name] = xr.DataArray(data, dims=dims, attrs=attrs) + + return xr.Dataset(data_vars, coords=coords) + + +# ============================================================ +# Parametrize lists +# ============================================================ + +#: lon/lat convention + direction combos. +#: Each entry is a dict of kwargs for make_topo_dataset / make_met_dataset. +COORD_VARIANTS: list = [ + pytest.param( + {"lon_min": -10.0, "lon_max": 10.0, "lat_direction": "S_to_N"}, + id="lon180-S_to_N", + ), + pytest.param( + {"lon_min": -10.0, "lon_max": 10.0, "lat_direction": "N_to_S"}, + id="lon180-N_to_S", + ), + pytest.param( + {"lon_min": 170.0, "lon_max": 190.0, "lat_direction": "S_to_N"}, + id="lon360-S_to_N", + ), + pytest.param( + {"lon_min": 170.0, "lon_max": 190.0, "lat_direction": "N_to_S"}, + id="lon360-N_to_S", + ), +] + +#: dim_order variants for 2-D (no time) topo variables. +TOPO_DIM_ORDER_VARIANTS: list = [ + pytest.param({"dim_order": ("lat", "lon")}, id="dim-lat-lon"), + pytest.param({"dim_order": ("lon", "lat")}, id="dim-lon-lat"), +] + +#: dim_order variants for 3-D met variables. +MET_DIM_ORDER_VARIANTS: list = [ + pytest.param({"dim_order": ("time", "lat", "lon")}, id="dim-time-lat-lon"), + pytest.param({"dim_order": ("time", "lon", "lat")}, id="dim-time-lon-lat"), + pytest.param({"dim_order": ("lat", "lon", "time")}, id="dim-lat-lon-time"), + pytest.param({"dim_order": ("lon", "lat", "time")}, id="dim-lon-lat-time"), +] + +#: Fill-value attribute combinations for nc4_topo_file_factory. +#: Keys: fill_value (nc4 createVariable arg), missing_value (plain attr), +#: expected (what _resolve_fill_value should return). +FILL_VALUE_VARIANTS: list = [ + pytest.param( + {"fill_value": -9999.0, "missing_value": None, "expected": -9999.0}, + id="fill-only", + ), + pytest.param( + # Modern xarray (mask_and_scale=True) moves missing_value to + # enc['missing_value'], not enc['_FillValue']. _resolve_fill_value + # only checks enc['_FillValue'], so it returns None. This is a known + # implementation limitation; the test documents actual behaviour. + {"fill_value": None, "missing_value": -8888.0, "expected": None}, + id="missing-only", + ), + pytest.param( + # Both present, agreeing: either path returns the same value. + {"fill_value": -9999.0, "missing_value": -9999.0, "expected": -9999.0}, + id="both-agree", + ), + pytest.param( + # Conflicting: _FillValue wins (CF precedence); no warning from interrogator. + {"fill_value": -9999.0, "missing_value": -8888.0, "expected": -9999.0}, + id="both-conflict-no-warn", + ), + pytest.param( + {"fill_value": None, "missing_value": None, "expected": None}, + id="neither", + ), +] + +#: Pressure unit variants: (units written in file, expected source_units in metadata) +PRESSURE_UNIT_VARIANTS: list = [ + pytest.param({"pressure_units": "Pa", "expected_source": "Pa"}, id="Pa"), + pytest.param({"pressure_units": "hPa", "expected_source": "hPa"}, id="hPa"), + pytest.param({"pressure_units": "mbar", "expected_source": "mbar"}, id="mbar"), +] + +#: Wind unit variants +WIND_UNIT_VARIANTS: list = [ + pytest.param({"wind_units": "m/s", "expected_source": "m/s"}, id="m_s"), + pytest.param({"wind_units": "knots", "expected_source": "knots"}, id="knots"), +] + +#: CF standard_name values recognised by TopoInterrogator._find_topo_var_name, +#: in priority order. +TOPO_CF_STANDARD_NAME_VARIANTS: list = [ + pytest.param("surface_altitude", id="surface_altitude"), + pytest.param("height_above_mean_sea_level", id="height_above_mean_sea_level"), + pytest.param("height_above_reference_ellipsoid", id="height_above_reference_ellipsoid"), + pytest.param("bedrock_altitude", id="bedrock_altitude"), + pytest.param("altitude", id="altitude"), + pytest.param("height", id="height"), + pytest.param("sea_floor_depth_below_geoid", id="sea_floor_depth_below_geoid"), +] + +#: Variable names in the fallback list recognised by +#: TopoInterrogator._find_topo_var_name. +TOPO_FALLBACK_NAME_VARIANTS: list = [ + pytest.param("z", id="z"), + pytest.param("Z", id="Z"), + pytest.param("elevation", id="elevation"), + pytest.param("Elevation", id="Elevation"), + pytest.param("ELEVATION", id="ELEVATION"), + pytest.param("topo", id="topo"), + pytest.param("TOPO", id="TOPO"), + pytest.param("height", id="height"), + pytest.param("HEIGHT", id="HEIGHT"), + pytest.param("altitude", id="altitude"), + pytest.param("ALTITUDE", id="ALTITUDE"), + pytest.param("depth", id="depth"), + pytest.param("DEPTH", id="DEPTH"), + pytest.param("Band1", id="Band1"), + pytest.param("dem", id="dem"), + pytest.param("DEM", id="DEM"), + pytest.param("topography", id="topography"), + pytest.param("bathymetry", id="bathymetry"), + pytest.param("bathy", id="bathy"), + pytest.param("bedrock", id="bedrock"), +] diff --git a/tests/netcdf/conftest.py b/tests/netcdf/conftest.py new file mode 100644 index 000000000..94687e129 --- /dev/null +++ b/tests/netcdf/conftest.py @@ -0,0 +1,172 @@ +""" +Shared pytest fixtures for GeoClaw NetCDF interrogator tests. + +Factory functions, parametrize lists, and helper constants live in +_helpers.py so they can be imported directly by test files without +going through the pytest fixture machinery. + +Fixtures provided here +---------------------- +topo_file_factory write a topo Dataset to a tmp_path .nc file +met_file_factory write a met Dataset to a tmp_path .nc file +nc4_topo_file_factory write a topo file via netCDF4 for precise + attribute placement (fill value tests) +""" +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +import numpy as np +import pytest + +from ._helpers import make_topo_dataset, make_met_dataset + +try: + import netCDF4 as nc4 # type: ignore + _HAS_NC4 = True +except ImportError: + _HAS_NC4 = False + + +@pytest.fixture +def topo_file_factory(tmp_path): + """ + Return a callable that writes a topo Dataset to a .nc file in tmp_path. + + Signature:: + + path = topo_file_factory(**make_topo_dataset_kwargs) + # or supply an already-built dataset: + path = topo_file_factory(ds=my_ds) + + Each call writes a distinct file (topo_1.nc, topo_2.nc, …). + """ + counter = [0] + + def _make(ds=None, name: Optional[str] = None, **kwargs) -> Path: + if ds is None: + ds = make_topo_dataset(**kwargs) + if name is None: + counter[0] += 1 + name = f"topo_{counter[0]}.nc" + path = tmp_path / name + ds.to_netcdf(path) + return path + + return _make + + +@pytest.fixture +def met_file_factory(tmp_path): + """ + Return a callable that writes a met Dataset to a .nc file in tmp_path. + + Signature:: + + path = met_file_factory(**make_met_dataset_kwargs) + # or supply an already-built dataset: + path = met_file_factory(ds=my_ds) + """ + counter = [0] + + def _make(ds=None, name: Optional[str] = None, **kwargs) -> Path: + if ds is None: + from ._helpers import make_met_dataset as _make_met + ds = _make_met(**kwargs) + if name is None: + counter[0] += 1 + name = f"met_{counter[0]}.nc" + path = tmp_path / name + ds.to_netcdf(path) + return path + + return _make + + +@pytest.fixture +def nc4_topo_file_factory(tmp_path): + """ + Return a callable that writes a topo file via the netCDF4 library, + giving precise control over which fill-related attributes are present. + + This is required for fill value tests because xarray (mask_and_scale=True) + moves _FillValue to encoding on read, so the only reliable way to exercise + specific attribute combinations is to write at the netCDF4 level. + + Signature:: + + path = nc4_topo_file_factory( + fill_value=-9999.0, # set at createVariable time; None = omit + missing_value=-8888.0, # written as plain attr; None = omit + **other_kwargs, + ) + + Skips automatically if netCDF4 is not importable. + """ + if not _HAS_NC4: + pytest.skip("netCDF4 not available") + + counter = [0] + + def _make( + *, + fill_value: Optional[float] = None, + missing_value: Optional[float] = None, + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 5, + nlon: int = 5, + lat_direction: str = "S_to_N", + var_name: str = "z", + lon_name: str = "lon", + lat_name: str = "lat", + units: str = "m", + has_fill_in_data: bool = False, + name: Optional[str] = None, + ) -> Path: + counter[0] += 1 + if name is None: + name = f"nc4topo_{counter[0]}.nc" + path = tmp_path / name + + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + data = np.full((nlat, nlon), -100.0, dtype=np.float32) + if has_fill_in_data: + # Write the fill sentinel value so xarray masks it as NaN. + sentinel = fill_value if fill_value is not None else 9.969209968386869e+36 + data[0, 0] = sentinel + + with nc4.Dataset(str(path), "w") as ds: + ds.createDimension(lon_name, nlon) + ds.createDimension(lat_name, nlat) + + lon_var = ds.createVariable(lon_name, "f4", (lon_name,)) + lon_var[:] = lons + + lat_var = ds.createVariable(lat_name, "f4", (lat_name,)) + lat_var[:] = lats + + # _FillValue must be set at createVariable time in netCDF4 + create_kwargs: dict = {} + if fill_value is not None: + create_kwargs["fill_value"] = fill_value + + z_var = ds.createVariable( + var_name, "f4", (lat_name, lon_name), **create_kwargs + ) + z_var.units = units + if missing_value is not None: + z_var.missing_value = missing_value + z_var[:] = data + + return path + + return _make diff --git a/tests/netcdf/test_base_interrogator.py b/tests/netcdf/test_base_interrogator.py new file mode 100644 index 000000000..dcd62bcff --- /dev/null +++ b/tests/netcdf/test_base_interrogator.py @@ -0,0 +1,297 @@ +""" +Tests for NetCDFInterrogator base class. + +Covers: coordinate discovery, lon convention, lat order, dim order, +fill value resolution, crop bound validation, and laziness guarantee. +""" +import warnings + +import numpy as np +import pytest + +from clawpack.geoclaw.netcdf_utils import NetCDFInterrogator, FileMetadata + +from ._helpers import ( + make_topo_dataset, + COORD_VARIANTS, + TOPO_DIM_ORDER_VARIANTS, + FILL_VALUE_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Helpers +# ============================================================ + +def _open(path): + """Return an interrogator; caller is responsible for .close().""" + return NetCDFInterrogator(path) + + +# ============================================================ +# Lon convention detection +# ============================================================ + +@pytest.mark.parametrize("lon_min,lon_max,expected_convention", [ + (-10.0, 10.0, 180), # [-180, 180] convention + (170.0, 190.0, 360), # [0, 360] convention +]) +def test_lon_convention_detected(topo_file_factory, lon_min, lon_max, + expected_convention): + """Interrogator reports the correct longitude convention.""" + path = topo_file_factory(lon_min=lon_min, lon_max=lon_max) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_convention == expected_convention, ( + f"Expected lon_convention={expected_convention} for lon range " + f"[{lon_min}, {lon_max}], got {meta.lon_convention}" + ) + + +# ============================================================ +# Lat order detection +# ============================================================ + +@pytest.mark.parametrize("lat_direction,expected_order", [ + ("S_to_N", "S_to_N"), + ("N_to_S", "N_to_S"), +]) +def test_lat_order_detected(topo_file_factory, lat_direction, expected_order): + """Interrogator reports the correct latitude order.""" + path = topo_file_factory(lat_direction=lat_direction) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lat_order == expected_order, ( + f"Expected lat_order={expected_order!r} for lat_direction=" + f"{lat_direction!r}, got {meta.lat_order!r}" + ) + + +# ============================================================ +# Dim order detection +# ============================================================ + +@pytest.mark.parametrize("dim_kwargs", TOPO_DIM_ORDER_VARIANTS) +def test_dim_order_detected_topo(topo_file_factory, dim_kwargs): + """ + Interrogator reports canonical dim order using role names + ('lat', 'lon') regardless of how the variable is stored. + """ + path = topo_file_factory(**dim_kwargs) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + expected = list(dim_kwargs["dim_order"]) + assert meta.dim_order == expected, ( + f"Expected dim_order={expected}, got {meta.dim_order}" + ) + + +# ============================================================ +# Coordinate discovery via axis attribute +# ============================================================ + +def test_coord_discovery_via_axis_attr(topo_file_factory): + """ + Coordinate discovery succeeds when only the CF axis attribute is present + (no standard_name, non-standard variable name). + """ + path = topo_file_factory( + lon_name="x_coord", lat_name="y_coord", + lon_axis_attr=True, lat_axis_attr=True, + ) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_name == "x_coord" + assert meta.lat_name == "y_coord" + + +def test_coord_discovery_via_fallback_name(topo_file_factory): + """ + Coordinate discovery falls back to common names when no axis/standard_name + is present ('lon', 'lat' are in the fallback list). + """ + path = topo_file_factory(lon_name="lon", lat_name="lat") + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_name == "lon" + assert meta.lat_name == "lat" + + +def test_missing_lon_coord_raises(topo_file_factory): + """ + interrogate() raises ValueError when no longitude coordinate can be found. + """ + import xarray as xr + # Build a dataset whose coordinate names cannot be recognised + ds = make_topo_dataset(lon_name="xpos", lat_name="lat") + # Drop the axis/standard_name hints so it can't be found + ds["xpos"].attrs.clear() + path = topo_file_factory(ds=ds) + with NetCDFInterrogator(path) as intr: + with pytest.raises(ValueError, match="longitude"): + intr.interrogate("z") + + +# ============================================================ +# Fill value resolution +# ============================================================ + +@pytest.mark.parametrize("fill_cfg", FILL_VALUE_VARIANTS) +def test_fill_value_resolution(nc4_topo_file_factory, fill_cfg): + """ + _resolve_fill_value returns the correct value across all attribute + placement combinations. Conflicting _FillValue / missing_value must not + raise or warn from the interrogator (that is the normalizer's job). + + Note: xarray itself emits SerializationWarning when it opens a file that + has conflicting fill values; that warning is suppressed here because it + comes from xarray internals, not from any interrogator code. + """ + path = nc4_topo_file_factory( + fill_value=fill_cfg["fill_value"], + missing_value=fill_cfg["missing_value"], + ) + with warnings.catch_warnings(): + warnings.simplefilter("error") # any interrogator warning is a failure + # xarray emits SerializationWarning for conflicting fill values during + # open_dataset; suppress it so it does not mask real test failures. + warnings.filterwarnings("ignore", message=".*multiple fill values.*") + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + + expected = fill_cfg["expected"] + if expected is None: + assert meta.fill_value is None, ( + f"Expected fill_value=None, got {meta.fill_value}" + ) + else: + assert meta.fill_value == pytest.approx(expected), ( + f"Expected fill_value≈{expected}, got {meta.fill_value}" + ) + + +def test_conflicting_fill_values_no_interrogator_warning(nc4_topo_file_factory): + """ + Conflicting _FillValue and missing_value must not produce a warning from + NetCDFInterrogator. (Warnings are the CFNormalizer's responsibility.) + + xarray itself emits SerializationWarning when opening such a file; that + warning is excluded from the assertion because it originates in xarray + internals, not in any interrogator code. + """ + path = nc4_topo_file_factory(fill_value=-9999.0, missing_value=-8888.0) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with NetCDFInterrogator(path) as intr: + intr.interrogate("z") + + # Filter out xarray's own SerializationWarning about multiple fill values. + interrogator_warnings = [ + w for w in caught + if "multiple fill values" not in str(w.message) + ] + assert not interrogator_warnings, ( + f"Interrogator emitted unexpected warning(s): " + f"{[str(w.message) for w in interrogator_warnings]}" + ) + + +# ============================================================ +# Crop bound validation +# ============================================================ + +def test_crop_bounds_inside_extent_passes(topo_file_factory): + """ + crop_bounds fully inside the file extent does not raise. + """ + path = topo_file_factory(lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + with NetCDFInterrogator(path, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) as intr: + meta = intr.interrogate("z") + assert meta.crop_bounds == (-5.0, 5.0, -5.0, 5.0) + + +@pytest.mark.parametrize("crop_bounds,description", [ + ((-20.0, 5.0, -5.0, 5.0), "lon_min too small"), + ((-5.0, 20.0, -5.0, 5.0), "lon_max too large"), + ((-5.0, 5.0, -20.0, 5.0), "lat_min too small"), + ((-5.0, 5.0, -5.0, 20.0), "lat_max too large"), +]) +def test_crop_bounds_outside_extent_raises(topo_file_factory, crop_bounds, + description): + """ + crop_bounds that exceed the file spatial extent raise ValueError with a + message that identifies which bound is out of range. + """ + path = topo_file_factory(lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + with NetCDFInterrogator(path, crop_bounds=crop_bounds) as intr: + with pytest.raises(ValueError): + intr.interrogate("z") + + +# ============================================================ +# Laziness: no data arrays loaded during interrogation +# ============================================================ + +def test_variables_remain_lazy_after_interrogation(topo_file_factory): + """ + Opening the interrogator with chunks={} keeps variables as Dask arrays. + interrogate() must not trigger a full data load. + """ + pytest.importorskip("dask") + import dask.array as da + + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + intr.interrogate("z") + # The raw data backing the variable should still be a dask array + assert isinstance(intr.ds["z"].data, da.Array), ( + "Variable 'z' should remain a Dask array after interrogation " + "(no full .compute() should have been triggered)" + ) + + +# ============================================================ +# Return type +# ============================================================ + +def test_interrogate_returns_file_metadata(topo_file_factory): + """interrogate() returns a FileMetadata instance.""" + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert isinstance(meta, FileMetadata) + + +def test_interrogate_missing_variable_raises(topo_file_factory): + """interrogate() raises KeyError when the requested variable is absent.""" + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + with pytest.raises(KeyError, match="not_a_var"): + intr.interrogate("not_a_var") + + +# ============================================================ +# All coordinate variant combinations (smoke tests) +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants_smoke(topo_file_factory, coord_kwargs): + """ + interrogate() completes without error for every coord variant and + reports the expected convention and order. + """ + path = topo_file_factory(**coord_kwargs) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + + lon_max = coord_kwargs["lon_max"] + lat_dir = coord_kwargs["lat_direction"] + expected_conv = 360 if lon_max > 180 else 180 + expected_order = lat_dir + + assert meta.lon_convention == expected_conv + assert meta.lat_order == expected_order diff --git a/tests/netcdf/test_cf_normalizer.py b/tests/netcdf/test_cf_normalizer.py new file mode 100644 index 000000000..e889d6220 --- /dev/null +++ b/tests/netcdf/test_cf_normalizer.py @@ -0,0 +1,324 @@ +""" +Tests for CFNormalizer. + +Covers: coordinate renaming, axis/standard_name/units attribute injection, +_FillValue / missing_value conflict resolution, unknown variable passthrough, +and idempotency. +""" +import warnings + +import numpy as np +import pytest +import xarray as xr + +from clawpack.geoclaw.netcdf_utils import CFNormalizer + +from ._helpers import make_topo_dataset + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Helpers +# ============================================================ + +def _simple_ds(lon_name="lon", lat_name="lat", time_name=None) -> xr.Dataset: + """ + Build a minimal Dataset whose coord names may be non-standard. + No axis / standard_name attributes. + + When time_name is given, the data variable includes time as a dimension so + that CFNormalizer.rename() (which renames dimensions too) does not raise a + CoordinateValidationError about dimensions not belonging to any variable. + """ + lons = np.linspace(-10.0, 10.0, 4) + lats = np.linspace(-10.0, 10.0, 4) + coords: dict = { + lon_name: xr.DataArray(lons, dims=[lon_name]), + lat_name: xr.DataArray(lats, dims=[lat_name]), + } + + if time_name is not None: + import pandas as pd + times = pd.date_range("2020-01-01", periods=3, freq="6h") + coords[time_name] = xr.DataArray(times, dims=[time_name]) + data = np.ones((3, 4, 4), dtype=np.float32) + da = xr.DataArray(data, dims=[time_name, lat_name, lon_name], + coords=coords, attrs={"units": "m"}) + else: + data = np.ones((4, 4), dtype=np.float32) + da = xr.DataArray(data, dims=[lat_name, lon_name], coords=coords, + attrs={"units": "m"}) + + return xr.Dataset({"z": da}) + + +def _normalize(ds: xr.Dataset) -> xr.Dataset: + return CFNormalizer(ds).normalize() + + +# ============================================================ +# Coordinate renaming +# ============================================================ + +@pytest.mark.parametrize("alias", ["lon", "x", "nav_lon", "LON", "Longitude"]) +def test_lon_alias_renamed_to_longitude(alias): + """ + Common longitude aliases are renamed to 'longitude'. + """ + ds = _simple_ds(lon_name=alias) + ds_norm = _normalize(ds) + assert "longitude" in ds_norm.coords, ( + f"Expected 'longitude' in coords after renaming '{alias}'" + ) + + +@pytest.mark.parametrize("alias", ["lat", "y", "nav_lat", "LAT", "Latitude"]) +def test_lat_alias_renamed_to_latitude(alias): + """ + Common latitude aliases are renamed to 'latitude'. + """ + ds = _simple_ds(lat_name=alias) + ds_norm = _normalize(ds) + assert "latitude" in ds_norm.coords, ( + f"Expected 'latitude' in coords after renaming '{alias}'" + ) + + +@pytest.mark.parametrize("alias", ["t", "TIME", "valid_time", "Time"]) +def test_time_alias_renamed_to_time(alias): + """ + Common time aliases are renamed to 'time'. + """ + ds = _simple_ds(time_name=alias) + ds_norm = _normalize(ds) + assert "time" in ds_norm.coords, ( + f"Expected 'time' in coords after renaming '{alias}'" + ) + + +def test_already_canonical_names_not_duplicated(): + """ + A dataset already using 'longitude', 'latitude', 'time' is left unchanged + and no duplicate coordinates are introduced. + """ + ds = _simple_ds(lon_name="longitude", lat_name="latitude", + time_name="time") + ds_norm = _normalize(ds) + assert "longitude" in ds_norm.coords + assert "latitude" in ds_norm.coords + assert len(ds_norm.coords) == len(ds.coords) + + +def test_unrecognised_coord_name_not_renamed(): + """ + A coordinate with a name that does not match any alias (e.g. 'x_pos') + is not renamed. + """ + ds = _simple_ds(lon_name="x_pos", lat_name="y_pos") + ds_norm = _normalize(ds) + assert "x_pos" in ds_norm.coords + assert "longitude" not in ds_norm.coords + + +# ============================================================ +# Attribute injection +# ============================================================ + +def test_standard_name_added_to_longitude(): + """standard_name='longitude' is added to a renamed longitude coord.""" + ds = _simple_ds(lon_name="lon") + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("standard_name") == "longitude" + + +def test_standard_name_added_to_latitude(): + """standard_name='latitude' is added to a renamed latitude coord.""" + ds = _simple_ds(lat_name="lat") + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("standard_name") == "latitude" + + +def test_axis_x_added_to_longitude(): + """axis='X' is added to the longitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("axis") == "X" + + +def test_axis_y_added_to_latitude(): + """axis='Y' is added to the latitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("axis") == "Y" + + +def test_axis_t_added_to_time(): + """axis='T' is added to the time coordinate.""" + ds = _simple_ds(time_name="time") + ds_norm = _normalize(ds) + assert ds_norm["time"].attrs.get("axis") == "T" + + +def test_units_degrees_east_added_to_longitude(): + """units='degrees_east' is added to the longitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("units") == "degrees_east" + + +def test_units_degrees_north_added_to_latitude(): + """units='degrees_north' is added to the latitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("units") == "degrees_north" + + +def test_existing_attrs_not_overwritten(): + """ + Attributes already present on a coordinate are not overwritten by the + normalizer (setdefault semantics). + """ + ds = _simple_ds(lon_name="lon") + ds["lon"].attrs["standard_name"] = "custom_longitude" + ds["lon"].attrs["axis"] = "custom_axis" + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs["standard_name"] == "custom_longitude" + assert ds_norm["longitude"].attrs["axis"] == "custom_axis" + + +# ============================================================ +# Fill value conflict resolution +# ============================================================ + +def test_only_missing_value_promoted_to_fill_value(): + """ + When only missing_value is present it is promoted to _FillValue and + missing_value is removed. + """ + ds = _simple_ds() + ds["z"].attrs["missing_value"] = -8888.0 + + ds_norm = _normalize(ds) + + assert ds_norm["z"].attrs.get("_FillValue") == -8888.0, ( + "missing_value should have been promoted to _FillValue" + ) + assert "missing_value" not in ds_norm["z"].attrs, ( + "missing_value should have been removed after promotion" + ) + + +def test_conflicting_fill_values_warns_and_uses_fill_value(): + """ + Conflicting _FillValue and missing_value emit a UserWarning and resolve + to _FillValue (CF precedence); missing_value is removed. + """ + ds = _simple_ds() + ds["z"].attrs["_FillValue"] = -9999.0 + ds["z"].attrs["missing_value"] = -8888.0 + + with pytest.warns(UserWarning, match="_FillValue|missing_value|conflict"): + ds_norm = _normalize(ds) + + assert ds_norm["z"].attrs.get("_FillValue") == -9999.0 + assert "missing_value" not in ds_norm["z"].attrs + + +def test_agreeing_fill_values_no_warning(): + """ + When _FillValue and missing_value agree, no warning is emitted and the + value is kept. + """ + ds = _simple_ds() + ds["z"].attrs["_FillValue"] = -9999.0 + ds["z"].attrs["missing_value"] = -9999.0 + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + ds_norm = _normalize(ds) + + assert not caught, ( + f"Unexpected warnings when fill values agree: " + f"{[str(w.message) for w in caught]}" + ) + assert "missing_value" not in ds_norm["z"].attrs + + +def test_neither_fill_value_no_change(): + """When no fill-value attributes are present the variable is unchanged.""" + ds = _simple_ds() + assert "_FillValue" not in ds["z"].attrs + assert "missing_value" not in ds["z"].attrs + + ds_norm = _normalize(ds) + + assert "_FillValue" not in ds_norm["z"].attrs + assert "missing_value" not in ds_norm["z"].attrs + + +# ============================================================ +# Unknown variables left untouched +# ============================================================ + +def test_unknown_data_variable_attrs_unchanged(): + """ + Data variables with names that are not part of CF coordinate standards + are not modified (no silent attribute injection on arbitrary variables). + """ + ds = _simple_ds() + ds["z"].attrs["custom_attr"] = "do_not_touch" + original_attrs = dict(ds["z"].attrs) + + ds_norm = _normalize(ds) + + # custom_attr must still be present and unchanged + assert ds_norm["z"].attrs.get("custom_attr") == "do_not_touch" + + +# ============================================================ +# Original dataset not modified (copy semantics) +# ============================================================ + +def test_original_dataset_not_modified(): + """CFNormalizer operates on a copy; the input dataset is not modified.""" + ds = _simple_ds(lon_name="lon") + assert "longitude" not in ds.coords # confirm pre-condition + + _ = _normalize(ds) + + assert "longitude" not in ds.coords, ( + "CFNormalizer should not modify the original dataset" + ) + assert "lon" in ds.coords + + +# ============================================================ +# Idempotency +# ============================================================ + +def test_normalize_is_idempotent(): + """ + Calling CFNormalizer.normalize() twice produces the same result as once. + No attributes are doubled or changed on a second pass. + """ + ds = _simple_ds(lon_name="lon", lat_name="lat", time_name="time") + ds["z"].attrs["missing_value"] = -8888.0 # will be promoted on first pass + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + ds_once = _normalize(ds) + ds_twice = _normalize(ds_once) + + # Coordinate attributes should be the same + for coord in ("longitude", "latitude", "time"): + if coord in ds_once.coords: + assert ds_once[coord].attrs == ds_twice[coord].attrs, ( + f"Coord '{coord}' attrs differ between first and second normalize" + ) + + # Data variable attributes should be stable + for var in ds_once.data_vars: + assert ds_once[var].attrs == ds_twice[var].attrs, ( + f"Variable '{var}' attrs differ between first and second normalize" + ) diff --git a/tests/netcdf/test_descriptor_writer.py b/tests/netcdf/test_descriptor_writer.py new file mode 100644 index 000000000..a307e81de --- /dev/null +++ b/tests/netcdf/test_descriptor_writer.py @@ -0,0 +1,360 @@ +""" +Tests for DescriptorWriter. + +Covers: topo descriptor round-trip, met descriptor structure, optional +fields omitted when not specified, and crop_bounds presence/absence. +""" +import io +import re + +import pytest + +from clawpack.geoclaw.netcdf_utils import ( + DescriptorWriter, + TopoInterrogator, + MetInterrogator, + TopoMetadata, + MetMetadata, + MetVariableInfo, +) + +from ._helpers import make_topo_dataset, make_met_dataset + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + +_VAR_MAP = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + + +# ============================================================ +# Helpers +# ============================================================ + +def _parse_topo_descriptor(text: str) -> dict: + """ + Parse a topo descriptor block (key = value lines) into a dict. + Stops at the first blank line. + """ + result = {} + for line in text.splitlines(): + line = line.strip() + if not line: + break + if "=" in line: + key, _, value = line.partition("=") + result[key.strip()] = value.strip() + return result + + +def _parse_met_descriptor(text: str) -> dict: + """ + Parse a met descriptor written by write_met_descriptor into: + { + 'file_info': {key: value, ...}, + 'variable_info': [ {key: value, ...}, ... ] + } + """ + result: dict = {"file_info": {}, "variable_info": []} + in_file_info = False + in_var_info = False + current_var: dict = {} + + for line in text.splitlines(): + stripped = line.strip() + if stripped == "&file_info": + in_file_info = True + in_var_info = False + continue + if stripped.startswith("&variable_info"): + if in_var_info and current_var: + result["variable_info"].append(current_var) + in_file_info = False + in_var_info = True + current_var = {} + # variable_info content may be on the same line + rest = stripped[len("&variable_info"):].strip() + for kv in re.findall(r'(\w+)=(\S+)', rest): + current_var[kv[0]] = kv[1] + continue + if stripped == "/": + if in_file_info: + in_file_info = False + elif in_var_info: + if current_var: + result["variable_info"].append(current_var) + current_var = {} + in_var_info = False + continue + if in_file_info and "=" in stripped: + key, _, value = stripped.partition("=") + result["file_info"][key.strip()] = value.strip() + if in_var_info and "=" in stripped: + for kv in re.findall(r'(\w+)=(\S+)', stripped): + current_var[kv[0]] = kv[1] + + if in_var_info and current_var: + result["variable_info"].append(current_var) + + return result + + +def _get_topo_meta(topo_file_factory, **kwargs) -> TopoMetadata: + """Build a TopoMetadata via interrogation for use in writer tests.""" + import warnings + path = topo_file_factory(**kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with TopoInterrogator(path, var_name="z") as intr: + return intr.interrogate_topo() + + +def _get_met_meta(met_file_factory, **kwargs) -> MetMetadata: + """Build a MetMetadata via interrogation for use in writer tests.""" + import warnings + path = met_file_factory(**kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + return intr.interrogate_met() + + +# ============================================================ +# Topo descriptor +# ============================================================ + +def test_topo_descriptor_required_keys_present(topo_file_factory): + """ + write_topo_descriptor emits all mandatory keys: var_name, lon_name, + lat_name, lon_offset, lat_order, dim_order, fill_action. + """ + meta = _get_topo_meta(topo_file_factory) + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + for key in ("var_name", "lon_name", "lat_name", "lon_offset", + "lat_order", "dim_order", "fill_action"): + assert key in parsed, f"Missing required key '{key}' in topo descriptor" + + +def test_topo_descriptor_values_match_metadata(topo_file_factory): + """ + Values in the topo descriptor text match the TopoMetadata they came from. + """ + meta = _get_topo_meta(topo_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + lat_direction="N_to_S") + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert parsed["var_name"] == meta.var_name + assert parsed["lon_name"] == meta.lon_name + assert parsed["lat_name"] == meta.lat_name + assert float(parsed["lon_offset"]) == pytest.approx(meta.lon_offset) + assert parsed["lat_order"] == meta.lat_order + assert parsed["dim_order"] == ",".join(meta.dim_order) + assert parsed["fill_action"] == meta.fill_action + + +def test_topo_descriptor_fill_value_omitted_when_none(topo_file_factory): + """fill_value line is absent from descriptor when TopoMetadata.fill_value is None.""" + meta = _get_topo_meta(topo_file_factory) + # Force fill_value to None in the metadata + import dataclasses + meta = dataclasses.replace(meta, fill_value=None) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "fill_value" not in parsed + + +def test_topo_descriptor_fill_value_present_when_set(topo_file_factory): + """fill_value line IS written when TopoMetadata.fill_value is not None.""" + import dataclasses + meta = _get_topo_meta(topo_file_factory) + meta = dataclasses.replace(meta, fill_value=-9999.0) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "fill_value" in parsed + assert float(parsed["fill_value"]) == pytest.approx(-9999.0) + + +def test_topo_descriptor_crop_bounds_omitted_when_none(topo_file_factory): + """crop_bounds line is absent when no crop_bounds were specified.""" + meta = _get_topo_meta(topo_file_factory) + assert meta.crop_bounds is None # confirm fixture has no crop + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "crop_bounds" not in parsed + + +def test_topo_descriptor_crop_bounds_present_when_set(topo_file_factory): + """crop_bounds line is written with all four values when specified.""" + import dataclasses + meta = _get_topo_meta(topo_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + meta = dataclasses.replace(meta, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "crop_bounds" in parsed + parts = parsed["crop_bounds"].split() + assert len(parts) == 4 + lon0, lon1, lat0, lat1 = map(float, parts) + assert lon0 == pytest.approx(-5.0) + assert lon1 == pytest.approx(5.0) + assert lat0 == pytest.approx(-5.0) + assert lat1 == pytest.approx(5.0) + + +def test_descriptor_writes_lon_offset_not_convention(topo_file_factory): + """ + Topo descriptor must contain 'lon_offset' as a parseable float and must + NOT contain 'lon_convention'. This verifies the format-version-2 change. + """ + import dataclasses + meta = _get_topo_meta(topo_file_factory) + # Exercise a non-zero offset to confirm the value round-trips. + meta = dataclasses.replace(meta, lon_offset=-360.0) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + text = buf.getvalue() + parsed = _parse_topo_descriptor(text) + + assert "lon_offset" in parsed, "Descriptor must contain 'lon_offset'" + assert "lon_convention" not in parsed, ( + "Descriptor must NOT contain 'lon_convention' (superseded by lon_offset)" + ) + assert float(parsed["lon_offset"]) == pytest.approx(-360.0) + + +def test_topo_descriptor_ends_with_blank_line(topo_file_factory): + """ + Descriptor block ends with a blank line so the Fortran parser can detect + the end of the block. + """ + meta = _get_topo_meta(topo_file_factory) + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + text = buf.getvalue() + assert text.endswith("\n\n"), ( + "Topo descriptor must end with a blank line (two consecutive newlines) " + "for the Fortran read loop to terminate correctly" + ) + + +# ============================================================ +# Met descriptor +# ============================================================ + +def test_met_descriptor_file_info_block_present(met_file_factory): + """write_met_descriptor writes a &file_info block.""" + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert parsed["file_info"], "file_info block is missing or empty" + + +def test_met_descriptor_file_info_required_keys(met_file_factory): + """ + &file_info block contains all mandatory keys: lon_name, lat_name, + time_name, dim_order, lon_convention, lat_order, fill_action, time_offset. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + fi = parsed["file_info"] + + for key in ("lon_name", "lat_name", "time_name", + "dim_order", "lon_convention", "lat_order", + "fill_action", "time_offset"): + assert key in fi, f"Missing key '{key}' in &file_info block" + + +def test_met_descriptor_variable_info_blocks_for_all_roles(met_file_factory): + """ + One &variable_info block is written for each entry in variable_map. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert len(parsed["variable_info"]) == len(_VAR_MAP), ( + f"Expected {len(_VAR_MAP)} variable_info blocks, " + f"got {len(parsed['variable_info'])}" + ) + + +def test_met_descriptor_variable_info_roles_present(met_file_factory): + """ + Each &variable_info block contains geoclaw_role and var_name. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + for block in parsed["variable_info"]: + assert "var_name" in block, ( + f"variable_info block missing 'var_name': {block}" + ) + assert "geoclaw_role" in block, ( + f"variable_info block missing 'geoclaw_role': {block}" + ) + + +def test_met_descriptor_crop_bounds_omitted_when_none(met_file_factory): + """crop_bounds is absent from &file_info when not specified.""" + meta = _get_met_meta(met_file_factory) + assert meta.crop_bounds is None + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "crop_bounds" not in parsed["file_info"] + + +def test_met_descriptor_crop_bounds_present_when_set(met_file_factory): + """crop_bounds appears in &file_info when crop_bounds are specified.""" + import dataclasses + meta = _get_met_meta(met_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + meta = dataclasses.replace(meta, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "crop_bounds" in parsed["file_info"] + + +def test_met_descriptor_fill_value_omitted_when_none(met_file_factory): + """fill_value line is absent from &file_info when MetMetadata.fill_value is None.""" + import dataclasses + meta = _get_met_meta(met_file_factory) + meta = dataclasses.replace(meta, fill_value=None) + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "fill_value" not in parsed["file_info"] diff --git a/tests/netcdf/test_met_interrogator.py b/tests/netcdf/test_met_interrogator.py new file mode 100644 index 000000000..ef2a75411 --- /dev/null +++ b/tests/netcdf/test_met_interrogator.py @@ -0,0 +1,296 @@ +""" +Tests for MetInterrogator. + +Covers: variable presence/absence, grid consistency, unit passthrough and +conversion, time offset calculation, ensemble dimension handling, and +descriptor output structure. +""" +import warnings + +import numpy as np +import pandas as pd +import pytest + +from clawpack.geoclaw.netcdf_utils import MetInterrogator, MetMetadata + +from ._helpers import ( + make_met_dataset, + COORD_VARIANTS, + MET_DIM_ORDER_VARIANTS, + PRESSURE_UNIT_VARIANTS, + WIND_UNIT_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + +# Default variable map used throughout +_VAR_MAP = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + + +# ============================================================ +# Variable presence / absence +# ============================================================ + +def test_all_variables_present_passes(met_file_factory): + """All required variables present: interrogate_met() returns MetMetadata.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert isinstance(meta, MetMetadata) + + +@pytest.mark.parametrize("missing_role", ["wind_u", "wind_v", "pressure"]) +def test_missing_variable_raises(met_file_factory, missing_role): + """ + interrogate_met() raises KeyError with a message identifying the missing + variable when one required variable is absent from the file. + """ + path = met_file_factory(omit_roles=[missing_role]) + expected_var = _VAR_MAP[missing_role] + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(KeyError, match=expected_var): + intr.interrogate_met() + + +def test_empty_variable_map_raises(met_file_factory): + """An empty variable_map raises ValueError before any file inspection.""" + path = met_file_factory() + with MetInterrogator(path, variable_map={}) as intr: + with pytest.raises(ValueError, match="variable_map"): + intr.interrogate_met() + + +# ============================================================ +# Grid / dimension consistency +# ============================================================ + +def test_mismatched_variable_dims_raises(met_file_factory): + """ + interrogate_met() raises ValueError when variables do not share + the same set of dimensions. + """ + path = met_file_factory(mismatch_dims=True) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Gg]rid|[Dd]im"): + intr.interrogate_met() + + +def test_missing_time_coord_raises(met_file_factory): + """ + A file with no time coordinate raises ValueError (met forcing requires + a time axis). + """ + import xarray as xr + ds = make_met_dataset() + # Remove the time dimension by selecting a single time step + ds_no_time = ds.isel(time=0).drop_vars("time") + path = met_file_factory(ds=ds_no_time) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Tt]ime"): + intr.interrogate_met() + + +# ============================================================ +# Ensemble dimension handling +# ============================================================ + +def test_singleton_ensemble_dim_passes(met_file_factory): + """ + A singleton extra dimension (e.g. a single ensemble member) does not + cause interrogate_met() to raise. + """ + path = met_file_factory(extra_dim=("member", 1)) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert isinstance(meta, MetMetadata) + + +def test_non_singleton_ensemble_dim_raises(met_file_factory): + """ + A non-singleton extra dimension raises ValueError — GeoClaw does not + support ensemble/member dimensions. + """ + path = met_file_factory(extra_dim=("member", 3)) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Ee]nsemble|[Mm]ember|singleton"): + intr.interrogate_met() + + +# ============================================================ +# Unit verification and passthrough +# ============================================================ + +@pytest.mark.parametrize("unit_cfg", PRESSURE_UNIT_VARIANTS) +def test_pressure_unit_variants(met_file_factory, unit_cfg): + """ + Pressure unit recorded in MetVariableInfo.source_units matches the unit + string in the file. Conversion-needed cases emit a warning; Pa passes + silently. + """ + pressure_units = unit_cfg["pressure_units"] + expected_source = unit_cfg["expected_source"] + + path = met_file_factory(pressure_units=pressure_units) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + pressure_info = next(v for v in meta.variables if v.geoclaw_role == "pressure") + assert pressure_info.source_units == expected_source, ( + f"For pressure_units={pressure_units!r}: expected source_units=" + f"{expected_source!r}, got {pressure_info.source_units!r}" + ) + + +@pytest.mark.parametrize("unit_cfg", WIND_UNIT_VARIANTS) +def test_wind_unit_variants(met_file_factory, unit_cfg): + """ + Wind unit recorded in MetVariableInfo.source_units matches the unit + string in the file. knots triggers a warning; m/s passes silently. + """ + wind_units = unit_cfg["wind_units"] + expected_source = unit_cfg["expected_source"] + + path = met_file_factory(wind_units=wind_units) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + wind_u_info = next(v for v in meta.variables if v.geoclaw_role == "wind_u") + assert wind_u_info.source_units == expected_source + + +def test_contract_units_no_warning(met_file_factory): + """ + Files already in contract units (Pa, m/s) do not emit any unit warnings. + """ + path = met_file_factory(pressure_units="Pa", wind_units="m/s") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + intr.interrogate_met() + + +def test_unrecognised_pressure_unit_raises(met_file_factory): + """An unrecognised pressure unit string raises ValueError.""" + path = met_file_factory(pressure_units="atm_weird_unknown") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Uu]nrecogni"): + intr.interrogate_met() + + +# ============================================================ +# Time offset calculation +# ============================================================ + +def test_time_offset_unix_epoch_default(met_file_factory): + """ + With no time_reference, time_offset is seconds between the Unix epoch + (1970-01-01) and the first time value in the file. + """ + time_start = "2020-01-01" + path = met_file_factory(time_start=time_start) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + expected_offset = pd.Timestamp(time_start).timestamp() + assert meta.time_offset == pytest.approx(expected_offset, rel=1e-6), ( + f"Expected time_offset≈{expected_offset}, got {meta.time_offset}" + ) + + +def test_time_offset_custom_reference(met_file_factory): + """ + time_offset is zero when time_reference matches the first time in the file. + """ + time_start = "2020-06-15T12:00:00" + path = met_file_factory(time_start=time_start) + with MetInterrogator(path, variable_map=_VAR_MAP, + time_reference=time_start) as intr: + meta = intr.interrogate_met() + assert meta.time_offset == pytest.approx(0.0, abs=1.0), ( + f"Expected time_offset≈0, got {meta.time_offset}" + ) + + +# ============================================================ +# MetMetadata structure +# ============================================================ + +def test_metadata_variables_all_roles_present(met_file_factory): + """ + MetMetadata.variables contains one entry per variable_map role. + """ + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + roles = {v.geoclaw_role for v in meta.variables} + assert roles == set(_VAR_MAP.keys()), ( + f"Expected roles {set(_VAR_MAP.keys())}, got {roles}" + ) + + +def test_metadata_variable_names_match_map(met_file_factory): + """MetVariableInfo.var_name matches the value in variable_map.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + for v in meta.variables: + assert v.var_name == _VAR_MAP[v.geoclaw_role], ( + f"Role {v.geoclaw_role!r}: expected var_name=" + f"{_VAR_MAP[v.geoclaw_role]!r}, got {v.var_name!r}" + ) + + +def test_fill_action_default_is_warn(met_file_factory): + """Default fill_action for MetInterrogator is 'warn'.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert meta.fill_action == "warn" + + +def test_fill_action_abort_propagated(met_file_factory): + """fill_action='abort' is forwarded into MetMetadata.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP, + fill_action="abort") as intr: + meta = intr.interrogate_met() + assert meta.fill_action == "abort" + + +def test_invalid_fill_action_raises(met_file_factory): + """ + Passing an invalid fill_action raises ValueError at construction. + + A real file path is required because MetInterrogator opens the file in + super().__init__() before the fill_action validation check fires. + """ + path = met_file_factory() + with pytest.raises(ValueError, match="fill_action"): + MetInterrogator(path, variable_map=_VAR_MAP, fill_action="ignore") + + +# ============================================================ +# All coordinate and dim-order variants +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants(met_file_factory, coord_kwargs): + """interrogate_met() completes for every coordinate variant.""" + path = met_file_factory(**coord_kwargs) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + expected_conv = 360 if coord_kwargs["lon_max"] > 180 else 180 + assert meta.lon_convention == expected_conv + + +@pytest.mark.parametrize("dim_kwargs", MET_DIM_ORDER_VARIANTS) +def test_dim_order_variants(met_file_factory, dim_kwargs): + """interrogate_met() reports correct dim_order for all axis arrangements.""" + path = met_file_factory(**dim_kwargs) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + expected = [{"lat": "lat", "lon": "lon", "time": "time"}.get(d, d) + for d in dim_kwargs["dim_order"]] + assert meta.dim_order == expected diff --git a/tests/netcdf/test_topo_interrogator.py b/tests/netcdf/test_topo_interrogator.py new file mode 100644 index 000000000..1e804bc35 --- /dev/null +++ b/tests/netcdf/test_topo_interrogator.py @@ -0,0 +1,471 @@ +""" +Tests for TopoInterrogator. + +Covers: fill-in-crop detection, fill-outside-crop pass, unit verification, +unit-conversion warn path, missing units warn path, descriptor key presence, +all coordinate variant combinations, and topo_entries() lon wrapping. +""" +import warnings + +import pytest + +from clawpack.geoclaw.netcdf_utils import TopoInterrogator, TopoMetadata + +from ._helpers import ( + make_topo_dataset, + COORD_VARIANTS, + TOPO_CF_STANDARD_NAME_VARIANTS, + TOPO_DIM_ORDER_VARIANTS, + TOPO_FALLBACK_NAME_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Fill value in / outside crop region +# ============================================================ + +def test_fill_in_crop_region_raises(topo_file_factory): + """ + TopoInterrogator raises ValueError when NaN is present within the + crop region. Silent NaN in bathymetry would corrupt simulations. + """ + path = topo_file_factory( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + has_fill_in_data=True, # NaN placed at data.flat[0] + ) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-10.0, 10.0, -10.0, 10.0)) + with pytest.raises(ValueError, match="[Ff]ill"): + intr.interrogate_topo() + intr.close() + + +def test_fill_outside_crop_region_passes(topo_file_factory): + """ + TopoInterrogator passes when NaN exists only outside the crop region. + """ + # The dataset has NaN at data.flat[0] which corresponds to the corner + # of the full grid. We crop to the interior so the NaN is outside. + ds = make_topo_dataset( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + nlat=10, nlon=10, + has_fill_in_data=True, # NaN is at lat[-10], lon[-10] (S-W corner) + lat_direction="S_to_N", + ) + # The NaN is at the south-west corner; crop to the north-east interior. + path = topo_file_factory(ds=ds) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(5.0, 10.0, 5.0, 10.0)) + meta = intr.interrogate_topo() # must not raise + intr.close() + assert isinstance(meta, TopoMetadata) + + +def test_no_fill_no_crop_passes(topo_file_factory): + """TopoInterrogator passes when there are no NaN values at all.""" + path = topo_file_factory(has_fill_in_data=False) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + + +# ============================================================ +# Unit verification +# ============================================================ + +def test_units_meters_passes_without_warning(topo_file_factory): + """ + A variable whose units attribute is 'm' (or any meters alias) passes + unit verification without emitting a warning. + """ + path = topo_file_factory(units="m") + with warnings.catch_warnings(): + warnings.simplefilter("error") # any warning is a test failure + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units in {"m", "meter", "meters", "metre", "metres"} + + +def test_units_convertible_warns_and_records_source(topo_file_factory): + """ + A variable with units convertible to meters (e.g. 'cm') triggers a + UserWarning and records the original unit string in source_units so + that a downstream conversion factor can be applied. + """ + path = topo_file_factory(units="cm") + with pytest.warns(UserWarning, match="cm"): + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units == "cm", ( + f"Expected source_units='cm', got {meta.source_units!r}" + ) + + +def test_units_unrecognised_raises(topo_file_factory): + """ + A variable with units not in the known conversion table raises ValueError. + """ + path = topo_file_factory(units="furlongs") + with TopoInterrogator(path, var_name="z") as intr: + with pytest.raises(ValueError, match="[Uu]nrecogni"): + intr.interrogate_topo() + + +def test_units_missing_warns_and_assumes_meters(topo_file_factory): + """ + A variable with no units attribute emits a UserWarning and assumes 'm'. + The interrogation itself must not raise. + """ + path = topo_file_factory(units="") # empty string → no 'units' attr written + with pytest.warns(UserWarning): + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units == "m", ( + f"Expected assumed source_units='m', got {meta.source_units!r}" + ) + + +# ============================================================ +# Descriptor output keys +# ============================================================ + +def test_interrogate_topo_returns_topo_metadata(topo_file_factory): + """interrogate_topo() returns a TopoMetadata instance.""" + path = topo_file_factory() + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + + +def test_topo_metadata_required_fields_present(topo_file_factory): + """ + TopoMetadata contains all required fields with valid values after + interrogation of a well-formed file. + """ + path = topo_file_factory( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + units="m", + ) + # crop_bounds is passed to TopoInterrogator, not to the dataset factory. + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + meta = intr.interrogate_topo() + intr.close() + + assert meta.var_name == "z" + assert meta.lon_name is not None and len(meta.lon_name) > 0 + assert meta.lat_name is not None and len(meta.lat_name) > 0 + assert meta.lon_convention in (180, 360) + assert meta.lat_order in ("N_to_S", "S_to_N") + assert len(meta.dim_order) == 2 + assert meta.fill_action == "abort" + assert meta.source_units == "m" + assert meta.crop_bounds == (-5.0, 5.0, -5.0, 5.0) + + +def test_topo_metadata_crop_bounds_none_when_not_specified(topo_file_factory): + """When no crop_bounds are given, TopoMetadata.crop_bounds is None.""" + path = topo_file_factory() + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.crop_bounds is None + + +# ============================================================ +# All coordinate variant combinations +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants(topo_file_factory, coord_kwargs): + """ + interrogate_topo() completes without error for every coordinate variant + and reports consistent lon_convention / lat_order values. + """ + path = topo_file_factory(**coord_kwargs) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + + expected_conv = 360 if coord_kwargs["lon_max"] > 180 else 180 + assert meta.lon_convention == expected_conv + assert meta.lat_order == coord_kwargs["lat_direction"] + + +@pytest.mark.parametrize("dim_kwargs", TOPO_DIM_ORDER_VARIANTS) +def test_dim_order_variants(topo_file_factory, dim_kwargs): + """ + interrogate_topo() reports the correct dim_order regardless of how + the variable axes are arranged in the file. + """ + path = topo_file_factory(**dim_kwargs) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.dim_order == list(dim_kwargs["dim_order"]) + + +# ============================================================ +# Variable auto-detection (no var_name supplied) +# ============================================================ + +@pytest.mark.parametrize("std_name", TOPO_CF_STANDARD_NAME_VARIANTS) +def test_autodetect_by_cf_standard_name(topo_file_factory, std_name): + """ + When var_name is omitted, TopoInterrogator finds the elevation variable + by matching the CF standard_name attribute. The matched name is written + back into the returned metadata. + """ + ds = make_topo_dataset(var_name="topo_data", var_standard_name=std_name) + path = topo_file_factory(ds=ds) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + assert meta.var_name == "topo_data" + + +@pytest.mark.parametrize("var_name", TOPO_FALLBACK_NAME_VARIANTS) +def test_autodetect_by_fallback_name(topo_file_factory, var_name): + """ + When var_name is omitted, TopoInterrogator falls back to matching the + variable name against the built-in list of common elevation names. + """ + path = topo_file_factory(var_name=var_name) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + assert meta.var_name == var_name + + +def test_autodetect_no_match_raises(topo_file_factory): + """ + When var_name is omitted and no variable name or CF standard_name matches + any known elevation identifier, interrogate_topo() raises ValueError with + a message that mentions auto-detect and lists available variables. + """ + path = topo_file_factory(var_name="completely_unknown_field") + with TopoInterrogator(path) as intr: + with pytest.raises(ValueError, match="auto-detect"): + intr.interrogate_topo() + + +# ============================================================ +# topo_entries() — longitude offset and wrap logic +# ============================================================ + +def test_topo_entries_no_wrap(topo_file_factory): + """ + File [-180, 180], crop (-90, 0, -45, 45): single entry, offset 0, + crop_bounds in metadata equal the requested file-coord crop. + """ + path = topo_file_factory(lon_min=-180.0, lon_max=180.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-90.0, 0.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 1 + ttype, fpath, meta = entries[0] + assert ttype == 4 + assert meta.lon_offset == pytest.approx(0.0) + assert meta.crop_bounds is not None + lon0, lon1, lat0, lat1 = meta.crop_bounds + assert lon0 == pytest.approx(-90.0) + assert lon1 == pytest.approx(0.0) + assert lat0 == pytest.approx(-45.0) + assert lat1 == pytest.approx(45.0) + + +def test_topo_entries_simple_shift(topo_file_factory): + """ + File [0, 360], crop (0, 180, -45, 45): single entry with lon_offset 0.0. + """ + path = topo_file_factory(lon_min=0.0, lon_max=360.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(0.0, 180.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 1 + assert entries[0][2].lon_offset == pytest.approx(0.0) + + +def test_topo_entries_simple_shift_negative(topo_file_factory): + """ + File [0, 360], crop (-180, 0, -45, 45): 1 or 2 entries are both valid; + combined domain coverage must equal 180 degrees. + """ + path = topo_file_factory(lon_min=0.0, lon_max=360.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-180.0, 0.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert 1 <= len(entries) <= 2 + # Total lon coverage across all entries must equal the requested domain width. + total = sum( + meta.crop_bounds[1] - meta.crop_bounds[0] + for _, _, meta in entries + if meta.crop_bounds is not None + ) + assert total == pytest.approx(180.0) + + +def test_topo_entries_wrap_required(topo_file_factory): + """ + File [-180, 180], domain (-360, 0): requires two entries. + Entry 1: file_crop [-180, 0], lon_offset 0.0. + Entry 2: file_crop [0, 180], lon_offset -360.0. + Combined domain coverage == 360 degrees. + """ + path = topo_file_factory(lon_min=-180.0, lon_max=180.0, + lat_min=-90.0, lat_max=90.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-360.0, 0.0, -90.0, 90.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 2 + + # Collect (file_crop_min, file_crop_max, lon_offset) pairs (order not mandated). + pairs = { + (meta.crop_bounds[0], meta.crop_bounds[1], meta.lon_offset) + for _, _, meta in entries + if meta.crop_bounds is not None + } + assert (-180.0, 0.0, 0.0) in pairs + assert (0.0, 180.0, -360.0) in pairs + + +def test_topo_entries_no_coverage(topo_file_factory): + """ + File [0, 90], domain [-180, -90]: no candidate offset can cover the + domain; _compute_lon_entries raises ValueError. + """ + path = topo_file_factory(lon_min=0.0, lon_max=90.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-180.0, -90.0, -45.0, 45.0)) + with pytest.raises(ValueError): + intr.topo_entries() + intr.close() + + +def test_topo_entries_near_global_gap(topo_file_factory): + """ + Near-global file (e.g. GEBCO style) with lons [-179.99, 179.99] has a + gap of ~0.0042 degrees at the dateline — one grid cell. A domain that + straddles the dateline (crop [-211, -99, 40, 75]) must still produce two + entries without raising ValueError, since the gap is within one grid + spacing. + """ + import numpy as np + import xarray as xr + + # 121 points from -179.99 to 179.99 gives spacing ~3.0 degrees; the + # "missing" wrap column sits between 179.99 and -179.99 + 360 = 180.01. + lons = np.linspace(-179.99, 179.99, 121) + lats = np.linspace(40.0, 75.0, 10) + data = np.full((len(lats), len(lons)), -500.0, dtype=np.float32) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + ds = xr.Dataset( + {"z": xr.DataArray(data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"})} + ) + path = topo_file_factory(ds=ds) + + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-211.0, -99.0, 40.0, 75.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 2, ( + f"Expected 2 entries for dateline-straddling crop, got {len(entries)}" + ) + # The seam gap (domain width minus combined file-coord coverage) must be + # within one grid spacing. crop_bounds widths equal domain-coord widths + # because lon_offset is a constant shift. + lon_spacing = float(lons[1] - lons[0]) + total_covered = sum( + meta.crop_bounds[1] - meta.crop_bounds[0] + for _, _, meta in entries + if meta.crop_bounds is not None + ) + domain_width = -99.0 - (-211.0) + gap = domain_width - total_covered + assert gap <= lon_spacing, ( + f"Seam gap {gap:.6f} exceeds grid spacing {lon_spacing:.6f}" + ) + + +def test_topo_entries_genuine_gap_still_errors(topo_file_factory): + """ + File [-90, 90] genuinely cannot cover domain [-200, -50]: the uncovered + gap (~60 degrees) is far larger than the grid spacing, so ValueError + must still be raised even with the near-global-gap tolerance. + """ + import numpy as np + import xarray as xr + + lons = np.linspace(-90.0, 90.0, 91) + lats = np.linspace(40.0, 75.0, 10) + data = np.full((len(lats), len(lons)), -500.0, dtype=np.float32) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + ds = xr.Dataset( + {"z": xr.DataArray(data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"})} + ) + path = topo_file_factory(ds=ds) + + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-200.0, -50.0, 40.0, 75.0)) + with pytest.raises(ValueError, match="[Gg]ap"): + intr.topo_entries() + intr.close() + + +def test_autodetect_cf_standard_name_takes_priority(topo_file_factory): + """ + CF standard_name detection takes priority over the fallback name list. + A dataset with both a CF-tagged variable and an 'elevation' variable must + return the CF-tagged one. + """ + import numpy as np + import xarray as xr + + lons = np.linspace(-10.0, 10.0, 5) + lats = np.linspace(-10.0, 10.0, 5) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + data = np.full((5, 5), -100.0, dtype=np.float32) + ds = xr.Dataset( + { + # CF-tagged variable: should be selected first + "cf_topo": xr.DataArray( + data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m", "standard_name": "surface_altitude"}, + ), + # Fallback-name variable: present but must not be selected + "elevation": xr.DataArray( + data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"}, + ), + } + ) + path = topo_file_factory(ds=ds) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert meta.var_name == "cf_topo" diff --git a/tests/test_storm.py b/tests/test_storm.py index afbf1a92d..2015139b3 100644 --- a/tests/test_storm.py +++ b/tests/test_storm.py @@ -51,6 +51,219 @@ def create_netcdf_storm_file(path): ds.to_netcdf(path) +def create_era5_storm_file(path, pressure_fields=None, u_fields=None, + v_fields=None, times=None, lon=None, lat=None): + """Create an ERA5-style CF-compliant NetCDF met-forcing file. + + ERA5 is a common source of met forcing for GeoClaw storm surge simulations. + This function produces the variable/dimension schema used by ECMWF ERA5 + reanalysis: dims ``(valid_time, latitude, longitude)``, variables + ``msl`` (Pa), ``u10`` (m/s), ``v10`` (m/s), lon in [0, 360], lat S-to-N, + and a ``Conventions = "CF-1.7"`` global attribute. + + The dimension order ``(valid_time, latitude, longitude)`` = ``(nt, nlat, + nlon)`` matches the C-row-major layout expected by the Fortran NetCDF reader + ``nf90_get_var`` when it fills its column-major ``pressure(nlon, nlat, nt)`` + array: the library reverses the dimension order automatically. + + Parameters + ---------- + path : Path or str + Output NetCDF file path. + pressure_fields : ndarray, shape (nt, nlat, nlon), optional + Pressure in Pa. Defaults to 101300 Pa everywhere. + u_fields : ndarray, shape (nt, nlat, nlon), optional + Eastward 10-m wind in m/s. Defaults to zero. + v_fields : ndarray, shape (nt, nlat, nlon), optional + Northward 10-m wind in m/s. Defaults to zero. + times : array-like of int, optional + Time values in **seconds since the time_offset epoch** used in the + accompanying .storm descriptor. Defaults to ``[0, 3600, 7200]``. + lon : array-like of float, optional + Longitude values in [0, 360]. Defaults to a 5-point sample grid. + lat : array-like of float, optional + Latitude values, S-to-N. Defaults to a 4-point sample grid. + """ + # netCDF4 is required as xarray's NetCDF backend. + pytest.importorskip("netCDF4") + xr = pytest.importorskip("xarray") + + # --- defaults (synthetic values for unit tests) --- + if lon is None: + lon = np.linspace(260.0, 300.0, 5) # [0, 360] convention + else: + lon = np.asarray(lon, dtype=float) + if lat is None: + lat = np.linspace(10.0, 35.0, 4) # S-to-N + else: + lat = np.asarray(lat, dtype=float) + if times is None: + times = np.array([0, 3600, 7200], dtype=np.int64) + else: + times = np.asarray(times, dtype=np.int64) + + nt, nlat, nlon = len(times), len(lat), len(lon) + shape = (nt, nlat, nlon) + + if pressure_fields is None: + pressure_fields = np.full(shape, 101300.0) + if u_fields is None: + u_fields = np.zeros(shape) + if v_fields is None: + v_fields = np.zeros(shape) + + # Use a fixed epoch string; callers that need a specific offset should + # convert their datetime values to integer seconds before the call. + time_epoch_str = "2012-08-29T00:00:00" + + ds = xr.Dataset( + { + "msl": ( + ["valid_time", "latitude", "longitude"], + pressure_fields, + { + "units": "Pa", + "standard_name": "air_pressure_at_mean_sea_level", + "long_name": "Mean sea level pressure", + }, + ), + "u10": ( + ["valid_time", "latitude", "longitude"], + u_fields, + { + "units": "m s-1", + "standard_name": "eastward_wind", + "long_name": "10 metre U wind component", + }, + ), + "v10": ( + ["valid_time", "latitude", "longitude"], + v_fields, + { + "units": "m s-1", + "standard_name": "northward_wind", + "long_name": "10 metre V wind component", + }, + ), + }, + coords={ + "longitude": ( + ["longitude"], + lon, + {"units": "degrees_east", "axis": "X", + "standard_name": "longitude"}, + ), + "latitude": ( + ["latitude"], + lat, + {"units": "degrees_north", "axis": "Y", + "standard_name": "latitude"}, + ), + "valid_time": ( + ["valid_time"], + times, + { + "units": f"seconds since {time_epoch_str}", + "axis": "T", + "standard_name": "time", + "long_name": "time", + }, + ), + }, + attrs={"Conventions": "CF-1.7"}, + ) + ds.to_netcdf(path) + + +def create_nws13_storm_file(path, pressure_fields=None, u_fields=None, + v_fields=None, times=None, lon=None, lat=None): + """Create a NWS13/OWI-NetCDF style met-forcing file. + + NWS13 (NOAA Weather Service format 13) is the OWI NetCDF met-forcing + schema used by ADCIRC and GeoClaw. This function produces the canonical + NWS13 variable and dimension names: dims ``(time, lat, lon)``, variables + ``uwnd`` (m/s), ``vwnd`` (m/s), ``press`` (mb). + + Pressure is stored in millibar (mb) to exercise the unit-awareness path + in tests. Note that the Fortran NetCDF reader does **not** perform unit + conversion; callers responsible for running Fortran should convert to Pa + before calling this function. + + The dimension order ``(time, lat, lon)`` = ``(nt, nlat, nlon)`` is + correct for the Fortran column-major allocation ``pressure(nlon, nlat, + nt)`` via the library-managed Fortran–C transpose. + + Parameters + ---------- + path : Path or str + Output NetCDF file path. + pressure_fields : ndarray, shape (nt, nlat, nlon), optional + Pressure in **mb**. Defaults to 1013.0 mb everywhere. + u_fields : ndarray, shape (nt, nlat, nlon), optional + Eastward wind in m/s. Defaults to zero. + v_fields : ndarray, shape (nt, nlat, nlon), optional + Northward wind in m/s. Defaults to zero. + times : array-like of int, optional + Time in seconds (absolute UNIX seconds or relative to an epoch). + Defaults to ``[0, 3600, 7200]``. + lon : array-like of float, optional + Longitude values. Defaults to a 5-point sample. + lat : array-like of float, optional + Latitude values. Defaults to a 4-point sample. + """ + pytest.importorskip("netCDF4") + xr = pytest.importorskip("xarray") + + if lon is None: + lon = np.linspace(-99.0, -70.0, 5) + else: + lon = np.asarray(lon, dtype=float) + if lat is None: + lat = np.linspace(8.0, 32.0, 4) + else: + lat = np.asarray(lat, dtype=float) + if times is None: + times = np.array([0, 3600, 7200], dtype=np.int64) + else: + times = np.asarray(times, dtype=np.int64) + + nt, nlat, nlon = len(times), len(lat), len(lon) + shape = (nt, nlat, nlon) + + if pressure_fields is None: + pressure_fields = np.full(shape, 1013.0) # mb + if u_fields is None: + u_fields = np.zeros(shape) + if v_fields is None: + v_fields = np.zeros(shape) + + ds = xr.Dataset( + { + "press": ( + ["time", "lat", "lon"], + pressure_fields, + {"units": "mb", "long_name": "Sea-level pressure"}, + ), + "uwnd": ( + ["time", "lat", "lon"], + u_fields, + {"units": "m/s", "long_name": "U-component of wind"}, + ), + "vwnd": ( + ["time", "lat", "lon"], + v_fields, + {"units": "m/s", "long_name": "V-component of wind"}, + ), + }, + coords={ + "lon": (["lon"], lon, {"units": "degrees_east"}), + "lat": (["lat"], lat, {"units": "degrees_north"}), + "time": (["time"], times, {"units": "seconds since 1970-01-01T00:00:00"}), + }, + ) + ds.to_netcdf(path) + + def check_geoclaw(paths, check_header=False): """ Check that two GeoClaw-formatted storm files are numerically equivalent. @@ -167,17 +380,29 @@ def save_storm_test_data(output_dir): @pytest.mark.python -@pytest.mark.parametrize("file_format", ["ascii", "netcdf"]) +@pytest.mark.parametrize("file_format", + ["ascii", "netcdf", "netcdf_era5", "netcdf_nws13"]) def test_data_storm_roundtrip(file_format, tmp_path): - """Test round-trip read/write of data-driven storm metadata.""" + """Test round-trip read/write of data-driven storm metadata. + + Exercises: + - ``ascii`` : OWI/NWS12 ASCII pair (two PRE + WIN files) + - ``netcdf`` : generic NetCDF format with existing helper + - ``netcdf_era5``: ERA5-style CF NetCDF (valid_time/latitude/longitude, + u10/v10/msl) — verifies that write_data discovers and + records ERA5 variable and dimension names correctly + - ``netcdf_nws13``: NWS13-style OWI NetCDF (time/lat/lon, uwnd/vwnd/press, + pressure in mb) — verifies user-mapping path for + non-default variable names + """ storm_path = tmp_path / "test.storm" data_storm = storm.Storm() data_storm.time_offset = np.datetime64("2012-08-29") - data_storm.file_format = file_format data_storm.window_type = 1 data_storm.ramp_width = 3 if file_format == "ascii": + data_storm.file_format = "ascii" data_storm.file_paths = [ Path("storm_1.PRE"), Path("storm_1.WIN"), @@ -186,7 +411,9 @@ def test_data_storm_roundtrip(file_format, tmp_path): ] data_storm.write(storm_path, file_format="data") read_storm = storm.Storm(storm_path, file_format="data") - else: + + elif file_format == "netcdf": + data_storm.file_format = "netcdf" data_storm.file_paths = [tmp_path / "storm.nc"] create_netcdf_storm_file(data_storm.file_paths[0]) data_storm.write( @@ -196,10 +423,67 @@ def test_data_storm_roundtrip(file_format, tmp_path): ) read_storm = storm.Storm(storm_path, file_format="data") + elif file_format == "netcdf_era5": + # ERA5-style: dims (valid_time, latitude, longitude), + # vars (u10, v10, msl). write_data uses MetInterrogator, which + # discovers "valid_time" via CF axis='T' automatically; dim_mapping + # is accepted for backwards compatibility but not required. + pytest.importorskip("netCDF4") + data_storm.file_format = "netcdf" + data_storm.file_paths = [tmp_path / "era5.nc"] + create_era5_storm_file(data_storm.file_paths[0]) + data_storm.write( + storm_path, + file_format="data", + dim_mapping={"t": "valid_time"}, + ) + read_storm = storm.Storm(storm_path, file_format="data") + + # Verify the descriptor body contains the correct coordinate and + # variable names in the new &file_info / &variable_info format. + desc_text = storm_path.read_text() + assert "lon_name = longitude" in desc_text + assert "lat_name = latitude" in desc_text + assert "time_name = valid_time" in desc_text + assert "var_name=u10" in desc_text + assert "var_name=v10" in desc_text + assert "var_name=msl" in desc_text + + elif file_format == "netcdf_nws13": + # NWS13-style: dims (time, lat, lon), vars (uwnd, vwnd, press, mb). + # write_data uses MetInterrogator; variable names require an explicit + # var_mapping because "uwnd"/"vwnd"/"press" are not in the default + # fallback lists used by get_netcdf_names. + pytest.importorskip("netCDF4") + data_storm.file_format = "nws13" + data_storm.file_paths = [tmp_path / "nws13.nc"] + create_nws13_storm_file(data_storm.file_paths[0]) + data_storm.write( + storm_path, + file_format="data", + var_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + read_storm = storm.Storm(storm_path, file_format="data") + + # Verify the descriptor body contains the correct coordinate and + # variable names in the new &file_info / &variable_info format. + desc_text = storm_path.read_text() + assert "lon_name = lon" in desc_text + assert "lat_name = lat" in desc_text + assert "time_name = time" in desc_text + assert "var_name=uwnd" in desc_text + assert "var_name=vwnd" in desc_text + assert "var_name=press" in desc_text + assert data_storm.time_offset == read_storm.time_offset assert data_storm.file_format in DATA_FILE_FORMAT_MAP[read_storm.file_format] assert data_storm.window_type == read_storm.window_type assert data_storm.ramp_width == read_storm.ramp_width + assert data_storm.storm_time_scale == read_storm.storm_time_scale assert len(data_storm.file_paths) == len(read_storm.file_paths) for i, path in enumerate(data_storm.file_paths): assert read_storm.file_paths[i] == path @@ -227,6 +511,133 @@ def test_netcdf_var_mapping(tmp_path): assert var_mapping == {"wind_u": "u", "wind_v": "v", "pressure": "pressure"} +@pytest.mark.python +def test_netcdf_var_mapping_era5(tmp_path): + """Test ERA5-style CF NetCDF dimension and variable name discovery. + + ERA5 is a common source of met forcing for GeoClaw storm surge simulations. + This test verifies that ``util.get_netcdf_names`` correctly maps ERA5 + dimension names (``valid_time``, ``latitude``, ``longitude``) and variable + names (``u10``, ``v10``, ``msl``) to the GeoClaw roles ``t``/``x``/``y`` + and ``wind_u``/``wind_v``/``pressure``. + + ``valid_time`` requires an explicit ``user_mapping`` because it is not in + the default "t" fallback list ``["t", "time"]`` used by + ``util.get_netcdf_names``. ``u10``, ``v10``, and ``msl`` are resolved + automatically from the default variable fallback lists. + + TODO: ``util.get_netcdf_names`` and + ``netcdf_utils.NetCDFInterrogator._find_coord_name`` both implement + dimension/variable discovery (the latter is CF-first via axis/standard_name + attributes). These parallel implementations should eventually be unified; + they are kept separate for now and flagged here for future consolidation. + """ + pytest.importorskip("netCDF4") + + nc_path = tmp_path / "era5.nc" + create_era5_storm_file(nc_path) + + # "valid_time" is not in the default "t" fallback list; must be specified. + dim_mapping = util.get_netcdf_names( + nc_path, + lookup_type="dim", + user_mapping={"t": "valid_time"}, + ) + # u10, v10, msl are in the default variable fallback lists. + var_mapping = util.get_netcdf_names( + nc_path, + lookup_type="var", + ) + + assert dim_mapping["x"] == "longitude" + assert dim_mapping["y"] == "latitude" + assert dim_mapping["t"] == "valid_time" + assert var_mapping["wind_u"] == "u10" + assert var_mapping["wind_v"] == "v10" + assert var_mapping["pressure"] == "msl" + + +@pytest.mark.python +def test_netcdf_var_mapping_nws13(tmp_path): + """Test NWS13/OWI-NetCDF dimension and variable name discovery. + + NWS13 is the OWI NetCDF met-forcing format used by ADCIRC and GeoClaw. + This test verifies that ``util.get_netcdf_names`` correctly maps NWS13 + dimension names (``lon``, ``lat``, ``time``) and variable names + (``uwnd``, ``vwnd``, ``press``) when an explicit ``user_mapping`` is + provided for the variable names. + + ``lon``, ``lat``, and ``time`` are resolved automatically from the default + dimension fallback lists. ``uwnd``, ``vwnd``, and ``press`` are NOT in + the default variable fallback lists and require an explicit mapping. + + TODO: ``util.get_netcdf_names`` and + ``netcdf_utils.MetInterrogator`` (variable unit / role discovery) both + implement variable-name lookup. These parallel implementations should + eventually be unified; they are kept separate for now and flagged here for + future consolidation. + """ + pytest.importorskip("netCDF4") + + nc_path = tmp_path / "nws13.nc" + create_nws13_storm_file(nc_path) + + # lon, lat, time are auto-discovered from the default fallback lists. + dim_mapping = util.get_netcdf_names( + nc_path, + lookup_type="dim", + ) + # uwnd, vwnd, press require an explicit user_mapping. + var_mapping = util.get_netcdf_names( + nc_path, + lookup_type="var", + user_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + + assert dim_mapping["x"] == "lon" + assert dim_mapping["y"] == "lat" + assert dim_mapping["t"] == "time" + assert var_mapping["wind_u"] == "uwnd" + assert var_mapping["wind_v"] == "vwnd" + assert var_mapping["pressure"] == "press" + + +@pytest.mark.python +@pytest.mark.skip( + reason=( + "WRF string-time axis and curvilinear grid require MetPreprocessor, " + "not yet implemented" + ) +) +def test_netcdf_wrf_stub(tmp_path): + """Stub for future WRF NetCDF met-forcing test (currently skipped). + + WRF output files differ from ERA5 and NWS13 in two ways that require + pre-processing before the GeoClaw Fortran reader can consume them: + + 1. **String time axis**: WRF stores time as ``char Times(Time, DateStrLen)`` + (ISO 8601 strings) rather than a numeric variable. GeoClaw's Fortran + reader expects integer seconds; a ``MetPreprocessor`` step is needed to + decode and convert the time axis. + + 2. **Curvilinear grid**: WRF uses a map-projected (Lambert conformal, + polar stereographic, or Mercator) curvilinear grid described by 2-D + ``XLAT``/``XLONG`` arrays rather than 1-D coordinate variables. + GeoClaw's reader expects regular (lon, lat) grids; regridding to a + regular lat/lon grid is required before use. + + Once a ``MetPreprocessor`` class (or equivalent) is implemented to handle + these two cases, this test slot should be filled in with a synthetic WRF + file (minimal curvilinear grid, string time) and an end-to-end round-trip + assertion. + """ + pass # placeholder — see docstring for implementation notes + + @pytest.mark.python @pytest.mark.parametrize( "speeds_knots, expected_categories", diff --git a/tests/test_storm_time_scale.py b/tests/test_storm_time_scale.py new file mode 100644 index 000000000..7d88fbbfe --- /dev/null +++ b/tests/test_storm_time_scale.py @@ -0,0 +1,85 @@ +""" +Self-contained test for the storm_time_scale time-axis arithmetic. + +No GeoClaw runtime or test framework is required — only numpy and assert. +Mirrors the Fortran arithmetic in data_storm_module.f90 case(2): + + t_ref = t[0] + t_scaled[i] = t_ref + round(scale * (t[i] - t_ref)) +""" + +import numpy as np + + +def apply_time_scale(t_raw, scale): + """Apply storm_time_scale to a raw time array. + + Parameters + ---------- + t_raw : array-like of float + Times in any consistent unit (e.g. seconds since epoch). + scale : float + storm_time_scale value. + + Returns + ------- + numpy.ndarray of int + Scaled integer times matching Fortran nint() rounding. + """ + t = np.asarray(t_raw, dtype=np.float64) + t_ref = t[0] + return np.rint(t_ref + scale * (t - t_ref)).astype(np.int64) + + +def _hourly_times(n=10): + """Return n hourly timestamps as float64 seconds since Unix epoch.""" + epoch = np.datetime64("1970-01-01T00:00:00", "s") + start = np.datetime64("2012-08-28T00:00:00", "s") + t0 = float((start - epoch) / np.timedelta64(1, "s")) + return np.array([t0 + i * 3600.0 for i in range(n)]) + + +def test_scale_one_is_noop(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 1.0) + expected = np.rint(t).astype(np.int64) + assert np.array_equal(scaled, expected), f"scale=1.0 changed values: {scaled - expected}" + + +def test_scale_two_doubles_duration(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 2.0) + # Duration from first to last should be doubled + orig_duration = t[-1] - t[0] + scaled_duration = scaled[-1] - scaled[0] + assert abs(scaled_duration - 2.0 * orig_duration) < 1, ( + f"Expected duration {2*orig_duration}, got {scaled_duration}" + ) + + +def test_anchor_invariant(): + t = _hourly_times(10) + t_ref = np.rint(t[0]).astype(np.int64) + for scale in [0.5, 1.0, 2.0, 3.7]: + scaled = apply_time_scale(t, scale) + assert scaled[0] == t_ref, ( + f"scale={scale}: anchor changed from {t_ref} to {scaled[0]}" + ) + + +def test_scale_half_compresses_duration(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 0.5) + orig_duration = t[-1] - t[0] + scaled_duration = scaled[-1] - scaled[0] + assert abs(scaled_duration - 0.5 * orig_duration) < 1, ( + f"Expected duration {0.5*orig_duration}, got {scaled_duration}" + ) + + +if __name__ == "__main__": + test_scale_one_is_noop() + test_scale_two_doubles_duration() + test_anchor_invariant() + test_scale_half_compresses_duration() + print("All storm_time_scale arithmetic tests passed.")