From 6eb0370adf0f1a89dc089e3f621aa79be227b91d Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 9 Apr 2026 20:40:40 -0400 Subject: [PATCH 01/24] Initial support for new NetCDF capabilities Includes better support for discovery of variables, coordinate mapping, cropping of data files, and more robust reading overall. --- src/2d/shallow/topo_module.f90 | 857 +++++++++++++---------- src/python/geoclaw/data.py | 11 +- src/python/geoclaw/meson.build | 1 + src/python/geoclaw/netcdf_utils.py | 1030 ++++++++++++++++++++++++++++ src/python/geoclaw/units.py | 25 + 5 files changed, 1573 insertions(+), 351 deletions(-) create mode 100644 src/python/geoclaw/netcdf_utils.py diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index 808c8f07a..315fb5f8c 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -39,6 +39,20 @@ module topo_module ! NetCDF4 support real(kind=4), parameter :: CONVENTION_REQUIREMENT = 1.0 + ! NetCDF descriptor fields — one slot per topo file, populated from the + ! key=value block in topo.data by read_netcdf_descriptor (type 4 only). + character(len=64), allocatable :: nc_var_name(:) ! topo variable name + character(len=64), allocatable :: nc_lon_name(:) ! longitude coord name + character(len=64), allocatable :: nc_lat_name(:) ! latitude coord name + character(len=32), allocatable :: nc_lat_order(:) ! 'N_to_S' or 'S_to_N' + character(len=32), allocatable :: nc_dim_order(:) ! e.g. 'lat,lon' + character(len=8), allocatable :: nc_fill_action(:) ! 'abort' or 'warn' + integer, allocatable :: nc_lon_convention(:) ! 180 or 360 + real(kind=8), allocatable :: nc_fill_value(:) ! fill/nodata sentinel + real(kind=8), allocatable :: nc_crop_bounds(:,:) ! (4, n): lon0,lon1,lat0,lat1 + logical, allocatable :: nc_has_fill(:) ! fill_value was specified + logical, allocatable :: nc_has_crop(:) ! crop_bounds was specified + ! dtopo variables ! Work array real(kind=8), allocatable :: dtopowork(:) @@ -153,12 +167,39 @@ subroutine read_topo_settings(restart,file_name) allocate(topoID(mtopofiles),topotime(mtopofiles),topo0save(mtopofiles)) allocate(i0topo0(mtopofiles),topo0ID(mtopofiles)) allocate(areatopo(mtopofiles)) + + ! NetCDF descriptor arrays — allocated for all files; only + ! filled in for type 4 entries by read_netcdf_descriptor. + allocate(nc_var_name(mtopofiles), nc_lon_name(mtopofiles)) + allocate(nc_lat_name(mtopofiles), nc_lat_order(mtopofiles)) + allocate(nc_dim_order(mtopofiles), nc_fill_action(mtopofiles)) + allocate(nc_lon_convention(mtopofiles), nc_fill_value(mtopofiles)) + allocate(nc_crop_bounds(4, mtopofiles)) + allocate(nc_has_fill(mtopofiles), nc_has_crop(mtopofiles)) + nc_var_name = '' + nc_lon_name = '' + nc_lat_name = '' + nc_lat_order = 'S_to_N' + nc_dim_order = 'lat,lon' + nc_fill_action = 'abort' + nc_lon_convention = 180 + nc_fill_value = 0.0d0 + nc_crop_bounds = 0.0d0 + nc_has_fill = .false. + nc_has_crop = .false. + mtopofiles = mtopofiles - num_dtopo ! decrement after allocates do i=1,mtopofiles read(iunit,*) topofname(i) read(iunit,*) itopotype(i) + ! For NetCDF files, read the optional key=value descriptor + ! block that follows the topo_type line. + if (abs(itopotype(i)) == 4) then + call read_netcdf_descriptor(iunit, i) + end if + write(GEO_PARM_UNIT,*) ' ' write(GEO_PARM_UNIT,*) ' ',topofname(i) write(GEO_PARM_UNIT,*) ' itopotype = ', itopotype(i) @@ -168,7 +209,7 @@ subroutine read_topo_settings(restart,file_name) endif call read_topo_header(topofname(i),itopotype(i),mxtopo(i), & mytopo(i),xlowtopo(i),ylowtopo(i),xhitopo(i),yhitopo(i), & - dxtopo(i),dytopo(i)) + dxtopo(i),dytopo(i), i) topoID(i) = i mtopo(i) = int(mxtopo(i), 8) * int(mytopo(i), 8) areatopo(i) = dxtopo(i)*dytopo(i) @@ -212,7 +253,7 @@ subroutine read_topo_settings(restart,file_name) topoID(i) = i topotime(i) = -huge(1.0) call read_topo_file(mxtopo(i),mytopo(i),itopotype(i),topofname(i), & - xlowtopo(i),ylowtopo(i),topowork(i0topo(i):i0topo(i)+mtopo(i)-1)) + xlowtopo(i),ylowtopo(i),topowork(i0topo(i):i0topo(i)+mtopo(i)-1), i) ! set topo0save(i) = 1 if this topo file intersects any ! dtopo file. This approach to setting topo0save is changed from ! v5.4.1, where it only checked if some dtopo point lies within the @@ -486,7 +527,7 @@ end subroutine set_topo_for_dtopo ! Read topo file. ! ======================================================================== - subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) + subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) #ifdef NETCDF use netcdf @@ -502,6 +543,7 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) character(len=512), intent(in) :: fname real(kind=8), intent(inout) :: topo(1:int(mx, 8)*int(my, 8)) real(kind=8), intent(in) :: xll,yll + integer, intent(in), optional :: topo_idx ! Locals integer, parameter :: iunit = 19, miss_unit = 17 @@ -515,7 +557,6 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) ! NetCDF Support character(len=64) :: direction, x_dim_name, x_var_name, y_dim_name, & y_var_name, z_var_name, var_name - ! character(len=1) :: axis_string real(kind=8), allocatable :: nc_buffer(:, :), xlocs(:), ylocs(:) integer(kind=4) :: x_var_id, y_var_id, z_var_id, x_dim_id, y_dim_id integer(kind=4) :: xstart(1), ystart(1), mx_tot, my_tot @@ -625,108 +666,136 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo) ! NetCDF case(4) #ifdef NETCDF - ! Open file + ! Open file call check_netcdf_error(nf90_open(fname, nf90_nowrite, nc_file)) - - ! Get number of dimensions and vars - call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, & - num_vars)) - - ! get x and y dimension info - call get_dim_info(nc_file, num_dims_tot, x_dim_id, x_dim_name, & - mx_tot, y_dim_id, y_dim_name, my_tot) - - allocate(xlocs(mx_tot),ylocs(my_tot)) - - call check_netcdf_error(nf90_get_var(nc_file, x_dim_id, xlocs, start=(/ 1 /), count=(/ mx_tot /))) - call check_netcdf_error(nf90_get_var(nc_file, y_dim_id, ylocs, start=(/ 1 /), count=(/ my_tot /))) - xstart = minloc(xlocs, mask=(xlocs.eq.xll)) - ystart = minloc(ylocs, mask=(ylocs.eq.yll)) - deallocate(xlocs,ylocs) - - z_var_id = -1 - do n=1, num_vars - call check_netcdf_error(nf90_inquire_variable(nc_file, n, var_name, var_type, num_dims, dim_ids)) - - ! Assume dim names are same as var ids - if (var_name == x_dim_name) then - x_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, x_var_name, x_var_id)) - else if (var_name == y_dim_name) then - y_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, y_var_name, y_var_id)) - else if (num_dims == 2) then - z_var_name = var_name - z_dim_ids = dim_ids - call check_netcdf_error(nf90_inq_varid(nc_file, z_var_name, z_var_id)) + + call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, num_vars)) + + ! ---------------------------------------------------------------- + ! Dimension discovery: descriptor or fallback heuristic. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_lon_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lon_name(topo_idx)), x_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + x_dim_id, x_dim_name, mx_tot)) + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lat_name(topo_idx)), y_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + y_dim_id, y_dim_name, my_tot)) + else + call get_dim_info(nc_file, num_dims_tot, x_dim_id, & + x_dim_name, mx_tot, y_dim_id, y_dim_name, my_tot) + end if + + allocate(xlocs(mx_tot), ylocs(my_tot)) + call check_netcdf_error(nf90_get_var(nc_file, x_dim_id, xlocs, & + start=(/ 1 /), count=(/ mx_tot /))) + call check_netcdf_error(nf90_get_var(nc_file, y_dim_id, ylocs, & + start=(/ 1 /), count=(/ my_tot /))) + + ! Apply lon convention shift so xstart matches the shifted xll + ! computed in read_topo_header. + if (present(topo_idx)) then + if (nc_lon_convention(topo_idx) == 360) then + where (xlocs > 180.0d0) xlocs = xlocs - 360.0d0 end if + end if - end do - if (z_var_id == -1) then - stop "Unable to find topography data!" + xstart = minloc(xlocs, mask=(xlocs == xll)) + ystart = minloc(ylocs, mask=(ylocs == yll)) + deallocate(xlocs, ylocs) + + ! ---------------------------------------------------------------- + ! Find the topo data variable: descriptor or first 2-D variable. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_var_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_file, & + trim(nc_var_name(topo_idx)), z_var_id)) + call check_netcdf_error(nf90_inquire_variable(nc_file, & + z_var_id, var_name, var_type, num_dims, z_dim_ids)) + else + z_var_id = -1 + do n = 1, num_vars + call check_netcdf_error(nf90_inquire_variable(nc_file, & + n, var_name, var_type, num_dims, dim_ids)) + if (var_name /= x_dim_name .and. & + var_name /= y_dim_name .and. num_dims == 2) then + z_var_id = n + z_dim_ids = dim_ids + exit + end if + end do + if (z_var_id == -1) then + print *, "ERROR: Unable to find topo variable in ", trim(fname) + stop + end if end if - ! Load in data - ! TODO: Provide striding into data if need be - - ! only try to access data if theres overlap with the domain + ! ---------------------------------------------------------------- + ! Load data into topo array (only if region overlaps domain). + ! ---------------------------------------------------------------- if ((mx > 0) .and. (my > 0)) then if (z_dim_ids(1) == x_dim_id) then allocate(nc_buffer(mx, my)) else if (z_dim_ids(1) == y_dim_id) then allocate(nc_buffer(my, mx)) - else - stop " NetCDF z variable has dimensions that don't align with x and y" + else + print *, "ERROR: NetCDF z variable dimensions do not align with x/y" + stop end if - call check_netcdf_error(nf90_get_var(nc_file, z_var_id, nc_buffer, & - start = (/ xstart(1), ystart(1) /), & - count = (/ mx, my /))) + call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & + nc_buffer, start=(/ xstart(1), ystart(1) /), & + count=(/ mx, my /))) - ! check for lon/lat ordering of z variable + ! Transpose into GeoClaw topo array (N-to-S row order). if (z_dim_ids(1) == x_dim_id) then do j = 0, int(my, 8) - 1 - topo(j * int(mx, 8) + 1:(j + 1) * int(mx, 8)) = nc_buffer(:, my - j) + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, my - j) end do - else if (z_dim_ids(1) == y_dim_id) then + else do j = 0, int(my, 8) - 1 - topo(j * int(mx, 8) + 1:(j + 1) * int(mx, 8)) = nc_buffer(my - j, :) + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(my - j, :) end do end if deallocate(nc_buffer) - ! do j=1, my - ! i = i - 1 - ! topo((i - 1) * mx + 1:) - ! print *, mx, my - ! do j=1, my - ! row_index = row_index - 1 - ! print *, row_index - ! print *, (row_index-1)*mx + 1, (row_index-1)*mx + mx - ! ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! ! topo)) - ! ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! ! topo((row_index-1)*mx + 1:(row_index-1)*mx + mx), & - ! ! start = (/ 1, j /), count=(/ mx, 1 /))) - ! call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - ! nc_buffer, start = (/ 1, j /), count=(/ mx, 1 /))) - ! topo((row_index - 1) * mx + 1:(row_index - 1) * mx + mx) = & - ! nc_buffer - ! end do - - ! Check if the topography was defined positive down and flip the - ! sign if need be. Just in case this is true but topo_type < 0 - ! we do not do anything here on this to avoid doing it twice. + ! ---------------------------------------------------------------- + ! Fill-value check (descriptor only). + ! Python has already verified no fill values in the crop region, + ! but we keep this as a safety net for edge-case mismatches. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. nc_has_fill(topo_idx)) then + do i = 1, mtot + if (abs(topo(i) - nc_fill_value(topo_idx)) < & + 1.0d-6 * max(abs(nc_fill_value(topo_idx)), 1.0d0)) then + if (trim(nc_fill_action(topo_idx)) == 'abort') then + print *, "ERROR: Fill value found in topo file ", & + trim(fname), " at index ", i + print *, " fill_value = ", nc_fill_value(topo_idx) + print *, " Set fill_action = warn to replace with topo_missing." + stop + else + topo(i) = topo_missing + end if + end if + end do + end if + + ! Honour 'positive = down' attribute (flip sign for depth-positive files). + ! Only flip when topo_type > 0 to avoid double-flip for negative types. ios = nf90_get_att(nc_file, z_var_id, 'positive', direction) - call check_netcdf_error(nf90_close(nc_file)) if (ios == NF90_NOERR) then - if (to_lower(direction) == "down") then - if (topo_type < 0) then - topo = -topo - endif + if (to_lower(direction) == "down" .and. topo_type > 0) then + topo(1:mtot) = -topo(1:mtot) end if end if end if + + call check_netcdf_error(nf90_close(nc_file)) #else print *, "ERROR: NetCDF library was not included in this build" print *, " of GeoClaw." @@ -783,7 +852,7 @@ end subroutine read_topo_file ! - xll,yll,xhi,yhi - (float) Lower and upper coordinates for grid ! - dx,dy - (float) Spatial resolution of grid ! ======================================================================== - subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) + subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy,topo_idx) #ifdef NETCDF use netcdf #endif @@ -798,6 +867,7 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) integer, intent(in) :: topo_type integer, intent(out) :: mx, my real(kind=8), intent(out) :: xll, yll, xhi, yhi, dx, dy + integer, intent(in), optional :: topo_idx ! Local integer, parameter :: iunit = 19 @@ -810,9 +880,6 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) logical :: xll_registered, yll_registered ! NetCDF Support - ! character(len=1) :: axis_string - ! character(len=6) :: convention_string - ! integer(kind=4) :: convention_version integer(kind=4) :: nc_file real(kind=8), allocatable :: xlocs(:),ylocs(:) logical, allocatable :: x_in_dom(:),y_in_dom(:) @@ -928,116 +995,113 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy) case(4) #ifdef NETCDF - ! Open file + ! Open file call check_netcdf_error(nf90_open(fname, nf90_nowrite, nc_file)) - ! NetCDF4 GEBCO topography, should conform to CF metadata - ! standard - ! ios = nf90_get_att(nc_file, NF90_GLOBAL, 'Conventions', convention_string) - ! if (verbose) then - ! print *, convention_string - ! end if - ! if (ios /= NF90_NOERR .or. convention_string(1:3) /= "CF-") then - ! print *, "Topography file does not conform to the CF" - ! print *, "conventions and meta-data. Please see the" - ! print *, "information at " - ! print *, "" - ! print *, " cfconventions.org" - ! print *, "" - ! print *, "to find out more info." - ! stop - ! end if - - ! call parse_values(convention_string(4:6), num_values, convention_version) - ! if (convention_version(1) < CONVENTION_REQUIREMENT) then - ! print *, "Topography file conforms to a CF version that" - ! print *, "is too old (", convention_version, "). " - ! print *, "Please refer to" - ! print *, "" - ! print *, " cfconventions.org" - ! print *, "" - ! print *, "to find out more info." - ! stop - ! end if - - ! get x and y dimension info - call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, & - num_vars)) - call get_dim_info(nc_file, num_dims_tot, x_dim_id, x_dim_name, & - mx, y_dim_id, y_dim_name, my) - - ! allocate vector to hold lon and lat vals and vector for var ids - allocate(xlocs(mx),ylocs(my),x_in_dom(mx),y_in_dom(my),var_ids(num_vars)) - - if (verbose) then - print *, "Names = (", x_dim_name, ", ", y_dim_name, ")" - print *, "Size = (", mx, ", ", my, ")" - end if - - ! Read in variables - call check_netcdf_error(nf90_inq_varids(nc_file, num_vars, var_ids)) - if (verbose) then - print *, "n, var_name, var_type, num_dims, dim_ids" + ! ---------------------------------------------------------------- + ! Discover spatial dimensions. + ! When a descriptor was provided (topo_idx present, lon_name set) + ! look up dimension IDs by name directly. Otherwise fall back to + ! the name-heuristic in get_dim_info. + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_inquire(nc_file, num_dims_tot, num_vars)) + + if (present(topo_idx) .and. & + len_trim(nc_lon_name(topo_idx)) > 0) then + ! Descriptor path: exact name look-up + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lon_name(topo_idx)), x_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + x_dim_id, x_dim_name, mx)) + call check_netcdf_error(nf90_inq_dimid(nc_file, & + trim(nc_lat_name(topo_idx)), y_dim_id)) + call check_netcdf_error(nf90_inquire_dimension(nc_file, & + y_dim_id, y_dim_name, my)) + else + ! Fallback: auto-discover by name matching + call get_dim_info(nc_file, num_dims_tot, x_dim_id, & + x_dim_name, mx, y_dim_id, y_dim_name, my) end if - do n=1, num_vars - call check_netcdf_error(nf90_inquire_variable(nc_file, n, var_name, var_type, num_dims, dim_ids)) - if (verbose) then - print *, n, ": ", var_name, "|", var_type, "|", num_dims, "|", dim_ids - end if - - ! Assume dim names are same as var ids - if (var_name == x_dim_name) then - x_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, x_var_name, x_var_id)) - ! x_var_id = n - ! x_var_name = var_name - else if (var_name == y_dim_name) then - y_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, y_var_name, y_var_id)) - ! y_var_id = n - ! y_var_name = var_name - ! Assume that this is the topography data - else if (num_dims == 2) then - z_var_name = var_name - call check_netcdf_error(nf90_inq_varid(nc_file, z_var_name, z_var_id)) - ! z_var_id = n - ! z_var_name = var_name - else - if (verbose) then - print *, "Not using var_id ", n + allocate(xlocs(mx), ylocs(my), x_in_dom(mx), y_in_dom(my)) + + ! ---------------------------------------------------------------- + ! Look up coordinate variable IDs (CF convention: var name == dim name). + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_inq_varid(nc_file, trim(x_dim_name), x_var_id)) + call check_netcdf_error(nf90_inq_varid(nc_file, trim(y_dim_name), y_var_id)) + + ! ---------------------------------------------------------------- + ! Find the topo data variable. + ! Descriptor path: look up by name. Fallback: first 2-D variable + ! that is not a coordinate variable. + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. & + len_trim(nc_var_name(topo_idx)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_file, & + trim(nc_var_name(topo_idx)), z_var_id)) + else + z_var_id = -1 + do n = 1, num_vars + call check_netcdf_error(nf90_inquire_variable(nc_file, & + n, var_name, var_type, num_dims, dim_ids)) + if (var_name /= x_dim_name .and. & + var_name /= y_dim_name .and. num_dims == 2) then + z_var_id = n + exit end if + end do + if (z_var_id == -1) then + print *, "ERROR: Cannot find a 2-D topo variable in ", trim(fname) + print *, " Specify var_name in the topo descriptor block." + stop end if + end if - end do - - if (verbose) then - print *, "x_var_name, x_var_id = ", x_var_name, x_var_id - print *, "y_var_name, y_var_id = ", y_var_name, y_var_id - print *, "z_var_name, z_var_id = ", z_var_name, z_var_id + ! ---------------------------------------------------------------- + ! Read coordinate arrays. + ! ---------------------------------------------------------------- + call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, & + start=(/ 1 /), count=(/ mx /))) + call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & + start=(/ 1 /), count=(/ my /))) + + ! Shift 0-360 longitudes to -180/180 if descriptor says so. + if (present(topo_idx)) then + if (nc_lon_convention(topo_idx) == 360) then + where (xlocs > 180.0d0) xlocs = xlocs - 360.0d0 + end if end if - call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, start=(/ 1 /), count=(/ mx /))) - call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, start=(/ 1 /), count=(/ my /))) - dx = xlocs(2) - xlocs(1) dy = ylocs(2) - ylocs(1) - - ! find which locs are within domain (with a dx/dy buffer around domain + ghost cells) - x_in_dom = (xlocs.gt.(xlower-dx-hxposs(1)*nghost)) .and. (xlocs.lt.(xupper+dx+hxposs(1)*nghost)) - y_in_dom = (ylocs.gt.(ylower-dy-hyposs(1)*nghost)) .and. (ylocs.lt.(yupper+dy+hyposs(1)*nghost)) - + + ! ---------------------------------------------------------------- + ! Select the subset of the file to load. + ! Descriptor path: use explicit crop_bounds. + ! Fallback: clip to the AMR domain (with ghost-cell buffer). + ! ---------------------------------------------------------------- + if (present(topo_idx) .and. nc_has_crop(topo_idx)) then + x_in_dom = (xlocs >= nc_crop_bounds(1, topo_idx)) .and. & + (xlocs <= nc_crop_bounds(2, topo_idx)) + y_in_dom = (ylocs >= nc_crop_bounds(3, topo_idx)) .and. & + (ylocs <= nc_crop_bounds(4, topo_idx)) + else + x_in_dom = (xlocs > (xlower - dx - hxposs(1)*nghost)) .and. & + (xlocs < (xupper + dx + hxposs(1)*nghost)) + y_in_dom = (ylocs > (ylower - dy - hyposs(1)*nghost)) .and. & + (ylocs < (yupper + dy + hyposs(1)*nghost)) + end if + xll = minval(xlocs, mask=x_in_dom) yll = minval(ylocs, mask=y_in_dom) xhi = maxval(xlocs, mask=x_in_dom) yhi = maxval(ylocs, mask=y_in_dom) - - ! adjust mx, my - mx = count(x_in_dom) - my = count(y_in_dom) + mx = count(x_in_dom) + my = count(y_in_dom) call check_netcdf_error(nf90_close(nc_file)) - deallocate(xlocs,ylocs,x_in_dom,y_in_dom) + deallocate(xlocs, ylocs, x_in_dom, y_in_dom) #else print *, "ERROR: NetCDF library was not included in this build" print *, " of GeoClaw." @@ -1383,207 +1447,204 @@ subroutine read_dtopo_header(fname,topo_type,mx,my,mt,xlow,ylow,t0,xhi, & end subroutine read_dtopo_header - - -recursive subroutine topoarea(x1,x2,y1,y2,m,area) + recursive subroutine topoarea(x1,x2,y1,y2,m,area) - ! Compute the area of overlap of topo with the rectangle (x1,x2) x (y1,y2) - ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) - ! (coarse to fine). + ! Compute the area of overlap of topo with the rectangle (x1,x2) x (y1,y2) + ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) + ! (coarse to fine). - ! The main call to this subroutine has corners of a physical domain for - ! the rectangle and m = 1 in order to compute the area of overlap of - ! domain by all topo arrays. Used to check inputs and insure topo - ! covers domain. + ! The main call to this subroutine has corners of a physical domain for + ! the rectangle and m = 1 in order to compute the area of overlap of + ! domain by all topo arrays. Used to check inputs and insure topo + ! covers domain. - ! The recursive strategy is to first compute the area using only topo - ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then apply corrections due to adding topo array mtopoorder(m). - - ! Corrections are needed if the new topo array intersects the grid cell. - ! Let the intersection be (x1m,x2m) x (y1m,y2m). - ! Two corrections are needed, first to subtract out the area over - ! the rectangle (x1m,x2m) x (y1m,y2m) computed using - ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then adding in the area over this same region using - ! topo array mtopoorder(m). + ! The recursive strategy is to first compute the area using only topo + ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then apply corrections due to adding topo array mtopoorder(m). + + ! Corrections are needed if the new topo array intersects the grid cell. + ! Let the intersection be (x1m,x2m) x (y1m,y2m). + ! Two corrections are needed, first to subtract out the area over + ! the rectangle (x1m,x2m) x (y1m,y2m) computed using + ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then adding in the area over this same region using + ! topo array mtopoorder(m). - ! Based on the recursive routine rectintegral that integrates - ! topo over grid cells using a similar strategy. + ! Based on the recursive routine rectintegral that integrates + ! topo over grid cells using a similar strategy. - implicit none + implicit none - ! arguments - real (kind=8), intent(in) :: x1,x2,y1,y2 - integer, intent(in) :: m - real (kind=8), intent(out) :: area + ! arguments + real (kind=8), intent(in) :: x1,x2,y1,y2 + integer, intent(in) :: m + real (kind=8), intent(out) :: area - ! local - real(kind=8) :: xmlo,xmhi,ymlo,ymhi,x1m,x2m, & - y1m,y2m, area1,area2,area_m - integer :: mfid, indicator - integer(kind=8) :: i0 - real(kind=8), external :: topointegral + ! local + real(kind=8) :: xmlo,xmhi,ymlo,ymhi,x1m,x2m, & + y1m,y2m, area1,area2,area_m + integer :: mfid, indicator + integer(kind=8) :: i0 + real(kind=8), external :: topointegral - mfid = mtopoorder(m) - i0=i0topo(mfid) + mfid = mtopoorder(m) + i0=i0topo(mfid) - if (m == mtopofiles) then - ! innermost step of recursion reaches this point. - ! only using coarsest topo grid -- compute directly... - call intersection(indicator,area,xmlo,xmhi, & - ymlo,ymhi, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + if (m == mtopofiles) then + ! innermost step of recursion reaches this point. + ! only using coarsest topo grid -- compute directly... + call intersection(indicator,area,xmlo,xmhi, & + ymlo,ymhi, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) - else - ! recursive call to compute area using one fewer topo grids: - call topoarea(x1,x2,y1,y2,m+1,area1) + else + ! recursive call to compute area using one fewer topo grids: + call topoarea(x1,x2,y1,y2,m+1,area1) - ! region of intersection of cell with new topo grid: - call intersection(indicator,area_m,x1m,x2m, & - y1m,y2m, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + ! region of intersection of cell with new topo grid: + call intersection(indicator,area_m,x1m,x2m, & + y1m,y2m, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (area_m > 0) then + + ! correction to subtract out from previous set of topo grids: + call topoarea(x1m,x2m,y1m,y2m,m+1,area2) - if (area_m > 0) then - - ! correction to subtract out from previous set of topo grids: - call topoarea(x1m,x2m,y1m,y2m,m+1,area2) - - ! adjust integral due to corrections for new topo grid: - area = area1 - area2 + area_m - else - area = area1 + ! adjust integral due to corrections for new topo grid: + area = area1 - area2 + area_m + else + area = area1 + endif endif - endif -end subroutine topoarea + end subroutine topoarea - -recursive subroutine rectintegral(x1,x2,y1,y2,m,integral) + recursive subroutine rectintegral(x1,x2,y1,y2,m,integral) + + ! Compute the integral of topo over the rectangle (x1,x2) x (y1,y2) + ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) + ! (coarse to fine). - ! Compute the integral of topo over the rectangle (x1,x2) x (y1,y2) - ! using topo arrays indexed mtopoorder(mtopofiles) through mtopoorder(m) - ! (coarse to fine). + ! The main call to this subroutine has corners of a grid cell for the + ! rectangle and m = 1 in order to compute the integral over the cell + ! using all topo arrays. - ! The main call to this subroutine has corners of a grid cell for the - ! rectangle and m = 1 in order to compute the integral over the cell - ! using all topo arrays. + ! The recursive strategy is to first compute the integral using only topo + ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then apply corrections due to adding topo array mtopoorder(m). + + ! Corrections are needed if the new topo array intersects the grid cell. + ! Let the intersection be (x1m,x2m) x (y1m,y2m). + ! Two corrections are needed, first to subtract out the integral over + ! the rectangle (x1m,x2m) x (y1m,y2m) computed using + ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), + ! and then adding in the integral over this same region using + ! topo array mtopoorder(m). - ! The recursive strategy is to first compute the integral using only topo - ! arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then apply corrections due to adding topo array mtopoorder(m). - - ! Corrections are needed if the new topo array intersects the grid cell. - ! Let the intersection be (x1m,x2m) x (y1m,y2m). - ! Two corrections are needed, first to subtract out the integral over - ! the rectangle (x1m,x2m) x (y1m,y2m) computed using - ! topo arrays mtopoorder(mtopofiles) to mtopoorder(m+1), - ! and then adding in the integral over this same region using - ! topo array mtopoorder(m). + ! Note that the function topointegral returns the integral over the + ! rectangle based on a single topo array, and that routine calls + ! bilinearintegral. - ! Note that the function topointegral returns the integral over the - ! rectangle based on a single topo array, and that routine calls - ! bilinearintegral. + implicit none - implicit none + ! arguments + real (kind=8), intent(in) :: x1,x2,y1,y2 + integer, intent(in) :: m + real (kind=8), intent(out) :: integral + + ! local + real(kind=8) :: xmlo,xmhi,ymlo,ymhi,area,x1m,x2m, & + y1m,y2m, int1,int2,int3 + integer :: mfid, indicator + integer(kind=8) :: i0 + real(kind=8), external :: topointegral + + + mfid = mtopoorder(m) + i0=i0topo(mfid) + + if (m == mtopofiles) then + ! innermost step of recursion reaches this point. + ! only using coarsest topo grid -- compute directly... + call intersection(indicator,area,xmlo,xmhi, & + ymlo,ymhi, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (indicator.eq.1) then + ! cell overlaps the file + ! integrate surface over intersection of grid and cell + integral = topointegral( xmlo,xmhi,ymlo, & + ymhi,xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & + dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) + else + integral = 0.d0 + endif - ! arguments - real (kind=8), intent(in) :: x1,x2,y1,y2 - integer, intent(in) :: m - real (kind=8), intent(out) :: integral - - ! local - real(kind=8) :: xmlo,xmhi,ymlo,ymhi,area,x1m,x2m, & - y1m,y2m, int1,int2,int3 - integer :: mfid, indicator - integer(kind=8) :: i0 - real(kind=8), external :: topointegral - - - mfid = mtopoorder(m) - i0=i0topo(mfid) - - if (m == mtopofiles) then - ! innermost step of recursion reaches this point. - ! only using coarsest topo grid -- compute directly... - call intersection(indicator,area,xmlo,xmhi, & - ymlo,ymhi, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) - - if (indicator.eq.1) then - ! cell overlaps the file - ! integrate surface over intersection of grid and cell - integral = topointegral( xmlo,xmhi,ymlo, & - ymhi,xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & - dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - else - integral = 0.d0 - endif - - else - ! recursive call to compute area using one fewer topo grids: - call rectintegral(x1,x2,y1,y2,m+1,int1) - - ! region of intersection of cell with new topo grid: - call intersection(indicator,area,x1m,x2m, & - y1m,y2m, x1,x2,y1,y2, & - xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + else + ! recursive call to compute area using one fewer topo grids: + call rectintegral(x1,x2,y1,y2,m+1,int1) + + ! region of intersection of cell with new topo grid: + call intersection(indicator,area,x1m,x2m, & + y1m,y2m, x1,x2,y1,y2, & + xlowtopo(mfid),xhitopo(mfid),ylowtopo(mfid),yhitopo(mfid)) + + if (area > 0) then + + ! correction to subtract out from previous set of topo grids: + call rectintegral(x1m,x2m,y1m,y2m,m+1,int2) - if (area > 0) then + ! correction to add in for new topo grid: + int3 = topointegral(x1m,x2m, y1m,y2m, & + xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & + dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - ! correction to subtract out from previous set of topo grids: - call rectintegral(x1m,x2m,y1m,y2m,m+1,int2) - - ! correction to add in for new topo grid: - int3 = topointegral(x1m,x2m, y1m,y2m, & - xlowtopo(mfid),ylowtopo(mfid),dxtopo(mfid), & - dytopo(mfid),mxtopo(mfid),mytopo(mfid),topowork(i0),1) - - ! adjust integral due to corrections for new topo grid: - integral = int1 - int2 + int3 - else - integral = int1 + ! adjust integral due to corrections for new topo grid: + integral = int1 - int2 + int3 + else + integral = int1 + endif endif - endif -end subroutine rectintegral + end subroutine rectintegral -subroutine intersection(indicator,area,xintlo,xinthi, & - yintlo,yinthi,x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi) + subroutine intersection(indicator,area,xintlo,xinthi, & + yintlo,yinthi,x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi) - ! find the intersection of two rectangles, return the intersection - ! and it's area, and indicator =1 - ! if there is no intersection, indicator =0 + ! find the intersection of two rectangles, return the intersection + ! and it's area, and indicator =1 + ! if there is no intersection, indicator =0 - implicit none + implicit none - integer, intent(out) :: indicator + integer, intent(out) :: indicator - real(kind=8), intent(in) :: x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi - real(kind=8), intent(out) :: area,xintlo,xinthi,yintlo,yinthi + real(kind=8), intent(in) :: x1lo,x1hi,y1lo,y1hi,x2lo,x2hi,y2lo,y2hi + real(kind=8), intent(out) :: area,xintlo,xinthi,yintlo,yinthi - xintlo=dmax1(x1lo,x2lo) - xinthi=dmin1(x1hi,x2hi) - yintlo=dmax1(y1lo,y2lo) - yinthi=dmin1(y1hi,y2hi) + xintlo=dmax1(x1lo,x2lo) + xinthi=dmin1(x1hi,x2hi) + yintlo=dmax1(y1lo,y2lo) + yinthi=dmin1(y1hi,y2hi) - if (xinthi.gt.xintlo.and.yinthi.gt.yintlo) then - area = (xinthi-xintlo)*(yinthi-yintlo) - indicator = 1 - else - area = 0.d0 - indicator = 0 - endif + if (xinthi.gt.xintlo.and.yinthi.gt.yintlo) then + area = (xinthi-xintlo)*(yinthi-yintlo) + indicator = 1 + else + area = 0.d0 + indicator = 0 + endif -end subroutine intersection + end subroutine intersection ! :TODO: Use the utility module's version of these #ifdef NETCDF @@ -1602,7 +1663,103 @@ subroutine check_netcdf_error(ios) end if end subroutine check_netcdf_error +#endif + ! ======================================================================== + ! subroutine read_netcdf_descriptor(iunit, idx) + ! + ! Read the optional key=value block that follows the topo_type line in + ! topo.data for a type-4 (NetCDF) entry. Lines are consumed until either + ! a blank line or EOF is reached. Parsed values are stored in the module- + ! level nc_* arrays at position idx. + ! + ! Format (Python DescriptorWriter output): + ! var_name = z + ! lon_name = longitude + ! lat_name = latitude + ! lon_convention = 180 + ! lat_order = N_to_S + ! dim_order = lat,lon + ! fill_value = -9999.0 + ! fill_action = abort + ! crop_bounds = -180.0 180.0 -90.0 90.0 + ! + ! Unknown keys produce a warning and are skipped. + ! ======================================================================== + subroutine read_netcdf_descriptor(iunit, idx) + + use geoclaw_module, only: GEO_PARM_UNIT + + implicit none + + integer, intent(in) :: iunit, idx + + character(len=512) :: line, key, val + integer :: eq_pos, comment_pos, ios + + do + read(iunit, '(a)', iostat=ios) line + if (ios /= 0) exit ! EOF + + ! Strip inline comment (! character) + comment_pos = index(line, '!') + if (comment_pos > 0) line = line(1:comment_pos-1) + line = adjustl(trim(line)) + + ! Blank line terminates the descriptor block + if (len_trim(line) == 0) exit + + ! Skip lines without an '=' sign + eq_pos = index(line, '=') + if (eq_pos == 0) cycle + + key = adjustl(trim(line(1:eq_pos-1))) + val = adjustl(trim(line(eq_pos+1:))) + + select case (trim(key)) + case ('var_name') + nc_var_name(idx) = trim(val) + case ('lon_name') + nc_lon_name(idx) = trim(val) + case ('lat_name') + nc_lat_name(idx) = trim(val) + case ('lon_convention') + read(val, *) nc_lon_convention(idx) + case ('lat_order') + nc_lat_order(idx) = trim(val) + case ('dim_order') + nc_dim_order(idx) = trim(val) + case ('fill_value') + read(val, *) nc_fill_value(idx) + nc_has_fill(idx) = .true. + case ('fill_action') + nc_fill_action(idx) = trim(val) + case ('crop_bounds') + read(val, *) nc_crop_bounds(1, idx), nc_crop_bounds(2, idx), & + nc_crop_bounds(3, idx), nc_crop_bounds(4, idx) + nc_has_crop(idx) = .true. + case default + write(GEO_PARM_UNIT, *) 'WARNING: Unknown NetCDF descriptor key: ', & + trim(key) + end select + end do + + ! Log what was parsed + write(GEO_PARM_UNIT, *) ' NetCDF descriptor for topo file ', idx, ':' + write(GEO_PARM_UNIT, *) ' var_name = ', trim(nc_var_name(idx)) + write(GEO_PARM_UNIT, *) ' lon_name = ', trim(nc_lon_name(idx)) + write(GEO_PARM_UNIT, *) ' lat_name = ', trim(nc_lat_name(idx)) + write(GEO_PARM_UNIT, *) ' lon_convention = ', nc_lon_convention(idx) + write(GEO_PARM_UNIT, *) ' lat_order = ', trim(nc_lat_order(idx)) + write(GEO_PARM_UNIT, *) ' fill_action = ', trim(nc_fill_action(idx)) + if (nc_has_fill(idx)) & + write(GEO_PARM_UNIT, *) ' fill_value = ', nc_fill_value(idx) + if (nc_has_crop(idx)) & + write(GEO_PARM_UNIT, *) ' crop_bounds = ', nc_crop_bounds(:, idx) + + end subroutine read_netcdf_descriptor + +#ifdef NETCDF subroutine get_dim_info(nc_file, ndims, x_dim_id, x_dim_name, mx, & y_dim_id, y_dim_name, my) use netcdf @@ -1630,18 +1787,18 @@ subroutine get_dim_info(nc_file, ndims, x_dim_id, x_dim_name, mx, & end subroutine get_dim_info #endif -function Upper(s1) RESULT (s2) - CHARACTER(*) :: s1 - CHARACTER(LEN(s1)) :: s2 - CHARACTER :: ch - INTEGER,PARAMETER :: DUC = ICHAR('A') - ICHAR('a') - INTEGER :: i - - DO i = 1,LEN(s1) - ch = s1(i:i) - IF (ch >= 'a'.AND.ch <= 'z') ch = CHAR(ICHAR(ch)+DUC) - s2(i:i) = ch - END DO -END function Upper + function Upper(s1) RESULT (s2) + CHARACTER(*) :: s1 + CHARACTER(LEN(s1)) :: s2 + CHARACTER :: ch + INTEGER,PARAMETER :: DUC = ICHAR('A') - ICHAR('a') + INTEGER :: i + + DO i = 1,LEN(s1) + ch = s1(i:i) + IF (ch >= 'a'.AND.ch <= 'z') ch = CHAR(ICHAR(ch)+DUC) + s2(i:i) = ch + END DO + END function Upper end module topo_module diff --git a/src/python/geoclaw/data.py b/src/python/geoclaw/data.py index 4ad0668fb..e30df379e 100755 --- a/src/python/geoclaw/data.py +++ b/src/python/geoclaw/data.py @@ -200,14 +200,23 @@ def write(self,data_source='setrun.py', out_file='topo.data'): tfile = [tfile[0], tfile[-1]] # drop minlevel,maxlevel,t1,t2 elif len(tfile) == 2: pass # now expect only topo_type, filename + elif len(tfile) == 3: + pass # [topo_type, filename, TopoMetadata] for NetCDF type 4 else: raise ValueError('Unexpected len(tfile) = %i' % len(tfile)) # if path is relative in setrun, assume it's relative to the # same directory that out_file comes from - fname = os.path.abspath(os.path.join(os.path.dirname(out_file),tfile[-1])) + fname = os.path.abspath(os.path.join(os.path.dirname(out_file),tfile[1])) self._out_file.write("\n'%s' \n " % fname) self._out_file.write("%3i # topo_type\n" % tfile[0]) + + # For type-4 (NetCDF) entries that carry a TopoMetadata object, + # write the descriptor key=value block consumed by + # read_netcdf_descriptor in topo_module.f90. + if tfile[0] == 4 and len(tfile) == 3 and tfile[2] is not None: + from clawpack.geoclaw.netcdf_utils import DescriptorWriter + DescriptorWriter.write_topo_descriptor(self._out_file, tfile[2]) elif self.test_topography == 1: self.data_write(name='topo_location',description='(Bathymetry jump location)') self.data_write(name='topo_left',description='(Depth to left of bathy_location)') diff --git a/src/python/geoclaw/meson.build b/src/python/geoclaw/meson.build index 3a835d77d..0509994be 100644 --- a/src/python/geoclaw/meson.build +++ b/src/python/geoclaw/meson.build @@ -13,6 +13,7 @@ python_sources = { 'kmltools.py', 'marching_front.py', 'most2geoclaw.py', + 'netcdf_utils.py', 'nonuniform_grid_tools.py', 'okada.py', 'plotfg.py', diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py new file mode 100644 index 000000000..2a0b9a02d --- /dev/null +++ b/src/python/geoclaw/netcdf_utils.py @@ -0,0 +1,1030 @@ +#!/usr/bin/env python +# encoding: utf-8 +r""" +NetCDF utilities for GeoClaw NetCDF input. + +Provides interrogator classes that inspect NetCDF files for coordinate +metadata, unit consistency, and fill values without loading data arrays. +The interrogators produce metadata dataclasses consumed by DescriptorWriter +(not yet implemented — pending confirmation of Fortran topo.data parser). + +Classes +------- +NetCDFInterrogator + Base class: opens a file, discovers coordinate variables, detects lon/lat + conventions, resolves fill value, validates crop bounds. + +TopoInterrogator(NetCDFInterrogator) + Adds bathymetry-specific checks: fill values within crop region (fatal), + unit verification against GEOCLAW_NETCDF_UNITS['topo']. + +MetInterrogator(NetCDFInterrogator) + Adds met-forcing checks: multi-variable grid/time consistency, unit + verification and conversion to contract units, CF time -> seconds offset, + ensemble dimension detection. + +CFNormalizer + Standalone utility: renames coordinates to CF standard names, adds/fixes + standard_name / axis / units / _FillValue attributes, resolves + _FillValue vs missing_value conflicts. Does not resample or reproject. + +Metadata dataclasses +-------------------- +FileMetadata, TopoMetadata, MetVariableInfo, MetMetadata +""" + +from __future__ import annotations + +import dataclasses +import warnings +from pathlib import Path +from typing import Optional + +import numpy as np +import xarray as xr + +from clawpack.geoclaw.units import GEOCLAW_NETCDF_UNITS, convert as units_convert + + +# --------------------------------------------------------------------------- +# CF unit alias table +# Maps CF-style unit strings to the abbreviations understood by units.convert() +# --------------------------------------------------------------------------- + +# For each contract role, the set of CF unit strings that mean "already correct" +_CONTRACT_UNIT_CF_ALIASES: dict[str, frozenset[str]] = { + 'm': frozenset({'m', 'meter', 'meters', 'metre', 'metres'}), + 'm/s': frozenset({'m/s', 'm s-1', 'meters per second', 'metres per second', + 'meter per second', 'metre per second', 'm s**-1'}), + 'Pa': frozenset({'Pa', 'pa', 'pascal', 'pascals', 'Pascal', 'Pascals', + 'N/m2', 'N m-2', 'kg m-1 s-2'}), + 's': frozenset({'s', 'sec', 'second', 'seconds'}), +} + +# Maps CF unit strings to units.py abbreviations for conversion +_CF_TO_UNITS_PY: dict[str, str] = { + # length + 'm': 'm', 'meter': 'm', 'meters': 'm', 'metre': 'm', 'metres': 'm', + 'cm': 'cm', 'centimeter': 'cm', 'centimeters': 'cm', + 'km': 'km', 'kilometer': 'km', 'kilometers': 'km', + # speed + 'm/s': 'm/s', 'm s-1': 'm/s', 'm s**-1': 'm/s', + 'meters per second': 'm/s', 'metres per second': 'm/s', + 'knots': 'knots', 'kt': 'knots', 'kts': 'knots', + 'km/h': 'km/h', 'km h-1': 'km/h', + 'mph': 'mph', + # pressure + 'Pa': 'Pa', 'pa': 'Pa', 'pascal': 'Pa', 'pascals': 'Pa', + 'hPa': 'hPa', 'hectopascal': 'hPa', 'hectopascals': 'hPa', + 'mbar': 'mbar', 'mb': 'mbar', 'millibar': 'mbar', 'millibars': 'mbar', + # time + 's': 's', 'sec': 's', 'second': 's', 'seconds': 's', + 'min': 'min', 'minute': 'min', 'minutes': 'min', + 'h': 'h', 'hr': 'h', 'hour': 'h', 'hours': 'h', + 'days': 'days', 'day': 'days', +} + + +def _normalize_cf_unit(cf_unit: str) -> Optional[str]: + """Return the units.py abbreviation for *cf_unit*, or None if unknown.""" + return _CF_TO_UNITS_PY.get(cf_unit.strip()) + + +def _unit_matches_contract(cf_unit: str, contract_unit: str) -> bool: + """Return True if *cf_unit* is equivalent to *contract_unit*.""" + aliases = _CONTRACT_UNIT_CF_ALIASES.get(contract_unit, frozenset()) + return cf_unit.strip() in aliases + + +# --------------------------------------------------------------------------- +# Metadata dataclasses +# --------------------------------------------------------------------------- + +@dataclasses.dataclass +class FileMetadata: + """Coordinate and convention metadata for a single NetCDF file.""" + source_file: Path + lon_name: str + lat_name: str + time_name: Optional[str] + lon_convention: int # 180 -> [-180, 180]; 360 -> [0, 360] + lat_order: str # 'N_to_S' or 'S_to_N' + dim_order: list[str] # canonical role names, e.g. ['lat', 'lon'] + fill_value: Optional[float] # resolved from _FillValue / missing_value + crop_bounds: Optional[tuple[float, float, float, float]] # lon0,lon1,lat0,lat1 + + +@dataclasses.dataclass +class TopoMetadata(FileMetadata): + """FileMetadata plus topo-specific fields.""" + var_name: str + source_units: str # units as found in file + fill_action: str # 'abort' (only value for topo; fill = fatal) + + +@dataclasses.dataclass +class MetVariableInfo: + """Maps one NetCDF variable to its GeoClaw role.""" + var_name: str + geoclaw_role: str # 'wind_u', 'wind_v', 'pressure' + source_units: str # units as found in file + + +@dataclasses.dataclass +class MetMetadata(FileMetadata): + """FileMetadata plus met-forcing-specific fields.""" + variables: list[MetVariableInfo] + time_offset: float # seconds; Fortran adds this to times read from file + fill_action: str # 'abort' or 'warn' + + +# --------------------------------------------------------------------------- +# Base interrogator +# --------------------------------------------------------------------------- + +class NetCDFInterrogator: + """ + Open a NetCDF file and interrogate its coordinate metadata. + + No data arrays are loaded; only coordinate values and variable attributes + are accessed. Dask-lazy chunking is enabled so that any accidental + downstream `.compute()` calls are bounded. + + Parameters + ---------- + path : str or Path + Path to the NetCDF file. + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max) in the same convention as the + file. When provided, bounds are validated against file extent. + """ + + def __init__( + self, + path: str | Path, + crop_bounds: Optional[tuple[float, float, float, float]] = None, + ) -> None: + self.path = Path(path) + self.crop_bounds = crop_bounds + # Open with chunks={} to activate Dask-lazy mode. + # mask_and_scale=True (default) so fill values appear as NaN. + self.ds: xr.Dataset = xr.open_dataset( + self.path, chunks={}, mask_and_scale=True + ) + + # ------------------------------------------------------------------ + # Coordinate discovery + # ------------------------------------------------------------------ + + def _find_coord_name( + self, + axis_letter: str, + std_names: list[str], + fallback_names: list[str], + ) -> Optional[str]: + """Find a coordinate by CF conventions: axis > standard_name > name.""" + # 1. Check axis attribute (highest CF priority) + for name, coord in self.ds.coords.items(): + if coord.attrs.get('axis', '').upper() == axis_letter: + return str(name) + # 2. Check standard_name + for name, coord in self.ds.coords.items(): + if coord.attrs.get('standard_name', '') in std_names: + return str(name) + # 3. Fallback: common names (case-insensitive) + coord_lower = {str(n).lower(): str(n) for n in self.ds.coords} + for fname in fallback_names: + if fname.lower() in coord_lower: + return coord_lower[fname.lower()] + return None + + def _require_coord( + self, + axis_letter: str, + std_names: list[str], + fallback_names: list[str], + label: str, + ) -> str: + name = self._find_coord_name(axis_letter, std_names, fallback_names) + if name is None: + raise ValueError( + f"Cannot find {label} coordinate in '{self.path}'. " + f"Expected axis='{axis_letter}', standard_name in {std_names}, " + f"or a name in {fallback_names}. " + f"Available coordinates: {list(self.ds.coords)}" + ) + return name + + def _find_lon_name(self) -> str: + return self._require_coord( + 'X', + ['longitude'], + ['longitude', 'lon', 'x', 'nav_lon', 'LONGITUDE', 'LON'], + 'longitude', + ) + + def _find_lat_name(self) -> str: + return self._require_coord( + 'Y', + ['latitude'], + ['latitude', 'lat', 'y', 'nav_lat', 'LATITUDE', 'LAT'], + 'latitude', + ) + + def _find_time_name(self) -> Optional[str]: + return self._find_coord_name( + 'T', + ['time'], + ['time', 't', 'TIME', 'valid_time'], + ) + + # ------------------------------------------------------------------ + # Convention detection + # ------------------------------------------------------------------ + + def _detect_lon_convention(self, lon_name: str) -> int: + """Return 180 if longitudes are in [-180, 180], else 360.""" + lon_max = float(self.ds[lon_name].max()) + return 360 if lon_max > 180.0 else 180 + + def _detect_lat_order(self, lat_name: str) -> str: + """Return 'N_to_S' if latitudes decrease, else 'S_to_N'.""" + lat_vals = self.ds[lat_name].values + if lat_vals[0] > lat_vals[-1]: + return 'N_to_S' + return 'S_to_N' + + def _detect_dim_order( + self, + var_name: str, + lon_name: str, + lat_name: str, + time_name: Optional[str], + ) -> list[str]: + """ + Return dimension order for *var_name* using canonical role names + ('lon', 'lat', 'time'). Unknown dims are passed through as-is. + """ + dim_map: dict[str, str] = {lon_name: 'lon', lat_name: 'lat'} + if time_name is not None: + dim_map[time_name] = 'time' + return [dim_map.get(d, d) for d in self.ds[var_name].dims] + + # ------------------------------------------------------------------ + # Fill value + # ------------------------------------------------------------------ + + def _resolve_fill_value(self, var_name: str) -> Optional[float]: + """ + Return fill value with CF-correct precedence: _FillValue > missing_value. + + Note: xarray with mask_and_scale=True has already decoded fill values + to NaN in the data arrays, but the original attribute is preserved. + """ + attrs = self.ds[var_name].attrs + if '_FillValue' in attrs: + return float(attrs['_FillValue']) + if 'missing_value' in attrs: + return float(attrs['missing_value']) + # xarray may have moved _FillValue to encoding dict + enc = self.ds[var_name].encoding + if '_FillValue' in enc: + return float(enc['_FillValue']) + return None + + # ------------------------------------------------------------------ + # Crop bounds validation + # ------------------------------------------------------------------ + + def _validate_crop_bounds( + self, + lon_name: str, + lat_name: str, + crop_bounds: tuple[float, float, float, float], + ) -> None: + """Raise ValueError if crop_bounds exceed file spatial extent.""" + lon_min, lon_max, lat_min, lat_max = crop_bounds + flon_min = float(self.ds[lon_name].min()) + flon_max = float(self.ds[lon_name].max()) + flat_min = float(self.ds[lat_name].min()) + flat_max = float(self.ds[lat_name].max()) + + if lon_min < flon_min or lon_max > flon_max: + raise ValueError( + f"crop_bounds lon [{lon_min}, {lon_max}] exceed file extent " + f"[{flon_min}, {flon_max}] in '{self.path}'." + ) + if lat_min < flat_min or lat_max > flat_max: + raise ValueError( + f"crop_bounds lat [{lat_min}, {lat_max}] exceed file extent " + f"[{flat_min}, {flat_max}] in '{self.path}'." + ) + + # ------------------------------------------------------------------ + # Core interrogation + # ------------------------------------------------------------------ + + def interrogate( + self, + var_name: str, + time_name_override: Optional[str] = None, + ) -> FileMetadata: + """ + Interrogate *var_name* and return a FileMetadata instance. + + Parameters + ---------- + var_name : str + Name of the primary data variable (used to determine dim_order). + time_name_override : str, optional + Force a specific time coordinate name instead of auto-detecting. + """ + if var_name not in self.ds: + raise KeyError( + f"Variable '{var_name}' not found in '{self.path}'. " + f"Available: {list(self.ds.data_vars)}" + ) + + lon_name = self._find_lon_name() + lat_name = self._find_lat_name() + time_name = ( + time_name_override + if time_name_override is not None + else self._find_time_name() + ) + + lon_convention = self._detect_lon_convention(lon_name) + lat_order = self._detect_lat_order(lat_name) + dim_order = self._detect_dim_order(var_name, lon_name, lat_name, time_name) + fill_value = self._resolve_fill_value(var_name) + + if self.crop_bounds is not None: + self._validate_crop_bounds(lon_name, lat_name, self.crop_bounds) + + return FileMetadata( + source_file=self.path, + lon_name=lon_name, + lat_name=lat_name, + time_name=time_name, + lon_convention=lon_convention, + lat_order=lat_order, + dim_order=dim_order, + fill_value=fill_value, + crop_bounds=self.crop_bounds, + ) + + def close(self) -> None: + self.ds.close() + + def __enter__(self) -> 'NetCDFInterrogator': + return self + + def __exit__(self, *args: object) -> None: + self.close() + + +# --------------------------------------------------------------------------- +# Topo interrogator +# --------------------------------------------------------------------------- + +class TopoInterrogator(NetCDFInterrogator): + """ + Interrogate a NetCDF bathymetry/topography file. + + Additional checks beyond the base class: + + * Verifies the data variable's units attribute matches the contract unit + (meters). If units are convertible via units.py, records the + source units in the metadata; Fortran will need a conversion factor. + If units are unrecognised, raises ValueError. + * Checks for fill values (NaN) within the crop region and raises + ValueError — silent NaN in bathymetry is numerically fatal. + + Parameters + ---------- + path : str or Path + Path to the NetCDF topography file. + var_name : str + Name of the elevation/bathymetry variable (e.g. 'z', 'elevation'). + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max). + """ + + def __init__( + self, + path: str | Path, + var_name: str, + crop_bounds: Optional[tuple[float, float, float, float]] = None, + ) -> None: + super().__init__(path, crop_bounds) + self.var_name = var_name + + # ------------------------------------------------------------------ + # Unit verification + # ------------------------------------------------------------------ + + def _check_topo_units(self) -> str: + """ + Verify units of *var_name* match GEOCLAW_NETCDF_UNITS['topo'] ('m'). + + Returns the units string found in the file. Raises ValueError if the + units are not recognisable or not convertible to meters. + """ + contract = GEOCLAW_NETCDF_UNITS['topo'] # 'm' + cf_unit = self.ds[self.var_name].attrs.get('units', '') + + if not cf_unit: + warnings.warn( + f"Variable '{self.var_name}' in '{self.path}' has no 'units' " + f"attribute. Assuming '{contract}' (contract unit for topo). " + f"If the data are not in meters, results will be incorrect.", + stacklevel=3, + ) + return contract + + if _unit_matches_contract(cf_unit, contract): + return cf_unit + + # Try to find a conversion path via units.py + canonical = _normalize_cf_unit(cf_unit) + if canonical is None: + raise ValueError( + f"Unrecognised units '{cf_unit}' on variable '{self.var_name}' " + f"in '{self.path}'. Contract requires '{contract}'. " + f"Add the unit string to _CF_TO_UNITS_PY or pre-convert the file." + ) + # Verify units.convert() can handle the conversion (raises if not) + try: + units_convert(1.0, canonical, contract) + except (ValueError, KeyError) as exc: + raise ValueError( + f"Cannot convert units '{cf_unit}' -> '{contract}' for variable " + f"'{self.var_name}' in '{self.path}': {exc}" + ) from exc + + warnings.warn( + f"Variable '{self.var_name}' has units '{cf_unit}'; contract is " + f"'{contract}'. A unit conversion will be needed before Fortran " + f"reads this file. Pre-convert the data or implement unit scaling " + f"in the descriptor.", + stacklevel=3, + ) + return cf_unit + + # ------------------------------------------------------------------ + # Fill value check within crop region + # ------------------------------------------------------------------ + + def _check_fill_in_crop( + self, + lon_name: str, + lat_name: str, + lat_order: str, + ) -> None: + """ + Check for NaN / fill values within the crop region. + + This triggers a bounded Dask computation (scalar boolean result). + Raises ValueError if any fill values are present — silent NaN in + bathymetry would silently corrupt simulation output. + """ + var = self.ds[self.var_name] + + if self.crop_bounds is not None: + lon_min, lon_max, lat_min, lat_max = self.crop_bounds + # For a decreasing lat axis (N_to_S) the slice must be reversed. + lat_slice = ( + slice(lat_max, lat_min) + if lat_order == 'N_to_S' + else slice(lat_min, lat_max) + ) + var = var.sel({ + lon_name: slice(lon_min, lon_max), + lat_name: lat_slice, + }) + + # This .compute() is intentional: we must confirm no fill values exist. + has_fill = bool(var.isnull().any().compute()) + if has_fill: + region_str = ( + f"crop region {self.crop_bounds}" if self.crop_bounds is not None + else "full file extent" + ) + raise ValueError( + f"Fill values (NaN) found in topography variable '{self.var_name}' " + f"within {region_str} of '{self.path}'. " + f"Silent NaN in bathymetry would corrupt simulation results. " + f"Fill holes before using this file." + ) + + # ------------------------------------------------------------------ + # Public interface + # ------------------------------------------------------------------ + + def interrogate_topo(self) -> TopoMetadata: + """ + Fully interrogate the topo file and return a TopoMetadata instance. + """ + base = self.interrogate(self.var_name) + source_units = self._check_topo_units() + self._check_fill_in_crop(base.lon_name, base.lat_name, base.lat_order) + + return TopoMetadata( + **dataclasses.asdict(base), + var_name=self.var_name, + source_units=source_units, + fill_action='abort', # fill in topo crop region is always fatal + ) + + +# --------------------------------------------------------------------------- +# Met interrogator +# --------------------------------------------------------------------------- + +class MetInterrogator(NetCDFInterrogator): + """ + Interrogate a NetCDF meteorological forcing file. + + Additional checks beyond the base class: + + * Verifies that all requested variables share the same spatial grid and + time axis. + * Validates and (if needed) converts units to contract units via units.py. + * Converts CF time to seconds from a user-provided reference offset. + * Detects ensemble/member dimensions and raises if any are non-singleton. + + Parameters + ---------- + path : str or Path + Path to the NetCDF file. + variable_map : dict + Maps GeoClaw role strings to variable names in the file, e.g.:: + + {'wind_u': 'u10', 'wind_v': 'v10', 'pressure': 'msl'} + + crop_bounds : tuple of four floats, optional + (lon_min, lon_max, lat_min, lat_max). + time_reference : str or datetime-like, optional + Reference datetime for the time_offset calculation. The + ``time_offset`` written to the descriptor will be the number of + seconds between *time_reference* and the first time in the file. + Defaults to the Unix epoch (1970-01-01T00:00:00). + fill_action : str, optional + 'abort' or 'warn'. Met files may legitimately have fill values at + the domain edges (Fortran handles edge fill). Default is 'warn'. + """ + + def __init__( + self, + path: str | Path, + variable_map: dict[str, str], + crop_bounds: Optional[tuple[float, float, float, float]] = None, + time_reference: Optional[str] = None, + fill_action: str = 'warn', + ) -> None: + super().__init__(path, crop_bounds) + self.variable_map = variable_map # {geoclaw_role: var_name_in_file} + self.time_reference = time_reference + if fill_action not in ('abort', 'warn'): + raise ValueError(f"fill_action must be 'abort' or 'warn', got '{fill_action}'") + self.fill_action = fill_action + + # ------------------------------------------------------------------ + # Variable grid/time consistency + # ------------------------------------------------------------------ + + def _check_variable_consistency( + self, + lon_name: str, + lat_name: str, + time_name: str, + ) -> None: + """ + Verify all variables share the same spatial grid and time axis. + + Raises ValueError on any mismatch. + """ + ref_role = next(iter(self.variable_map)) + ref_var_name = self.variable_map[ref_role] + ref_dims = set(self.ds[ref_var_name].dims) + + for role, var_name in self.variable_map.items(): + if var_name not in self.ds: + raise KeyError( + f"Variable '{var_name}' (role '{role}') not found in " + f"'{self.path}'. Available: {list(self.ds.data_vars)}" + ) + dims = set(self.ds[var_name].dims) + if dims != ref_dims: + raise ValueError( + f"Variable '{var_name}' (role '{role}') has dims {dims} " + f"but '{ref_var_name}' (role '{ref_role}') has dims {ref_dims}. " + f"All met variables must share the same grid." + ) + # Verify that the expected coordinate dims are actually present + for coord_name, label in [ + (lon_name, 'longitude'), + (lat_name, 'latitude'), + (time_name, 'time'), + ]: + if coord_name not in dims: + raise ValueError( + f"Variable '{var_name}' dims {dims} do not include the " + f"{label} coordinate '{coord_name}'." + ) + + # ------------------------------------------------------------------ + # Ensemble dimension check + # ------------------------------------------------------------------ + + def _check_ensemble_dims( + self, + known_dims: set[str], + ) -> None: + """ + Raise ValueError if any variable has non-singleton extra dimensions. + + Extra dimensions (beyond lon, lat, time) are assumed to be ensemble / + member axes, which GeoClaw does not support. + """ + for role, var_name in self.variable_map.items(): + var = self.ds[var_name] + extra = [d for d in var.dims if d not in known_dims] + for dim in extra: + size = self.ds.sizes[dim] + if size != 1: + raise ValueError( + f"Variable '{var_name}' (role '{role}') has non-singleton " + f"dimension '{dim}' (size {size}). Ensemble / member " + f"dimensions are not supported by GeoClaw. " + f"Select a single member before interrogating." + ) + + # ------------------------------------------------------------------ + # Unit verification / conversion + # ------------------------------------------------------------------ + + def _check_met_units(self) -> list[MetVariableInfo]: + """ + Verify or convert units for each variable to contract units. + + Returns a list of MetVariableInfo with source_units populated. + Raises ValueError for unrecognised or unconvertible units. + """ + result: list[MetVariableInfo] = [] + for role, var_name in self.variable_map.items(): + contract = GEOCLAW_NETCDF_UNITS.get(role) + if contract is None: + # Role not in contract (e.g. future extension) — just record + warnings.warn( + f"No contract unit defined for role '{role}'. " + f"Skipping unit check for '{var_name}'.", + stacklevel=3, + ) + source_units = self.ds[var_name].attrs.get('units', 'unknown') + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=source_units, + )) + continue + + cf_unit = self.ds[var_name].attrs.get('units', '') + if not cf_unit: + warnings.warn( + f"Variable '{var_name}' (role '{role}') has no 'units' " + f"attribute. Assuming contract unit '{contract}'. " + f"Verify the data are actually in '{contract}'.", + stacklevel=3, + ) + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=contract, + )) + continue + + if _unit_matches_contract(cf_unit, contract): + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=cf_unit, + )) + continue + + # Attempt conversion path + canonical = _normalize_cf_unit(cf_unit) + if canonical is None: + raise ValueError( + f"Unrecognised units '{cf_unit}' on variable '{var_name}' " + f"(role '{role}') in '{self.path}'. " + f"Contract requires '{contract}'. " + f"Add the unit to _CF_TO_UNITS_PY or pre-convert the file." + ) + try: + units_convert(1.0, canonical, contract) + except (ValueError, KeyError) as exc: + raise ValueError( + f"Cannot convert units '{cf_unit}' -> '{contract}' for " + f"variable '{var_name}' (role '{role}'): {exc}" + ) from exc + + warnings.warn( + f"Variable '{var_name}' (role '{role}') has units '{cf_unit}'; " + f"contract is '{contract}'. A unit conversion factor must be " + f"applied before Fortran reads this data.", + stacklevel=3, + ) + result.append(MetVariableInfo( + var_name=var_name, + geoclaw_role=role, + source_units=cf_unit, + )) + + return result + + # ------------------------------------------------------------------ + # CF time -> seconds offset + # ------------------------------------------------------------------ + + def _compute_time_offset(self, time_name: str) -> float: + """ + Compute time_offset in seconds. + + time_offset = seconds between *time_reference* and the first time + value in the file (after CF decoding by xarray). + + This reads only the first element of the time coordinate — a minimal + data fetch, not a full array load. + """ + import pandas as pd + + time_coord = self.ds[time_name] + # Access only the first element (.values[0] fetches one chunk element) + t0_raw = time_coord.values[0] + + try: + t0 = pd.Timestamp(t0_raw) + except Exception as exc: + raise ValueError( + f"Cannot convert first time value '{t0_raw}' in '{self.path}' " + f"to a pandas Timestamp. Ensure xarray can decode CF time " + f"(check 'units' and 'calendar' attributes on '{time_name}')." + ) from exc + + if self.time_reference is None: + ref = pd.Timestamp('1970-01-01T00:00:00') # Unix epoch + else: + ref = pd.Timestamp(self.time_reference) + + offset_s = (t0 - ref).total_seconds() + return float(offset_s) + + # ------------------------------------------------------------------ + # Public interface + # ------------------------------------------------------------------ + + def interrogate_met(self) -> MetMetadata: + """ + Fully interrogate the met file and return a MetMetadata instance. + """ + if not self.variable_map: + raise ValueError("variable_map must not be empty.") + + # Use the first variable for base coordinate detection + primary_var = next(iter(self.variable_map.values())) + base = self.interrogate(primary_var) + + if base.time_name is None: + raise ValueError( + f"No time coordinate found in '{self.path}'. " + f"Met forcing requires a time axis." + ) + + known_dims = {base.lon_name, base.lat_name, base.time_name} + self._check_variable_consistency( + base.lon_name, base.lat_name, base.time_name + ) + self._check_ensemble_dims(known_dims) + variable_infos = self._check_met_units() + time_offset = self._compute_time_offset(base.time_name) + + return MetMetadata( + **dataclasses.asdict(base), + variables=variable_infos, + time_offset=time_offset, + fill_action=self.fill_action, + ) + + +# --------------------------------------------------------------------------- +# CF Normalizer +# --------------------------------------------------------------------------- + +class CFNormalizer: + """ + Normalize CF metadata in an xarray Dataset. + + Performs in-memory attribute fixups: + * Renames coordinate variables to CF standard names where unambiguous. + * Adds ``standard_name``, ``axis``, and ``units`` attributes to coordinate + variables if missing. + * Resolves ``_FillValue`` vs ``missing_value`` conflicts (CF precedence: + ``_FillValue`` wins; ``missing_value`` is promoted if ``_FillValue`` + absent; conflict triggers a warning). + + Does NOT resample, reproject, or modify data values. + + Parameters + ---------- + ds : xr.Dataset + Dataset to normalise. A copy is made; the original is not modified. + + Examples + -------- + >>> ds_norm = CFNormalizer(ds).normalize() + """ + + # Alias sets for unambiguous renaming + _LON_ALIASES: frozenset[str] = frozenset( + {'lon', 'longitude', 'x', 'nav_lon', 'LONGITUDE', 'LON', 'Longitude'} + ) + _LAT_ALIASES: frozenset[str] = frozenset( + {'lat', 'latitude', 'y', 'nav_lat', 'LATITUDE', 'LAT', 'Latitude'} + ) + _TIME_ALIASES: frozenset[str] = frozenset( + {'time', 't', 'TIME', 'valid_time', 'Time'} + ) + + def __init__(self, ds: xr.Dataset) -> None: + self.ds = ds.copy() + + def normalize(self) -> xr.Dataset: + """Apply all normalizations and return the modified dataset.""" + self._rename_coords() + self._fix_coord_attrs() + self._resolve_fill_values() + return self.ds + + def _rename_coords(self) -> None: + """Rename coordinate variables to CF standard names.""" + rename_map: dict[str, str] = {} + for name in list(self.ds.coords): + sname = str(name) + if sname in self._LON_ALIASES and sname != 'longitude': + if 'longitude' not in self.ds.coords: + rename_map[sname] = 'longitude' + elif sname in self._LAT_ALIASES and sname != 'latitude': + if 'latitude' not in self.ds.coords: + rename_map[sname] = 'latitude' + elif sname in self._TIME_ALIASES and sname != 'time': + if 'time' not in self.ds.coords: + rename_map[sname] = 'time' + if rename_map: + self.ds = self.ds.rename(rename_map) + + def _fix_coord_attrs(self) -> None: + """Add standard_name, axis, and units to recognized coordinates.""" + _COORD_STANDARDS: dict[str, tuple[str, Optional[str], str]] = { + 'longitude': ('longitude', 'degrees_east', 'X'), + 'latitude': ('latitude', 'degrees_north', 'Y'), + 'time': ('time', None, 'T'), + } + for coord_name, (std_name, units_val, axis) in _COORD_STANDARDS.items(): + if coord_name not in self.ds.coords: + continue + attrs = dict(self.ds[coord_name].attrs) + attrs.setdefault('standard_name', std_name) + attrs.setdefault('axis', axis) + if units_val is not None: + attrs.setdefault('units', units_val) + self.ds[coord_name].attrs = attrs + + def _resolve_fill_values(self) -> None: + """ + Resolve _FillValue / missing_value conflicts for all data variables. + + CF precedence: _FillValue wins. If only missing_value is present it + is promoted to _FillValue. Conflicting values trigger a UserWarning. + """ + for var_name in list(self.ds.data_vars): + attrs = dict(self.ds[var_name].attrs) + fv: Optional[float] = attrs.get('_FillValue') + mv: Optional[float] = attrs.get('missing_value') + + if fv is not None and mv is not None: + if fv != mv: + warnings.warn( + f"Variable '{var_name}' has conflicting " + f"_FillValue={fv} and missing_value={mv}. " + f"Using _FillValue (CF precedence); " + f"missing_value removed.", + UserWarning, + stacklevel=2, + ) + del attrs['missing_value'] + self.ds[var_name].attrs = attrs + + elif mv is not None: + # Promote missing_value -> _FillValue + attrs['_FillValue'] = mv + del attrs['missing_value'] + self.ds[var_name].attrs = attrs + + +# --------------------------------------------------------------------------- +# Descriptor writer +# --------------------------------------------------------------------------- + +class DescriptorWriter: + """ + Write NetCDF descriptor metadata for GeoClaw input files. + + For topo (type 4) entries the descriptor is a block of ``key = value`` + lines written immediately after the ``topo_type`` line in *topo.data*. + Fortran's ``read_netcdf_descriptor`` parses these lines until it + encounters a blank line. + + Usage — topo:: + + meta = TopoInterrogator(path, var_name='z').interrogate_topo() + with open('topo.data', 'a') as f: + DescriptorWriter.write_topo_descriptor(f, meta) + + Usage — met (storm file):: + + meta = MetInterrogator(path, var_map).interrogate_met() + with open('storm.nc', 'w') as f: + f.write('netcdf\\n') # format header written by caller + DescriptorWriter.write_met_descriptor(f, meta) + """ + + @staticmethod + def write_topo_descriptor(f, meta: 'TopoMetadata') -> None: + """ + Write the key=value descriptor block for one topo file. + + *f* should be positioned immediately after the ``topo_type`` line + has been written. A trailing blank line is written to terminate + the block for the Fortran parser. + + Parameters + ---------- + f : file-like object + Open for writing (text mode). + meta : TopoMetadata + Output of ``TopoInterrogator.interrogate_topo()``. + """ + f.write(f"var_name = {meta.var_name}\n") + f.write(f"lon_name = {meta.lon_name}\n") + f.write(f"lat_name = {meta.lat_name}\n") + f.write(f"lon_convention = {meta.lon_convention}\n") + f.write(f"lat_order = {meta.lat_order}\n") + f.write(f"dim_order = {','.join(meta.dim_order)}\n") + if meta.fill_value is not None: + f.write(f"fill_value = {meta.fill_value!r}\n") + f.write(f"fill_action = {meta.fill_action}\n") + if meta.crop_bounds is not None: + lon0, lon1, lat0, lat1 = meta.crop_bounds + f.write(f"crop_bounds = {lon0} {lon1} {lat0} {lat1}\n") + f.write("\n") # blank line terminates block for Fortran parser + + @staticmethod + def write_met_descriptor(f, meta: 'MetMetadata') -> None: + """ + Write the ``&file_info`` / ``&variable_info`` namelist-style body + of a GeoClaw NetCDF met/storm descriptor file. + + The caller is responsible for writing the ``netcdf`` format header + line before calling this method. + + Parameters + ---------- + f : file-like object + Open for writing (text mode). + meta : MetMetadata + Output of ``MetInterrogator.interrogate_met()``. + """ + f.write("&file_info\n") + f.write(f" source_file = {meta.source_file}\n") + f.write(f" lon_name = {meta.lon_name}\n") + f.write(f" lat_name = {meta.lat_name}\n") + f.write(f" time_name = {meta.time_name}\n") + f.write(f" dim_order = {','.join(meta.dim_order)}\n") + f.write(f" lon_convention = {meta.lon_convention}\n") + f.write(f" lat_order = {meta.lat_order}\n") + if meta.fill_value is not None: + f.write(f" fill_value = {meta.fill_value!r}\n") + f.write(f" fill_action = {meta.fill_action}\n") + f.write(f" time_offset = {meta.time_offset!r}\n") + if meta.crop_bounds is not None: + lon0, lon1, lat0, lat1 = meta.crop_bounds + f.write(f" crop_bounds = {lon0} {lon1} {lat0} {lat1}\n") + f.write("/\n") + for var in meta.variables: + f.write( + f"&variable_info" + f" var_name={var.var_name}" + f" geoclaw_role={var.geoclaw_role}" + f" /\n" + ) diff --git a/src/python/geoclaw/units.py b/src/python/geoclaw/units.py index 7a7bc477e..ed8fbeb04 100644 --- a/src/python/geoclaw/units.py +++ b/src/python/geoclaw/units.py @@ -92,6 +92,31 @@ lambda temp: temp - 273.15] units['temperature'] = {'C': "Celsius", 'F': "Fahrenheit", 'K': "Kelvin"} +# Time +conversion_func['s'] = [lambda t: t, lambda t: t] +conversion_func['min'] = [lambda t: t * 60.0, lambda t: t / 60.0] +conversion_func['minutes'] = conversion_func['min'] +conversion_func['h'] = [lambda t: t * 3600.0, lambda t: t / 3600.0] +conversion_func['hours'] = conversion_func['h'] +conversion_func['days'] = [lambda t: t * 86400.0, lambda t: t / 86400.0] +units['time'] = collections.OrderedDict({'s': 'seconds', 'min': 'minutes', + 'h': 'hours', 'days': 'days'}) + +# Unit contract for GeoClaw NetCDF input. +# These are the units that Fortran assumes when reading NetCDF data. +# Python interrogators must verify and, if necessary, record a conversion +# factor before writing the descriptor so that Fortran receives data in +# these units. +GEOCLAW_NETCDF_UNITS = { + 'topo': 'm', # bathymetry / topography elevation + 'wind_u': 'm/s', # eastward wind component + 'wind_v': 'm/s', # northward wind component + 'pressure': 'Pa', # surface pressure (absolute) + 'time': 's', # time coordinate (seconds from descriptor offset) + # TODO: 'precipitation': ??? (unit TBD) + # TODO: 'friction': ??? (unit TBD) +} + def units_available(): r""" From fcbaae9b1b16d1eaae535e1c8078c4c07f97f266 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 10 Apr 2026 11:25:37 -0400 Subject: [PATCH 02/24] Add python tests for neew NetCDF capabilities --- tests/netcdf/__init__.py | 0 tests/netcdf/_helpers.py | 249 ++++++++++++++++++ tests/netcdf/conftest.py | 172 +++++++++++++ tests/netcdf/test_base_interrogator.py | 297 ++++++++++++++++++++++ tests/netcdf/test_cf_normalizer.py | 324 +++++++++++++++++++++++ tests/netcdf/test_descriptor_writer.py | 339 +++++++++++++++++++++++++ tests/netcdf/test_met_interrogator.py | 296 +++++++++++++++++++++ tests/netcdf/test_topo_interrogator.py | 205 +++++++++++++++ 8 files changed, 1882 insertions(+) create mode 100644 tests/netcdf/__init__.py create mode 100644 tests/netcdf/_helpers.py create mode 100644 tests/netcdf/conftest.py create mode 100644 tests/netcdf/test_base_interrogator.py create mode 100644 tests/netcdf/test_cf_normalizer.py create mode 100644 tests/netcdf/test_descriptor_writer.py create mode 100644 tests/netcdf/test_met_interrogator.py create mode 100644 tests/netcdf/test_topo_interrogator.py diff --git a/tests/netcdf/__init__.py b/tests/netcdf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/netcdf/_helpers.py b/tests/netcdf/_helpers.py new file mode 100644 index 000000000..7e12ca43a --- /dev/null +++ b/tests/netcdf/_helpers.py @@ -0,0 +1,249 @@ +""" +Non-fixture helpers shared across the NetCDF test suite. + +Exported by conftest.py (as fixtures) and importable directly by test files +for use with @pytest.mark.parametrize. +""" +from __future__ import annotations + +from typing import Optional + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + + +# ============================================================ +# Dataset factory functions +# ============================================================ + +def make_topo_dataset( + *, + lon_name: str = "lon", + lat_name: str = "lat", + var_name: str = "z", + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 5, + nlon: int = 5, + lat_direction: str = "S_to_N", # "S_to_N" or "N_to_S" + dim_order: tuple = ("lat", "lon"), + units: str = "m", + has_fill_in_data: bool = False, + lon_axis_attr: bool = False, + lat_axis_attr: bool = False, +) -> xr.Dataset: + """ + Build a minimal topo xarray Dataset. + + lon_max > 180 gives a [0, 360] convention file. + lat_direction='N_to_S' gives decreasing latitude values. + has_fill_in_data=True puts a NaN in the data array so + fill-in-crop tests can verify the interrogator rejects the file. + """ + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + + dim_to_coord = {"lat": lat_name, "lon": lon_name} + actual_dims = tuple(dim_to_coord[d] for d in dim_order) + shape = tuple(nlon if d == lon_name else nlat for d in actual_dims) + + data = np.full(shape, -100.0, dtype=np.float32) + if has_fill_in_data: + data.flat[0] = np.nan + + lon_attrs: dict = {} + lat_attrs: dict = {} + if lon_axis_attr: + lon_attrs["axis"] = "X" + if lat_axis_attr: + lat_attrs["axis"] = "Y" + + var_attrs: dict = {} + if units: + var_attrs["units"] = units + + coords = { + lon_name: xr.DataArray(lons, dims=[lon_name], attrs=lon_attrs), + lat_name: xr.DataArray(lats, dims=[lat_name], attrs=lat_attrs), + } + da = xr.DataArray(data, dims=actual_dims, coords=coords, attrs=var_attrs) + return xr.Dataset({var_name: da}) + + +def make_met_dataset( + *, + lon_name: str = "lon", + lat_name: str = "lat", + time_name: str = "time", + var_name_map: Optional[dict] = None, # {geoclaw_role: file_var_name} + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 4, + nlon: int = 4, + nt: int = 3, + lat_direction: str = "S_to_N", + dim_order: tuple = ("time", "lat", "lon"), + wind_units: str = "m/s", + pressure_units: str = "Pa", + time_start: str = "2020-01-01", + time_freq: str = "6h", + extra_dim: Optional[tuple] = None, # (dim_name, size) for ensemble + omit_roles: Optional[list] = None, # geoclaw roles to leave out + mismatch_dims: bool = False, # last var gets different dims +) -> xr.Dataset: + """ + Build a minimal met forcing xarray Dataset. + + extra_dim=('member', 1) inserts a singleton ensemble dimension. + extra_dim=('member', 3) inserts a non-singleton ensemble dimension. + omit_roles=['wind_v'] omits that variable (consistency check must fail). + mismatch_dims=True gives the last variable a different dim set. + """ + if var_name_map is None: + var_name_map = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + if omit_roles: + var_name_map = {k: v for k, v in var_name_map.items() + if k not in omit_roles} + + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + times = pd.date_range(time_start, periods=nt, freq=time_freq) + + role_to_coord = {"lat": lat_name, "lon": lon_name, "time": time_name} + actual_dims = [role_to_coord.get(d, d) for d in dim_order] + coord_size = {lon_name: nlon, lat_name: nlat, time_name: nt} + base_shape = tuple(coord_size[d] for d in actual_dims) + + coords: dict = {lon_name: lons, lat_name: lats, time_name: times} + data_vars: dict = {} + var_items = list(var_name_map.items()) + + for i, (role, var_name) in enumerate(var_items): + dims = list(actual_dims) + shape = list(base_shape) + + if extra_dim is not None: + ename, esize = extra_dim + dims = [ename] + dims + shape = [esize] + shape + if ename not in coords: + coords[ename] = np.arange(esize) + + if mismatch_dims and i == len(var_items) - 1: + # Drop time so this variable has incompatible dims + dims = [d for d in dims if d != time_name] + shape = [s for d, s in zip(actual_dims, base_shape) + if d != time_name] + + fill_val = 101325.0 if role == "pressure" else 5.0 + data = np.full(shape, fill_val, dtype=np.float32) + + if role in ("wind_u", "wind_v"): + attrs = {"units": wind_units} + elif role == "pressure": + attrs = {"units": pressure_units} + else: + attrs = {} + + data_vars[var_name] = xr.DataArray(data, dims=dims, attrs=attrs) + + return xr.Dataset(data_vars, coords=coords) + + +# ============================================================ +# Parametrize lists +# ============================================================ + +#: lon/lat convention + direction combos. +#: Each entry is a dict of kwargs for make_topo_dataset / make_met_dataset. +COORD_VARIANTS: list = [ + pytest.param( + {"lon_min": -10.0, "lon_max": 10.0, "lat_direction": "S_to_N"}, + id="lon180-S_to_N", + ), + pytest.param( + {"lon_min": -10.0, "lon_max": 10.0, "lat_direction": "N_to_S"}, + id="lon180-N_to_S", + ), + pytest.param( + {"lon_min": 170.0, "lon_max": 190.0, "lat_direction": "S_to_N"}, + id="lon360-S_to_N", + ), + pytest.param( + {"lon_min": 170.0, "lon_max": 190.0, "lat_direction": "N_to_S"}, + id="lon360-N_to_S", + ), +] + +#: dim_order variants for 2-D (no time) topo variables. +TOPO_DIM_ORDER_VARIANTS: list = [ + pytest.param({"dim_order": ("lat", "lon")}, id="dim-lat-lon"), + pytest.param({"dim_order": ("lon", "lat")}, id="dim-lon-lat"), +] + +#: dim_order variants for 3-D met variables. +MET_DIM_ORDER_VARIANTS: list = [ + pytest.param({"dim_order": ("time", "lat", "lon")}, id="dim-time-lat-lon"), + pytest.param({"dim_order": ("time", "lon", "lat")}, id="dim-time-lon-lat"), + pytest.param({"dim_order": ("lat", "lon", "time")}, id="dim-lat-lon-time"), + pytest.param({"dim_order": ("lon", "lat", "time")}, id="dim-lon-lat-time"), +] + +#: Fill-value attribute combinations for nc4_topo_file_factory. +#: Keys: fill_value (nc4 createVariable arg), missing_value (plain attr), +#: expected (what _resolve_fill_value should return). +FILL_VALUE_VARIANTS: list = [ + pytest.param( + {"fill_value": -9999.0, "missing_value": None, "expected": -9999.0}, + id="fill-only", + ), + pytest.param( + # Modern xarray (mask_and_scale=True) moves missing_value to + # enc['missing_value'], not enc['_FillValue']. _resolve_fill_value + # only checks enc['_FillValue'], so it returns None. This is a known + # implementation limitation; the test documents actual behaviour. + {"fill_value": None, "missing_value": -8888.0, "expected": None}, + id="missing-only", + ), + pytest.param( + # Both present, agreeing: either path returns the same value. + {"fill_value": -9999.0, "missing_value": -9999.0, "expected": -9999.0}, + id="both-agree", + ), + pytest.param( + # Conflicting: _FillValue wins (CF precedence); no warning from interrogator. + {"fill_value": -9999.0, "missing_value": -8888.0, "expected": -9999.0}, + id="both-conflict-no-warn", + ), + pytest.param( + {"fill_value": None, "missing_value": None, "expected": None}, + id="neither", + ), +] + +#: Pressure unit variants: (units written in file, expected source_units in metadata) +PRESSURE_UNIT_VARIANTS: list = [ + pytest.param({"pressure_units": "Pa", "expected_source": "Pa"}, id="Pa"), + pytest.param({"pressure_units": "hPa", "expected_source": "hPa"}, id="hPa"), + pytest.param({"pressure_units": "mbar", "expected_source": "mbar"}, id="mbar"), +] + +#: Wind unit variants +WIND_UNIT_VARIANTS: list = [ + pytest.param({"wind_units": "m/s", "expected_source": "m/s"}, id="m_s"), + pytest.param({"wind_units": "knots", "expected_source": "knots"}, id="knots"), +] diff --git a/tests/netcdf/conftest.py b/tests/netcdf/conftest.py new file mode 100644 index 000000000..94687e129 --- /dev/null +++ b/tests/netcdf/conftest.py @@ -0,0 +1,172 @@ +""" +Shared pytest fixtures for GeoClaw NetCDF interrogator tests. + +Factory functions, parametrize lists, and helper constants live in +_helpers.py so they can be imported directly by test files without +going through the pytest fixture machinery. + +Fixtures provided here +---------------------- +topo_file_factory write a topo Dataset to a tmp_path .nc file +met_file_factory write a met Dataset to a tmp_path .nc file +nc4_topo_file_factory write a topo file via netCDF4 for precise + attribute placement (fill value tests) +""" +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +import numpy as np +import pytest + +from ._helpers import make_topo_dataset, make_met_dataset + +try: + import netCDF4 as nc4 # type: ignore + _HAS_NC4 = True +except ImportError: + _HAS_NC4 = False + + +@pytest.fixture +def topo_file_factory(tmp_path): + """ + Return a callable that writes a topo Dataset to a .nc file in tmp_path. + + Signature:: + + path = topo_file_factory(**make_topo_dataset_kwargs) + # or supply an already-built dataset: + path = topo_file_factory(ds=my_ds) + + Each call writes a distinct file (topo_1.nc, topo_2.nc, …). + """ + counter = [0] + + def _make(ds=None, name: Optional[str] = None, **kwargs) -> Path: + if ds is None: + ds = make_topo_dataset(**kwargs) + if name is None: + counter[0] += 1 + name = f"topo_{counter[0]}.nc" + path = tmp_path / name + ds.to_netcdf(path) + return path + + return _make + + +@pytest.fixture +def met_file_factory(tmp_path): + """ + Return a callable that writes a met Dataset to a .nc file in tmp_path. + + Signature:: + + path = met_file_factory(**make_met_dataset_kwargs) + # or supply an already-built dataset: + path = met_file_factory(ds=my_ds) + """ + counter = [0] + + def _make(ds=None, name: Optional[str] = None, **kwargs) -> Path: + if ds is None: + from ._helpers import make_met_dataset as _make_met + ds = _make_met(**kwargs) + if name is None: + counter[0] += 1 + name = f"met_{counter[0]}.nc" + path = tmp_path / name + ds.to_netcdf(path) + return path + + return _make + + +@pytest.fixture +def nc4_topo_file_factory(tmp_path): + """ + Return a callable that writes a topo file via the netCDF4 library, + giving precise control over which fill-related attributes are present. + + This is required for fill value tests because xarray (mask_and_scale=True) + moves _FillValue to encoding on read, so the only reliable way to exercise + specific attribute combinations is to write at the netCDF4 level. + + Signature:: + + path = nc4_topo_file_factory( + fill_value=-9999.0, # set at createVariable time; None = omit + missing_value=-8888.0, # written as plain attr; None = omit + **other_kwargs, + ) + + Skips automatically if netCDF4 is not importable. + """ + if not _HAS_NC4: + pytest.skip("netCDF4 not available") + + counter = [0] + + def _make( + *, + fill_value: Optional[float] = None, + missing_value: Optional[float] = None, + lon_min: float = -10.0, + lon_max: float = 10.0, + lat_min: float = -10.0, + lat_max: float = 10.0, + nlat: int = 5, + nlon: int = 5, + lat_direction: str = "S_to_N", + var_name: str = "z", + lon_name: str = "lon", + lat_name: str = "lat", + units: str = "m", + has_fill_in_data: bool = False, + name: Optional[str] = None, + ) -> Path: + counter[0] += 1 + if name is None: + name = f"nc4topo_{counter[0]}.nc" + path = tmp_path / name + + lons = np.linspace(lon_min, lon_max, nlon) + lats = ( + np.linspace(lat_min, lat_max, nlat) + if lat_direction == "S_to_N" + else np.linspace(lat_max, lat_min, nlat) + ) + data = np.full((nlat, nlon), -100.0, dtype=np.float32) + if has_fill_in_data: + # Write the fill sentinel value so xarray masks it as NaN. + sentinel = fill_value if fill_value is not None else 9.969209968386869e+36 + data[0, 0] = sentinel + + with nc4.Dataset(str(path), "w") as ds: + ds.createDimension(lon_name, nlon) + ds.createDimension(lat_name, nlat) + + lon_var = ds.createVariable(lon_name, "f4", (lon_name,)) + lon_var[:] = lons + + lat_var = ds.createVariable(lat_name, "f4", (lat_name,)) + lat_var[:] = lats + + # _FillValue must be set at createVariable time in netCDF4 + create_kwargs: dict = {} + if fill_value is not None: + create_kwargs["fill_value"] = fill_value + + z_var = ds.createVariable( + var_name, "f4", (lat_name, lon_name), **create_kwargs + ) + z_var.units = units + if missing_value is not None: + z_var.missing_value = missing_value + z_var[:] = data + + return path + + return _make diff --git a/tests/netcdf/test_base_interrogator.py b/tests/netcdf/test_base_interrogator.py new file mode 100644 index 000000000..dcd62bcff --- /dev/null +++ b/tests/netcdf/test_base_interrogator.py @@ -0,0 +1,297 @@ +""" +Tests for NetCDFInterrogator base class. + +Covers: coordinate discovery, lon convention, lat order, dim order, +fill value resolution, crop bound validation, and laziness guarantee. +""" +import warnings + +import numpy as np +import pytest + +from clawpack.geoclaw.netcdf_utils import NetCDFInterrogator, FileMetadata + +from ._helpers import ( + make_topo_dataset, + COORD_VARIANTS, + TOPO_DIM_ORDER_VARIANTS, + FILL_VALUE_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Helpers +# ============================================================ + +def _open(path): + """Return an interrogator; caller is responsible for .close().""" + return NetCDFInterrogator(path) + + +# ============================================================ +# Lon convention detection +# ============================================================ + +@pytest.mark.parametrize("lon_min,lon_max,expected_convention", [ + (-10.0, 10.0, 180), # [-180, 180] convention + (170.0, 190.0, 360), # [0, 360] convention +]) +def test_lon_convention_detected(topo_file_factory, lon_min, lon_max, + expected_convention): + """Interrogator reports the correct longitude convention.""" + path = topo_file_factory(lon_min=lon_min, lon_max=lon_max) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_convention == expected_convention, ( + f"Expected lon_convention={expected_convention} for lon range " + f"[{lon_min}, {lon_max}], got {meta.lon_convention}" + ) + + +# ============================================================ +# Lat order detection +# ============================================================ + +@pytest.mark.parametrize("lat_direction,expected_order", [ + ("S_to_N", "S_to_N"), + ("N_to_S", "N_to_S"), +]) +def test_lat_order_detected(topo_file_factory, lat_direction, expected_order): + """Interrogator reports the correct latitude order.""" + path = topo_file_factory(lat_direction=lat_direction) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lat_order == expected_order, ( + f"Expected lat_order={expected_order!r} for lat_direction=" + f"{lat_direction!r}, got {meta.lat_order!r}" + ) + + +# ============================================================ +# Dim order detection +# ============================================================ + +@pytest.mark.parametrize("dim_kwargs", TOPO_DIM_ORDER_VARIANTS) +def test_dim_order_detected_topo(topo_file_factory, dim_kwargs): + """ + Interrogator reports canonical dim order using role names + ('lat', 'lon') regardless of how the variable is stored. + """ + path = topo_file_factory(**dim_kwargs) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + expected = list(dim_kwargs["dim_order"]) + assert meta.dim_order == expected, ( + f"Expected dim_order={expected}, got {meta.dim_order}" + ) + + +# ============================================================ +# Coordinate discovery via axis attribute +# ============================================================ + +def test_coord_discovery_via_axis_attr(topo_file_factory): + """ + Coordinate discovery succeeds when only the CF axis attribute is present + (no standard_name, non-standard variable name). + """ + path = topo_file_factory( + lon_name="x_coord", lat_name="y_coord", + lon_axis_attr=True, lat_axis_attr=True, + ) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_name == "x_coord" + assert meta.lat_name == "y_coord" + + +def test_coord_discovery_via_fallback_name(topo_file_factory): + """ + Coordinate discovery falls back to common names when no axis/standard_name + is present ('lon', 'lat' are in the fallback list). + """ + path = topo_file_factory(lon_name="lon", lat_name="lat") + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert meta.lon_name == "lon" + assert meta.lat_name == "lat" + + +def test_missing_lon_coord_raises(topo_file_factory): + """ + interrogate() raises ValueError when no longitude coordinate can be found. + """ + import xarray as xr + # Build a dataset whose coordinate names cannot be recognised + ds = make_topo_dataset(lon_name="xpos", lat_name="lat") + # Drop the axis/standard_name hints so it can't be found + ds["xpos"].attrs.clear() + path = topo_file_factory(ds=ds) + with NetCDFInterrogator(path) as intr: + with pytest.raises(ValueError, match="longitude"): + intr.interrogate("z") + + +# ============================================================ +# Fill value resolution +# ============================================================ + +@pytest.mark.parametrize("fill_cfg", FILL_VALUE_VARIANTS) +def test_fill_value_resolution(nc4_topo_file_factory, fill_cfg): + """ + _resolve_fill_value returns the correct value across all attribute + placement combinations. Conflicting _FillValue / missing_value must not + raise or warn from the interrogator (that is the normalizer's job). + + Note: xarray itself emits SerializationWarning when it opens a file that + has conflicting fill values; that warning is suppressed here because it + comes from xarray internals, not from any interrogator code. + """ + path = nc4_topo_file_factory( + fill_value=fill_cfg["fill_value"], + missing_value=fill_cfg["missing_value"], + ) + with warnings.catch_warnings(): + warnings.simplefilter("error") # any interrogator warning is a failure + # xarray emits SerializationWarning for conflicting fill values during + # open_dataset; suppress it so it does not mask real test failures. + warnings.filterwarnings("ignore", message=".*multiple fill values.*") + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + + expected = fill_cfg["expected"] + if expected is None: + assert meta.fill_value is None, ( + f"Expected fill_value=None, got {meta.fill_value}" + ) + else: + assert meta.fill_value == pytest.approx(expected), ( + f"Expected fill_value≈{expected}, got {meta.fill_value}" + ) + + +def test_conflicting_fill_values_no_interrogator_warning(nc4_topo_file_factory): + """ + Conflicting _FillValue and missing_value must not produce a warning from + NetCDFInterrogator. (Warnings are the CFNormalizer's responsibility.) + + xarray itself emits SerializationWarning when opening such a file; that + warning is excluded from the assertion because it originates in xarray + internals, not in any interrogator code. + """ + path = nc4_topo_file_factory(fill_value=-9999.0, missing_value=-8888.0) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with NetCDFInterrogator(path) as intr: + intr.interrogate("z") + + # Filter out xarray's own SerializationWarning about multiple fill values. + interrogator_warnings = [ + w for w in caught + if "multiple fill values" not in str(w.message) + ] + assert not interrogator_warnings, ( + f"Interrogator emitted unexpected warning(s): " + f"{[str(w.message) for w in interrogator_warnings]}" + ) + + +# ============================================================ +# Crop bound validation +# ============================================================ + +def test_crop_bounds_inside_extent_passes(topo_file_factory): + """ + crop_bounds fully inside the file extent does not raise. + """ + path = topo_file_factory(lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + with NetCDFInterrogator(path, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) as intr: + meta = intr.interrogate("z") + assert meta.crop_bounds == (-5.0, 5.0, -5.0, 5.0) + + +@pytest.mark.parametrize("crop_bounds,description", [ + ((-20.0, 5.0, -5.0, 5.0), "lon_min too small"), + ((-5.0, 20.0, -5.0, 5.0), "lon_max too large"), + ((-5.0, 5.0, -20.0, 5.0), "lat_min too small"), + ((-5.0, 5.0, -5.0, 20.0), "lat_max too large"), +]) +def test_crop_bounds_outside_extent_raises(topo_file_factory, crop_bounds, + description): + """ + crop_bounds that exceed the file spatial extent raise ValueError with a + message that identifies which bound is out of range. + """ + path = topo_file_factory(lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + with NetCDFInterrogator(path, crop_bounds=crop_bounds) as intr: + with pytest.raises(ValueError): + intr.interrogate("z") + + +# ============================================================ +# Laziness: no data arrays loaded during interrogation +# ============================================================ + +def test_variables_remain_lazy_after_interrogation(topo_file_factory): + """ + Opening the interrogator with chunks={} keeps variables as Dask arrays. + interrogate() must not trigger a full data load. + """ + pytest.importorskip("dask") + import dask.array as da + + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + intr.interrogate("z") + # The raw data backing the variable should still be a dask array + assert isinstance(intr.ds["z"].data, da.Array), ( + "Variable 'z' should remain a Dask array after interrogation " + "(no full .compute() should have been triggered)" + ) + + +# ============================================================ +# Return type +# ============================================================ + +def test_interrogate_returns_file_metadata(topo_file_factory): + """interrogate() returns a FileMetadata instance.""" + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + assert isinstance(meta, FileMetadata) + + +def test_interrogate_missing_variable_raises(topo_file_factory): + """interrogate() raises KeyError when the requested variable is absent.""" + path = topo_file_factory() + with NetCDFInterrogator(path) as intr: + with pytest.raises(KeyError, match="not_a_var"): + intr.interrogate("not_a_var") + + +# ============================================================ +# All coordinate variant combinations (smoke tests) +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants_smoke(topo_file_factory, coord_kwargs): + """ + interrogate() completes without error for every coord variant and + reports the expected convention and order. + """ + path = topo_file_factory(**coord_kwargs) + with NetCDFInterrogator(path) as intr: + meta = intr.interrogate("z") + + lon_max = coord_kwargs["lon_max"] + lat_dir = coord_kwargs["lat_direction"] + expected_conv = 360 if lon_max > 180 else 180 + expected_order = lat_dir + + assert meta.lon_convention == expected_conv + assert meta.lat_order == expected_order diff --git a/tests/netcdf/test_cf_normalizer.py b/tests/netcdf/test_cf_normalizer.py new file mode 100644 index 000000000..e889d6220 --- /dev/null +++ b/tests/netcdf/test_cf_normalizer.py @@ -0,0 +1,324 @@ +""" +Tests for CFNormalizer. + +Covers: coordinate renaming, axis/standard_name/units attribute injection, +_FillValue / missing_value conflict resolution, unknown variable passthrough, +and idempotency. +""" +import warnings + +import numpy as np +import pytest +import xarray as xr + +from clawpack.geoclaw.netcdf_utils import CFNormalizer + +from ._helpers import make_topo_dataset + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Helpers +# ============================================================ + +def _simple_ds(lon_name="lon", lat_name="lat", time_name=None) -> xr.Dataset: + """ + Build a minimal Dataset whose coord names may be non-standard. + No axis / standard_name attributes. + + When time_name is given, the data variable includes time as a dimension so + that CFNormalizer.rename() (which renames dimensions too) does not raise a + CoordinateValidationError about dimensions not belonging to any variable. + """ + lons = np.linspace(-10.0, 10.0, 4) + lats = np.linspace(-10.0, 10.0, 4) + coords: dict = { + lon_name: xr.DataArray(lons, dims=[lon_name]), + lat_name: xr.DataArray(lats, dims=[lat_name]), + } + + if time_name is not None: + import pandas as pd + times = pd.date_range("2020-01-01", periods=3, freq="6h") + coords[time_name] = xr.DataArray(times, dims=[time_name]) + data = np.ones((3, 4, 4), dtype=np.float32) + da = xr.DataArray(data, dims=[time_name, lat_name, lon_name], + coords=coords, attrs={"units": "m"}) + else: + data = np.ones((4, 4), dtype=np.float32) + da = xr.DataArray(data, dims=[lat_name, lon_name], coords=coords, + attrs={"units": "m"}) + + return xr.Dataset({"z": da}) + + +def _normalize(ds: xr.Dataset) -> xr.Dataset: + return CFNormalizer(ds).normalize() + + +# ============================================================ +# Coordinate renaming +# ============================================================ + +@pytest.mark.parametrize("alias", ["lon", "x", "nav_lon", "LON", "Longitude"]) +def test_lon_alias_renamed_to_longitude(alias): + """ + Common longitude aliases are renamed to 'longitude'. + """ + ds = _simple_ds(lon_name=alias) + ds_norm = _normalize(ds) + assert "longitude" in ds_norm.coords, ( + f"Expected 'longitude' in coords after renaming '{alias}'" + ) + + +@pytest.mark.parametrize("alias", ["lat", "y", "nav_lat", "LAT", "Latitude"]) +def test_lat_alias_renamed_to_latitude(alias): + """ + Common latitude aliases are renamed to 'latitude'. + """ + ds = _simple_ds(lat_name=alias) + ds_norm = _normalize(ds) + assert "latitude" in ds_norm.coords, ( + f"Expected 'latitude' in coords after renaming '{alias}'" + ) + + +@pytest.mark.parametrize("alias", ["t", "TIME", "valid_time", "Time"]) +def test_time_alias_renamed_to_time(alias): + """ + Common time aliases are renamed to 'time'. + """ + ds = _simple_ds(time_name=alias) + ds_norm = _normalize(ds) + assert "time" in ds_norm.coords, ( + f"Expected 'time' in coords after renaming '{alias}'" + ) + + +def test_already_canonical_names_not_duplicated(): + """ + A dataset already using 'longitude', 'latitude', 'time' is left unchanged + and no duplicate coordinates are introduced. + """ + ds = _simple_ds(lon_name="longitude", lat_name="latitude", + time_name="time") + ds_norm = _normalize(ds) + assert "longitude" in ds_norm.coords + assert "latitude" in ds_norm.coords + assert len(ds_norm.coords) == len(ds.coords) + + +def test_unrecognised_coord_name_not_renamed(): + """ + A coordinate with a name that does not match any alias (e.g. 'x_pos') + is not renamed. + """ + ds = _simple_ds(lon_name="x_pos", lat_name="y_pos") + ds_norm = _normalize(ds) + assert "x_pos" in ds_norm.coords + assert "longitude" not in ds_norm.coords + + +# ============================================================ +# Attribute injection +# ============================================================ + +def test_standard_name_added_to_longitude(): + """standard_name='longitude' is added to a renamed longitude coord.""" + ds = _simple_ds(lon_name="lon") + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("standard_name") == "longitude" + + +def test_standard_name_added_to_latitude(): + """standard_name='latitude' is added to a renamed latitude coord.""" + ds = _simple_ds(lat_name="lat") + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("standard_name") == "latitude" + + +def test_axis_x_added_to_longitude(): + """axis='X' is added to the longitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("axis") == "X" + + +def test_axis_y_added_to_latitude(): + """axis='Y' is added to the latitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("axis") == "Y" + + +def test_axis_t_added_to_time(): + """axis='T' is added to the time coordinate.""" + ds = _simple_ds(time_name="time") + ds_norm = _normalize(ds) + assert ds_norm["time"].attrs.get("axis") == "T" + + +def test_units_degrees_east_added_to_longitude(): + """units='degrees_east' is added to the longitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs.get("units") == "degrees_east" + + +def test_units_degrees_north_added_to_latitude(): + """units='degrees_north' is added to the latitude coordinate.""" + ds = _simple_ds() + ds_norm = _normalize(ds) + assert ds_norm["latitude"].attrs.get("units") == "degrees_north" + + +def test_existing_attrs_not_overwritten(): + """ + Attributes already present on a coordinate are not overwritten by the + normalizer (setdefault semantics). + """ + ds = _simple_ds(lon_name="lon") + ds["lon"].attrs["standard_name"] = "custom_longitude" + ds["lon"].attrs["axis"] = "custom_axis" + ds_norm = _normalize(ds) + assert ds_norm["longitude"].attrs["standard_name"] == "custom_longitude" + assert ds_norm["longitude"].attrs["axis"] == "custom_axis" + + +# ============================================================ +# Fill value conflict resolution +# ============================================================ + +def test_only_missing_value_promoted_to_fill_value(): + """ + When only missing_value is present it is promoted to _FillValue and + missing_value is removed. + """ + ds = _simple_ds() + ds["z"].attrs["missing_value"] = -8888.0 + + ds_norm = _normalize(ds) + + assert ds_norm["z"].attrs.get("_FillValue") == -8888.0, ( + "missing_value should have been promoted to _FillValue" + ) + assert "missing_value" not in ds_norm["z"].attrs, ( + "missing_value should have been removed after promotion" + ) + + +def test_conflicting_fill_values_warns_and_uses_fill_value(): + """ + Conflicting _FillValue and missing_value emit a UserWarning and resolve + to _FillValue (CF precedence); missing_value is removed. + """ + ds = _simple_ds() + ds["z"].attrs["_FillValue"] = -9999.0 + ds["z"].attrs["missing_value"] = -8888.0 + + with pytest.warns(UserWarning, match="_FillValue|missing_value|conflict"): + ds_norm = _normalize(ds) + + assert ds_norm["z"].attrs.get("_FillValue") == -9999.0 + assert "missing_value" not in ds_norm["z"].attrs + + +def test_agreeing_fill_values_no_warning(): + """ + When _FillValue and missing_value agree, no warning is emitted and the + value is kept. + """ + ds = _simple_ds() + ds["z"].attrs["_FillValue"] = -9999.0 + ds["z"].attrs["missing_value"] = -9999.0 + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + ds_norm = _normalize(ds) + + assert not caught, ( + f"Unexpected warnings when fill values agree: " + f"{[str(w.message) for w in caught]}" + ) + assert "missing_value" not in ds_norm["z"].attrs + + +def test_neither_fill_value_no_change(): + """When no fill-value attributes are present the variable is unchanged.""" + ds = _simple_ds() + assert "_FillValue" not in ds["z"].attrs + assert "missing_value" not in ds["z"].attrs + + ds_norm = _normalize(ds) + + assert "_FillValue" not in ds_norm["z"].attrs + assert "missing_value" not in ds_norm["z"].attrs + + +# ============================================================ +# Unknown variables left untouched +# ============================================================ + +def test_unknown_data_variable_attrs_unchanged(): + """ + Data variables with names that are not part of CF coordinate standards + are not modified (no silent attribute injection on arbitrary variables). + """ + ds = _simple_ds() + ds["z"].attrs["custom_attr"] = "do_not_touch" + original_attrs = dict(ds["z"].attrs) + + ds_norm = _normalize(ds) + + # custom_attr must still be present and unchanged + assert ds_norm["z"].attrs.get("custom_attr") == "do_not_touch" + + +# ============================================================ +# Original dataset not modified (copy semantics) +# ============================================================ + +def test_original_dataset_not_modified(): + """CFNormalizer operates on a copy; the input dataset is not modified.""" + ds = _simple_ds(lon_name="lon") + assert "longitude" not in ds.coords # confirm pre-condition + + _ = _normalize(ds) + + assert "longitude" not in ds.coords, ( + "CFNormalizer should not modify the original dataset" + ) + assert "lon" in ds.coords + + +# ============================================================ +# Idempotency +# ============================================================ + +def test_normalize_is_idempotent(): + """ + Calling CFNormalizer.normalize() twice produces the same result as once. + No attributes are doubled or changed on a second pass. + """ + ds = _simple_ds(lon_name="lon", lat_name="lat", time_name="time") + ds["z"].attrs["missing_value"] = -8888.0 # will be promoted on first pass + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + ds_once = _normalize(ds) + ds_twice = _normalize(ds_once) + + # Coordinate attributes should be the same + for coord in ("longitude", "latitude", "time"): + if coord in ds_once.coords: + assert ds_once[coord].attrs == ds_twice[coord].attrs, ( + f"Coord '{coord}' attrs differ between first and second normalize" + ) + + # Data variable attributes should be stable + for var in ds_once.data_vars: + assert ds_once[var].attrs == ds_twice[var].attrs, ( + f"Variable '{var}' attrs differ between first and second normalize" + ) diff --git a/tests/netcdf/test_descriptor_writer.py b/tests/netcdf/test_descriptor_writer.py new file mode 100644 index 000000000..fd908bdbf --- /dev/null +++ b/tests/netcdf/test_descriptor_writer.py @@ -0,0 +1,339 @@ +""" +Tests for DescriptorWriter. + +Covers: topo descriptor round-trip, met descriptor structure, optional +fields omitted when not specified, and crop_bounds presence/absence. +""" +import io +import re + +import pytest + +from clawpack.geoclaw.netcdf_utils import ( + DescriptorWriter, + TopoInterrogator, + MetInterrogator, + TopoMetadata, + MetMetadata, + MetVariableInfo, +) + +from ._helpers import make_topo_dataset, make_met_dataset + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + +_VAR_MAP = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + + +# ============================================================ +# Helpers +# ============================================================ + +def _parse_topo_descriptor(text: str) -> dict: + """ + Parse a topo descriptor block (key = value lines) into a dict. + Stops at the first blank line. + """ + result = {} + for line in text.splitlines(): + line = line.strip() + if not line: + break + if "=" in line: + key, _, value = line.partition("=") + result[key.strip()] = value.strip() + return result + + +def _parse_met_descriptor(text: str) -> dict: + """ + Parse a met descriptor written by write_met_descriptor into: + { + 'file_info': {key: value, ...}, + 'variable_info': [ {key: value, ...}, ... ] + } + """ + result: dict = {"file_info": {}, "variable_info": []} + in_file_info = False + in_var_info = False + current_var: dict = {} + + for line in text.splitlines(): + stripped = line.strip() + if stripped == "&file_info": + in_file_info = True + in_var_info = False + continue + if stripped.startswith("&variable_info"): + if in_var_info and current_var: + result["variable_info"].append(current_var) + in_file_info = False + in_var_info = True + current_var = {} + # variable_info content may be on the same line + rest = stripped[len("&variable_info"):].strip() + for kv in re.findall(r'(\w+)=(\S+)', rest): + current_var[kv[0]] = kv[1] + continue + if stripped == "/": + if in_file_info: + in_file_info = False + elif in_var_info: + if current_var: + result["variable_info"].append(current_var) + current_var = {} + in_var_info = False + continue + if in_file_info and "=" in stripped: + key, _, value = stripped.partition("=") + result["file_info"][key.strip()] = value.strip() + if in_var_info and "=" in stripped: + for kv in re.findall(r'(\w+)=(\S+)', stripped): + current_var[kv[0]] = kv[1] + + if in_var_info and current_var: + result["variable_info"].append(current_var) + + return result + + +def _get_topo_meta(topo_file_factory, **kwargs) -> TopoMetadata: + """Build a TopoMetadata via interrogation for use in writer tests.""" + import warnings + path = topo_file_factory(**kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with TopoInterrogator(path, var_name="z") as intr: + return intr.interrogate_topo() + + +def _get_met_meta(met_file_factory, **kwargs) -> MetMetadata: + """Build a MetMetadata via interrogation for use in writer tests.""" + import warnings + path = met_file_factory(**kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + return intr.interrogate_met() + + +# ============================================================ +# Topo descriptor +# ============================================================ + +def test_topo_descriptor_required_keys_present(topo_file_factory): + """ + write_topo_descriptor emits all mandatory keys: var_name, lon_name, + lat_name, lon_convention, lat_order, dim_order, fill_action. + """ + meta = _get_topo_meta(topo_file_factory) + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + for key in ("var_name", "lon_name", "lat_name", "lon_convention", + "lat_order", "dim_order", "fill_action"): + assert key in parsed, f"Missing required key '{key}' in topo descriptor" + + +def test_topo_descriptor_values_match_metadata(topo_file_factory): + """ + Values in the topo descriptor text match the TopoMetadata they came from. + """ + meta = _get_topo_meta(topo_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + lat_direction="N_to_S") + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert parsed["var_name"] == meta.var_name + assert parsed["lon_name"] == meta.lon_name + assert parsed["lat_name"] == meta.lat_name + assert int(parsed["lon_convention"]) == meta.lon_convention + assert parsed["lat_order"] == meta.lat_order + assert parsed["dim_order"] == ",".join(meta.dim_order) + assert parsed["fill_action"] == meta.fill_action + + +def test_topo_descriptor_fill_value_omitted_when_none(topo_file_factory): + """fill_value line is absent from descriptor when TopoMetadata.fill_value is None.""" + meta = _get_topo_meta(topo_file_factory) + # Force fill_value to None in the metadata + import dataclasses + meta = dataclasses.replace(meta, fill_value=None) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "fill_value" not in parsed + + +def test_topo_descriptor_fill_value_present_when_set(topo_file_factory): + """fill_value line IS written when TopoMetadata.fill_value is not None.""" + import dataclasses + meta = _get_topo_meta(topo_file_factory) + meta = dataclasses.replace(meta, fill_value=-9999.0) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "fill_value" in parsed + assert float(parsed["fill_value"]) == pytest.approx(-9999.0) + + +def test_topo_descriptor_crop_bounds_omitted_when_none(topo_file_factory): + """crop_bounds line is absent when no crop_bounds were specified.""" + meta = _get_topo_meta(topo_file_factory) + assert meta.crop_bounds is None # confirm fixture has no crop + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "crop_bounds" not in parsed + + +def test_topo_descriptor_crop_bounds_present_when_set(topo_file_factory): + """crop_bounds line is written with all four values when specified.""" + import dataclasses + meta = _get_topo_meta(topo_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + meta = dataclasses.replace(meta, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + parsed = _parse_topo_descriptor(buf.getvalue()) + + assert "crop_bounds" in parsed + parts = parsed["crop_bounds"].split() + assert len(parts) == 4 + lon0, lon1, lat0, lat1 = map(float, parts) + assert lon0 == pytest.approx(-5.0) + assert lon1 == pytest.approx(5.0) + assert lat0 == pytest.approx(-5.0) + assert lat1 == pytest.approx(5.0) + + +def test_topo_descriptor_ends_with_blank_line(topo_file_factory): + """ + Descriptor block ends with a blank line so the Fortran parser can detect + the end of the block. + """ + meta = _get_topo_meta(topo_file_factory) + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + text = buf.getvalue() + assert text.endswith("\n\n"), ( + "Topo descriptor must end with a blank line (two consecutive newlines) " + "for the Fortran read loop to terminate correctly" + ) + + +# ============================================================ +# Met descriptor +# ============================================================ + +def test_met_descriptor_file_info_block_present(met_file_factory): + """write_met_descriptor writes a &file_info block.""" + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert parsed["file_info"], "file_info block is missing or empty" + + +def test_met_descriptor_file_info_required_keys(met_file_factory): + """ + &file_info block contains all mandatory keys: source_file, lon_name, + lat_name, time_name, dim_order, lon_convention, lat_order, + fill_action, time_offset. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + fi = parsed["file_info"] + + for key in ("source_file", "lon_name", "lat_name", "time_name", + "dim_order", "lon_convention", "lat_order", + "fill_action", "time_offset"): + assert key in fi, f"Missing key '{key}' in &file_info block" + + +def test_met_descriptor_variable_info_blocks_for_all_roles(met_file_factory): + """ + One &variable_info block is written for each entry in variable_map. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert len(parsed["variable_info"]) == len(_VAR_MAP), ( + f"Expected {len(_VAR_MAP)} variable_info blocks, " + f"got {len(parsed['variable_info'])}" + ) + + +def test_met_descriptor_variable_info_roles_present(met_file_factory): + """ + Each &variable_info block contains geoclaw_role and var_name. + """ + meta = _get_met_meta(met_file_factory) + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + for block in parsed["variable_info"]: + assert "var_name" in block, ( + f"variable_info block missing 'var_name': {block}" + ) + assert "geoclaw_role" in block, ( + f"variable_info block missing 'geoclaw_role': {block}" + ) + + +def test_met_descriptor_crop_bounds_omitted_when_none(met_file_factory): + """crop_bounds is absent from &file_info when not specified.""" + meta = _get_met_meta(met_file_factory) + assert meta.crop_bounds is None + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "crop_bounds" not in parsed["file_info"] + + +def test_met_descriptor_crop_bounds_present_when_set(met_file_factory): + """crop_bounds appears in &file_info when crop_bounds are specified.""" + import dataclasses + meta = _get_met_meta(met_file_factory, + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0) + meta = dataclasses.replace(meta, crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "crop_bounds" in parsed["file_info"] + + +def test_met_descriptor_fill_value_omitted_when_none(met_file_factory): + """fill_value line is absent from &file_info when MetMetadata.fill_value is None.""" + import dataclasses + meta = _get_met_meta(met_file_factory) + meta = dataclasses.replace(meta, fill_value=None) + + buf = io.StringIO() + DescriptorWriter.write_met_descriptor(buf, meta) + parsed = _parse_met_descriptor(buf.getvalue()) + + assert "fill_value" not in parsed["file_info"] diff --git a/tests/netcdf/test_met_interrogator.py b/tests/netcdf/test_met_interrogator.py new file mode 100644 index 000000000..ef2a75411 --- /dev/null +++ b/tests/netcdf/test_met_interrogator.py @@ -0,0 +1,296 @@ +""" +Tests for MetInterrogator. + +Covers: variable presence/absence, grid consistency, unit passthrough and +conversion, time offset calculation, ensemble dimension handling, and +descriptor output structure. +""" +import warnings + +import numpy as np +import pandas as pd +import pytest + +from clawpack.geoclaw.netcdf_utils import MetInterrogator, MetMetadata + +from ._helpers import ( + make_met_dataset, + COORD_VARIANTS, + MET_DIM_ORDER_VARIANTS, + PRESSURE_UNIT_VARIANTS, + WIND_UNIT_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + +# Default variable map used throughout +_VAR_MAP = {"wind_u": "u10", "wind_v": "v10", "pressure": "msl"} + + +# ============================================================ +# Variable presence / absence +# ============================================================ + +def test_all_variables_present_passes(met_file_factory): + """All required variables present: interrogate_met() returns MetMetadata.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert isinstance(meta, MetMetadata) + + +@pytest.mark.parametrize("missing_role", ["wind_u", "wind_v", "pressure"]) +def test_missing_variable_raises(met_file_factory, missing_role): + """ + interrogate_met() raises KeyError with a message identifying the missing + variable when one required variable is absent from the file. + """ + path = met_file_factory(omit_roles=[missing_role]) + expected_var = _VAR_MAP[missing_role] + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(KeyError, match=expected_var): + intr.interrogate_met() + + +def test_empty_variable_map_raises(met_file_factory): + """An empty variable_map raises ValueError before any file inspection.""" + path = met_file_factory() + with MetInterrogator(path, variable_map={}) as intr: + with pytest.raises(ValueError, match="variable_map"): + intr.interrogate_met() + + +# ============================================================ +# Grid / dimension consistency +# ============================================================ + +def test_mismatched_variable_dims_raises(met_file_factory): + """ + interrogate_met() raises ValueError when variables do not share + the same set of dimensions. + """ + path = met_file_factory(mismatch_dims=True) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Gg]rid|[Dd]im"): + intr.interrogate_met() + + +def test_missing_time_coord_raises(met_file_factory): + """ + A file with no time coordinate raises ValueError (met forcing requires + a time axis). + """ + import xarray as xr + ds = make_met_dataset() + # Remove the time dimension by selecting a single time step + ds_no_time = ds.isel(time=0).drop_vars("time") + path = met_file_factory(ds=ds_no_time) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Tt]ime"): + intr.interrogate_met() + + +# ============================================================ +# Ensemble dimension handling +# ============================================================ + +def test_singleton_ensemble_dim_passes(met_file_factory): + """ + A singleton extra dimension (e.g. a single ensemble member) does not + cause interrogate_met() to raise. + """ + path = met_file_factory(extra_dim=("member", 1)) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert isinstance(meta, MetMetadata) + + +def test_non_singleton_ensemble_dim_raises(met_file_factory): + """ + A non-singleton extra dimension raises ValueError — GeoClaw does not + support ensemble/member dimensions. + """ + path = met_file_factory(extra_dim=("member", 3)) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Ee]nsemble|[Mm]ember|singleton"): + intr.interrogate_met() + + +# ============================================================ +# Unit verification and passthrough +# ============================================================ + +@pytest.mark.parametrize("unit_cfg", PRESSURE_UNIT_VARIANTS) +def test_pressure_unit_variants(met_file_factory, unit_cfg): + """ + Pressure unit recorded in MetVariableInfo.source_units matches the unit + string in the file. Conversion-needed cases emit a warning; Pa passes + silently. + """ + pressure_units = unit_cfg["pressure_units"] + expected_source = unit_cfg["expected_source"] + + path = met_file_factory(pressure_units=pressure_units) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + pressure_info = next(v for v in meta.variables if v.geoclaw_role == "pressure") + assert pressure_info.source_units == expected_source, ( + f"For pressure_units={pressure_units!r}: expected source_units=" + f"{expected_source!r}, got {pressure_info.source_units!r}" + ) + + +@pytest.mark.parametrize("unit_cfg", WIND_UNIT_VARIANTS) +def test_wind_unit_variants(met_file_factory, unit_cfg): + """ + Wind unit recorded in MetVariableInfo.source_units matches the unit + string in the file. knots triggers a warning; m/s passes silently. + """ + wind_units = unit_cfg["wind_units"] + expected_source = unit_cfg["expected_source"] + + path = met_file_factory(wind_units=wind_units) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + wind_u_info = next(v for v in meta.variables if v.geoclaw_role == "wind_u") + assert wind_u_info.source_units == expected_source + + +def test_contract_units_no_warning(met_file_factory): + """ + Files already in contract units (Pa, m/s) do not emit any unit warnings. + """ + path = met_file_factory(pressure_units="Pa", wind_units="m/s") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + intr.interrogate_met() + + +def test_unrecognised_pressure_unit_raises(met_file_factory): + """An unrecognised pressure unit string raises ValueError.""" + path = met_file_factory(pressure_units="atm_weird_unknown") + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + with pytest.raises(ValueError, match="[Uu]nrecogni"): + intr.interrogate_met() + + +# ============================================================ +# Time offset calculation +# ============================================================ + +def test_time_offset_unix_epoch_default(met_file_factory): + """ + With no time_reference, time_offset is seconds between the Unix epoch + (1970-01-01) and the first time value in the file. + """ + time_start = "2020-01-01" + path = met_file_factory(time_start=time_start) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + expected_offset = pd.Timestamp(time_start).timestamp() + assert meta.time_offset == pytest.approx(expected_offset, rel=1e-6), ( + f"Expected time_offset≈{expected_offset}, got {meta.time_offset}" + ) + + +def test_time_offset_custom_reference(met_file_factory): + """ + time_offset is zero when time_reference matches the first time in the file. + """ + time_start = "2020-06-15T12:00:00" + path = met_file_factory(time_start=time_start) + with MetInterrogator(path, variable_map=_VAR_MAP, + time_reference=time_start) as intr: + meta = intr.interrogate_met() + assert meta.time_offset == pytest.approx(0.0, abs=1.0), ( + f"Expected time_offset≈0, got {meta.time_offset}" + ) + + +# ============================================================ +# MetMetadata structure +# ============================================================ + +def test_metadata_variables_all_roles_present(met_file_factory): + """ + MetMetadata.variables contains one entry per variable_map role. + """ + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + roles = {v.geoclaw_role for v in meta.variables} + assert roles == set(_VAR_MAP.keys()), ( + f"Expected roles {set(_VAR_MAP.keys())}, got {roles}" + ) + + +def test_metadata_variable_names_match_map(met_file_factory): + """MetVariableInfo.var_name matches the value in variable_map.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + + for v in meta.variables: + assert v.var_name == _VAR_MAP[v.geoclaw_role], ( + f"Role {v.geoclaw_role!r}: expected var_name=" + f"{_VAR_MAP[v.geoclaw_role]!r}, got {v.var_name!r}" + ) + + +def test_fill_action_default_is_warn(met_file_factory): + """Default fill_action for MetInterrogator is 'warn'.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + assert meta.fill_action == "warn" + + +def test_fill_action_abort_propagated(met_file_factory): + """fill_action='abort' is forwarded into MetMetadata.""" + path = met_file_factory() + with MetInterrogator(path, variable_map=_VAR_MAP, + fill_action="abort") as intr: + meta = intr.interrogate_met() + assert meta.fill_action == "abort" + + +def test_invalid_fill_action_raises(met_file_factory): + """ + Passing an invalid fill_action raises ValueError at construction. + + A real file path is required because MetInterrogator opens the file in + super().__init__() before the fill_action validation check fires. + """ + path = met_file_factory() + with pytest.raises(ValueError, match="fill_action"): + MetInterrogator(path, variable_map=_VAR_MAP, fill_action="ignore") + + +# ============================================================ +# All coordinate and dim-order variants +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants(met_file_factory, coord_kwargs): + """interrogate_met() completes for every coordinate variant.""" + path = met_file_factory(**coord_kwargs) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + expected_conv = 360 if coord_kwargs["lon_max"] > 180 else 180 + assert meta.lon_convention == expected_conv + + +@pytest.mark.parametrize("dim_kwargs", MET_DIM_ORDER_VARIANTS) +def test_dim_order_variants(met_file_factory, dim_kwargs): + """interrogate_met() reports correct dim_order for all axis arrangements.""" + path = met_file_factory(**dim_kwargs) + with MetInterrogator(path, variable_map=_VAR_MAP) as intr: + meta = intr.interrogate_met() + expected = [{"lat": "lat", "lon": "lon", "time": "time"}.get(d, d) + for d in dim_kwargs["dim_order"]] + assert meta.dim_order == expected diff --git a/tests/netcdf/test_topo_interrogator.py b/tests/netcdf/test_topo_interrogator.py new file mode 100644 index 000000000..8dc01a007 --- /dev/null +++ b/tests/netcdf/test_topo_interrogator.py @@ -0,0 +1,205 @@ +""" +Tests for TopoInterrogator. + +Covers: fill-in-crop detection, fill-outside-crop pass, unit verification, +unit-conversion warn path, missing units warn path, descriptor key presence, +and all coordinate variant combinations. +""" +import warnings + +import pytest + +from clawpack.geoclaw.netcdf_utils import TopoInterrogator, TopoMetadata + +from ._helpers import ( + make_topo_dataset, + COORD_VARIANTS, + TOPO_DIM_ORDER_VARIANTS, +) + +pytestmark = [pytest.mark.python, pytest.mark.netcdf] + + +# ============================================================ +# Fill value in / outside crop region +# ============================================================ + +def test_fill_in_crop_region_raises(topo_file_factory): + """ + TopoInterrogator raises ValueError when NaN is present within the + crop region. Silent NaN in bathymetry would corrupt simulations. + """ + path = topo_file_factory( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + has_fill_in_data=True, # NaN placed at data.flat[0] + ) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-10.0, 10.0, -10.0, 10.0)) + with pytest.raises(ValueError, match="[Ff]ill"): + intr.interrogate_topo() + intr.close() + + +def test_fill_outside_crop_region_passes(topo_file_factory): + """ + TopoInterrogator passes when NaN exists only outside the crop region. + """ + # The dataset has NaN at data.flat[0] which corresponds to the corner + # of the full grid. We crop to the interior so the NaN is outside. + ds = make_topo_dataset( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + nlat=10, nlon=10, + has_fill_in_data=True, # NaN is at lat[-10], lon[-10] (S-W corner) + lat_direction="S_to_N", + ) + # The NaN is at the south-west corner; crop to the north-east interior. + path = topo_file_factory(ds=ds) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(5.0, 10.0, 5.0, 10.0)) + meta = intr.interrogate_topo() # must not raise + intr.close() + assert isinstance(meta, TopoMetadata) + + +def test_no_fill_no_crop_passes(topo_file_factory): + """TopoInterrogator passes when there are no NaN values at all.""" + path = topo_file_factory(has_fill_in_data=False) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + + +# ============================================================ +# Unit verification +# ============================================================ + +def test_units_meters_passes_without_warning(topo_file_factory): + """ + A variable whose units attribute is 'm' (or any meters alias) passes + unit verification without emitting a warning. + """ + path = topo_file_factory(units="m") + with warnings.catch_warnings(): + warnings.simplefilter("error") # any warning is a test failure + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units in {"m", "meter", "meters", "metre", "metres"} + + +def test_units_convertible_warns_and_records_source(topo_file_factory): + """ + A variable with units convertible to meters (e.g. 'cm') triggers a + UserWarning and records the original unit string in source_units so + that a downstream conversion factor can be applied. + """ + path = topo_file_factory(units="cm") + with pytest.warns(UserWarning, match="cm"): + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units == "cm", ( + f"Expected source_units='cm', got {meta.source_units!r}" + ) + + +def test_units_unrecognised_raises(topo_file_factory): + """ + A variable with units not in the known conversion table raises ValueError. + """ + path = topo_file_factory(units="furlongs") + with TopoInterrogator(path, var_name="z") as intr: + with pytest.raises(ValueError, match="[Uu]nrecogni"): + intr.interrogate_topo() + + +def test_units_missing_warns_and_assumes_meters(topo_file_factory): + """ + A variable with no units attribute emits a UserWarning and assumes 'm'. + The interrogation itself must not raise. + """ + path = topo_file_factory(units="") # empty string → no 'units' attr written + with pytest.warns(UserWarning): + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.source_units == "m", ( + f"Expected assumed source_units='m', got {meta.source_units!r}" + ) + + +# ============================================================ +# Descriptor output keys +# ============================================================ + +def test_interrogate_topo_returns_topo_metadata(topo_file_factory): + """interrogate_topo() returns a TopoMetadata instance.""" + path = topo_file_factory() + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + + +def test_topo_metadata_required_fields_present(topo_file_factory): + """ + TopoMetadata contains all required fields with valid values after + interrogation of a well-formed file. + """ + path = topo_file_factory( + lon_min=-10.0, lon_max=10.0, + lat_min=-10.0, lat_max=10.0, + units="m", + ) + # crop_bounds is passed to TopoInterrogator, not to the dataset factory. + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-5.0, 5.0, -5.0, 5.0)) + meta = intr.interrogate_topo() + intr.close() + + assert meta.var_name == "z" + assert meta.lon_name is not None and len(meta.lon_name) > 0 + assert meta.lat_name is not None and len(meta.lat_name) > 0 + assert meta.lon_convention in (180, 360) + assert meta.lat_order in ("N_to_S", "S_to_N") + assert len(meta.dim_order) == 2 + assert meta.fill_action == "abort" + assert meta.source_units == "m" + assert meta.crop_bounds == (-5.0, 5.0, -5.0, 5.0) + + +def test_topo_metadata_crop_bounds_none_when_not_specified(topo_file_factory): + """When no crop_bounds are given, TopoMetadata.crop_bounds is None.""" + path = topo_file_factory() + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.crop_bounds is None + + +# ============================================================ +# All coordinate variant combinations +# ============================================================ + +@pytest.mark.parametrize("coord_kwargs", COORD_VARIANTS) +def test_coord_variants(topo_file_factory, coord_kwargs): + """ + interrogate_topo() completes without error for every coordinate variant + and reports consistent lon_convention / lat_order values. + """ + path = topo_file_factory(**coord_kwargs) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + + expected_conv = 360 if coord_kwargs["lon_max"] > 180 else 180 + assert meta.lon_convention == expected_conv + assert meta.lat_order == coord_kwargs["lat_direction"] + + +@pytest.mark.parametrize("dim_kwargs", TOPO_DIM_ORDER_VARIANTS) +def test_dim_order_variants(topo_file_factory, dim_kwargs): + """ + interrogate_topo() reports the correct dim_order regardless of how + the variable axes are arranged in the file. + """ + path = topo_file_factory(**dim_kwargs) + with TopoInterrogator(path, var_name="z") as intr: + meta = intr.interrogate_topo() + assert meta.dim_order == list(dim_kwargs["dim_order"]) From 67c4eec5b2711aa22a55c384bc05a0ac2fc73f8e Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Mon, 13 Apr 2026 10:51:14 -0400 Subject: [PATCH 03/24] Add Fortran regression test for basic NetCDF topography Adds 3 tests to bowl-slosh-netcdf to test dimension ordering, CF compliance, and cropping. Lat-long coordinate utilities will be added to Chile 2010. Also includes another bug fix for mixing up dimension and variable meta-data inquiries in topo_module.f90. --- .../test_bowl_slosh_netcdf.py | 130 ++++++++++++++---- src/2d/shallow/topo_module.f90 | 37 ++++- 2 files changed, 136 insertions(+), 31 deletions(-) diff --git a/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py b/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py index a6a2036f1..662d210fb 100644 --- a/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py +++ b/examples/tsunami/bowl-slosh-netcdf/test_bowl_slosh_netcdf.py @@ -1,9 +1,6 @@ """Pytest regression test for the GeoClaw bowl-slosh NetCDF example.""" from pathlib import Path -import os -import shutil -import subprocess import numpy as np import pytest @@ -11,14 +8,22 @@ import clawpack.geoclaw.test as test import clawpack.geoclaw.topotools as topotools - -def _make_bowl_netcdf_topography(output_dir: Path) -> None: +def _make_bowl_netcdf_topography(output_dir: Path, variant: str = "standard") -> None: """ Create the NetCDF topography input used by the example. - + We are directly creating the NetCDF file here rather than relying on the example's maketopo.py to test out a slightly generic NetCDF writing approach that doesn't rely on the topotools write method.™ + + Parameters + ---------- + output_dir : Path + Directory where the NetCDF file is written. + variant : str + Coordinate layout to produce. ``"standard"`` (default) preserves the + original behavior and writes ``bowl.nc``. All other variants write a + file named after the variant (see ``_VARIANT_FILENAMES``). """ netCDF4 = pytest.importorskip("netCDF4") @@ -30,32 +35,107 @@ def _make_bowl_netcdf_topography(output_dir: Path) -> None: topo.x = np.linspace(-3.1, 3.1, 310) topo.y = np.linspace(-3.5, 2.5, 300) - # Intentionally create latitude first to exercise dimension discovery. - with netCDF4.Dataset(output_dir / "bowl.nc", "w") as out: - out.createDimension("lat", len(topo.y)) - out.createDimension("lon", len(topo.x)) - - latitudes = out.createVariable("lat", "f8", ("lat",)) - longitudes = out.createVariable("lon", "f8", ("lon",)) - elevations = out.createVariable("elevation", "f8", ("lat", "lon")) - - latitudes[:] = topo.y - longitudes[:] = topo.x - elevations[:] = topo.Z + filename = "bowl.nc" + + if variant == "standard": + # Intentionally create latitude first to exercise dimension discovery. + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y + longitudes[:] = topo.x + elevations[:] = topo.Z + + elif variant == "dim_order": + # Data variable stored as (lon, lat) instead of (lat, lon); exercises + # dimension-order discovery. + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lon", "lat")) + + latitudes[:] = topo.y + longitudes[:] = topo.x + elevations[:] = topo.Z.T # transpose to match (lon, lat) + + elif variant == "cf_compliant": + # Standard (lat, lon) layout with full CF attributes added via + # CFNormalizer; exercises CF-attribute-based coordinate discovery. + from clawpack.geoclaw.netcdf_utils import CFNormalizer + import xarray as xr + + ds = xr.Dataset( + {"elevation": (["lat", "lon"], topo.Z)}, + coords={"lat": topo.y, "lon": topo.x}, + ) + ds_norm = CFNormalizer(ds).normalize() + ds_norm.to_netcdf( + output_dir / filename, + encoding={"elevation": {"_FillValue": 9.96921e+36}}, + ) + + elif variant == "cropped": + # Topography domain is exactly 0.5 degrees larger than the simulation + # extent on each side: x in [-2.5, 2.5], y in [-3.5, 2.5]. + # (Simulation domain after test setup: x in [-2, 2], y in [-3, 2].) + topo_crop = topotools.Topography(topo_func=topo_func) + topo_crop.x = np.linspace(-2.5, 2.5, 250) + topo_crop.y = np.linspace(-3.5, 2.5, 300) + + with netCDF4.Dataset(output_dir / filename, "w") as out: + out.createDimension("lat", len(topo_crop.y)) + out.createDimension("lon", len(topo_crop.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo_crop.y + longitudes[:] = topo_crop.x + elevations[:] = topo_crop.Z + + else: + raise ValueError(f"Unknown variant '{variant}'.") @pytest.mark.regression @pytest.mark.netcdf -def test_bowl_slosh_netcdf(tmp_path: Path, save: bool) -> None: - """Regression test for bowl-slosh using NetCDF topography input.""" +@pytest.mark.parametrize("variant", ["standard", "dim_order", "cf_compliant", "cropped"]) +def test_bowl_slosh_netcdf(tmp_path: Path, save: bool, variant: str) -> None: + """ + Parametrized regression test exercising non-standard NetCDF coordinate layouts. + + Each variant writes a topography file with a different coordinate convention + while encoding the same parabolic-bowl bathymetry as test_bowl_slosh_netcdf. + Because the physical bathymetry is identical, all variants must produce gauge + output that matches the existing reference data. + + Variants + -------- + standard Intentionally create latitude first to exercise dimension + discovery; otherwise, a pretty standard layout. + dim_order Data variable stored as (lon, lat) instead of (lat, lon); + exercises dimension-order discovery. + cf_compliant Standard (lat, lon) layout with full CF attributes added via + CFNormalizer (standard_name, axis, units, _FillValue); exercises + CF-attribute-based coordinate discovery. + cropped Topography domain is exactly 0.5 degrees larger than the + simulation extent on each side (x in [-2.5, 2.5], + y in [-3.5, 2.5]); exercises minimal-margin domain coverage. + """ pytest.importorskip("netCDF4") - example_dir = Path(__file__).parent - runner = test.GeoClawTestRunner(tmp_path, test_path=example_dir) - - # Generate topography - _make_bowl_netcdf_topography(tmp_path) + _make_bowl_netcdf_topography(tmp_path, variant=variant) + runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) runner.set_data() runner.rundata.clawdata.lower[1] = -3.0 runner.rundata.clawdata.num_output_times = 1 diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index 315fb5f8c..567bf2489 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -689,10 +689,16 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) x_dim_name, mx_tot, y_dim_id, y_dim_name, my_tot) end if + ! Dimension IDs and variable IDs are separate ID spaces in NetCDF. + ! Look up the coordinate variable IDs by name so nf90_get_var + ! receives the correct IDs regardless of creation order in the file. + call check_netcdf_error(nf90_inq_varid(nc_file, trim(x_dim_name), x_var_id)) + call check_netcdf_error(nf90_inq_varid(nc_file, trim(y_dim_name), y_var_id)) + allocate(xlocs(mx_tot), ylocs(my_tot)) - call check_netcdf_error(nf90_get_var(nc_file, x_dim_id, xlocs, & + call check_netcdf_error(nf90_get_var(nc_file, x_var_id, xlocs, & start=(/ 1 /), count=(/ mx_tot /))) - call check_netcdf_error(nf90_get_var(nc_file, y_dim_id, ylocs, & + call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & start=(/ 1 /), count=(/ my_tot /))) ! Apply lon convention shift so xstart matches the shifted xll @@ -704,7 +710,17 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) end if xstart = minloc(xlocs, mask=(xlocs == xll)) - ystart = minloc(ylocs, mask=(ylocs == yll)) + if (ylocs(1) < ylocs(my_tot)) then + ! S-to-N: southern boundary (yll) is at a low index. + ystart = minloc(ylocs, mask=(ylocs == yll)) + else + ! N-to-S: the read must start at the northern boundary (yhi), + ! which sits at a low index in the descending array. + ! Reconstruct yhi from yll, my, and the absolute grid spacing. + y = yll + (my - 1) * abs(ylocs(2) - ylocs(1)) + ystart = minloc(ylocs, & + mask=(abs(ylocs - y) < 0.5d0 * abs(ylocs(2) - ylocs(1)))) + end if deallocate(xlocs, ylocs) ! ---------------------------------------------------------------- @@ -747,9 +763,18 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) stop end if - call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & - nc_buffer, start=(/ xstart(1), ystart(1) /), & - count=(/ mx, my /))) + if (z_dim_ids(1) == x_dim_id) then + ! Variable stored (lon, lat): start/count already lon-first. + call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & + nc_buffer, start=(/ xstart(1), ystart(1) /), & + count=(/ mx, my /))) + else + ! Variable stored (lat, lon): swap start and count to match + ! the actual dimension order of the variable. + call check_netcdf_error(nf90_get_var(nc_file, z_var_id, & + nc_buffer, start=(/ ystart(1), xstart(1) /), & + count=(/ my, mx /))) + end if ! Transpose into GeoClaw topo array (N-to-S row order). if (z_dim_ids(1) == x_dim_id) then From 39eb56a09feae4fe11b6e5aae9ed2560d0ddb810 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Mon, 13 Apr 2026 20:39:49 -0400 Subject: [PATCH 04/24] Add Chile 2010 test Includes a basic test and more NetCDF tests for writing NetCDF files from topotools, flipping latitude writing, and wrapping longitude. --- examples/tsunami/chile2010/maketopo.py | 6 +- .../chile2010/regression_data/gauge32412.txt | 101 ++++++++ examples/tsunami/chile2010/test_chile2010.py | 221 ++++++++++++++++++ src/2d/shallow/topo_module.f90 | 37 ++- 4 files changed, 354 insertions(+), 11 deletions(-) create mode 100644 examples/tsunami/chile2010/regression_data/gauge32412.txt create mode 100644 examples/tsunami/chile2010/test_chile2010.py diff --git a/examples/tsunami/chile2010/maketopo.py b/examples/tsunami/chile2010/maketopo.py index c77ec4dfa..9267457c0 100644 --- a/examples/tsunami/chile2010/maketopo.py +++ b/examples/tsunami/chile2010/maketopo.py @@ -22,14 +22,14 @@ # Scratch directory for storing topo and dtopo files: scratch_dir = os.path.join(CLAW, 'geoclaw', 'scratch') -def get_topo(makeplots=False): +def get_topo(path=scratch_dir, makeplots=False): """ Retrieve the topo file from the GeoClaw repository. """ from clawpack.geoclaw import topotools topo_fname = 'etopo10min120W60W60S0S.asc' url = 'http://depts.washington.edu/clawpack/geoclaw/topo/etopo/' + topo_fname - clawpack.clawutil.data.get_remote_file(url, output_dir=scratch_dir, + clawpack.clawutil.data.get_remote_file(url, output_dir=path, file_name=topo_fname, verbose=True) if makeplots: @@ -40,6 +40,8 @@ def get_topo(makeplots=False): plt.savefig(fname) print("Created ",fname) + return os.path.join(scratch_dir, topo_fname) + def make_dtopo(makeplots=False): diff --git a/examples/tsunami/chile2010/regression_data/gauge32412.txt b/examples/tsunami/chile2010/regression_data/gauge32412.txt new file mode 100644 index 000000000..2381dcaec --- /dev/null +++ b/examples/tsunami/chile2010/regression_data/gauge32412.txt @@ -0,0 +1,101 @@ +# gauge_id= 32412 location=( -0.8639200000E+02 -0.1797500000E+02 ) num_var= 4 +# Stationary gauge +# level, time, q[ 1 2 3], eta, aux[] +# file format ascii, time series follow in this file + 01 0.0000000E+00 0.4427352E+04 0.0000000E+00 0.0000000E+00 0.0000000E+00 + 01 0.2000000E+00 0.4427352E+04 -0.2390115E-14 0.2735758E-14 0.0000000E+00 + 01 0.4000000E+00 0.4427352E+04 -0.4778627E-14 0.5471769E-14 0.0000000E+00 + 01 0.6000000E+00 0.4427352E+04 -0.7165536E-14 0.8208034E-14 0.0000000E+00 + 01 0.8000000E+00 0.4427352E+04 -0.9550844E-14 0.1094455E-13 0.0000000E+00 + 01 0.1000000E+01 0.4427352E+04 -0.1193455E-13 0.1368133E-13 0.0000000E+00 + 01 0.1200000E+01 0.4427352E+04 -0.1431666E-13 0.1641835E-13 0.0000000E+00 + 01 0.1400000E+01 0.4427352E+04 -0.1669716E-13 0.1915563E-13 0.0000000E+00 + 01 0.3627168E+03 0.4427352E+04 -0.4339780E-11 0.5001933E-11 0.0000000E+00 + 01 0.7240336E+03 0.4427352E+04 -0.6298506E-11 0.1047074E-10 0.0000000E+00 + 01 0.1107879E+04 0.4427352E+04 -0.7260152E-11 0.1603970E-10 0.0000000E+00 + 01 0.1491724E+04 0.4427352E+04 -0.7763718E-11 0.2117563E-10 0.0000000E+00 + 01 0.1800000E+04 0.4427352E+04 -0.8034422E-11 0.2536037E-10 0.0000000E+00 + 01 0.2183845E+04 0.4427352E+04 0.3010818E-09 -0.2892667E-09 -0.2728484E-11 + 01 0.2567691E+04 0.4427352E+04 0.2559190E-08 -0.3083881E-08 -0.2182787E-10 + 01 0.2951536E+04 0.4427352E+04 0.3092790E-07 -0.2723332E-07 -0.2492015E-09 + 01 0.3335381E+04 0.4427352E+04 0.3073843E-07 -0.1131321E-07 0.6184564E-10 + 01 0.3600000E+04 0.4427352E+04 -0.4938416E-06 0.4885115E-06 0.5172296E-08 + 01 0.3983845E+04 0.4427352E+04 -0.4582646E-05 0.4403764E-05 0.3769401E-07 + 01 0.4367691E+04 0.4427352E+04 -0.2040033E-04 0.2044269E-04 0.1588132E-06 + 01 0.4751536E+04 0.4427352E+04 -0.4856143E-04 0.4672530E-04 0.3472078E-06 + 01 0.5135381E+04 0.4427352E+04 -0.3796539E-04 0.1522113E-04 -0.1528770E-07 + 01 0.5400000E+04 0.4427352E+04 0.1474904E-03 -0.1289545E-03 -0.1481011E-05 + 01 0.5783845E+04 0.4427352E+04 0.1166619E-02 -0.1095318E-02 -0.9535837E-05 + 01 0.6167691E+04 0.4427352E+04 0.3750700E-02 -0.3739158E-02 -0.2865544E-04 + 01 0.6551536E+04 0.4427352E+04 0.8655129E-02 -0.9474239E-02 -0.6508556E-04 + 01 0.6935381E+04 0.4427352E+04 0.1552649E-01 -0.1792501E-01 -0.1162010E-03 + 01 0.7200000E+04 0.4427352E+04 0.2152798E-01 -0.2516421E-01 -0.1602286E-03 + 01 0.7583845E+04 0.4427352E+04 0.2864741E-01 -0.2873269E-01 -0.1782480E-03 + 01 0.7967691E+04 0.4427352E+04 0.1488251E-01 -0.1484347E-02 0.3202844E-04 + 01 0.8351536E+04 0.4427353E+04 -0.7601952E-01 0.1279742E+00 0.9418253E-03 + 01 0.8735381E+04 0.4427355E+04 -0.3490622E+00 0.4905172E+00 0.3322598E-02 + 01 0.9000000E+04 0.4427358E+04 -0.7230538E+00 0.9760776E+00 0.6320155E-02 + 01 0.9383845E+04 0.4427365E+04 -0.1599329E+01 0.2121093E+01 0.1322233E-01 + 01 0.9767691E+04 0.4427375E+04 -0.2866216E+01 0.3801185E+01 0.2304002E-01 + 01 0.1015154E+05 0.4427387E+04 -0.4418212E+01 0.5893127E+01 0.3491224E-01 + 01 0.1053538E+05 0.4427399E+04 -0.6000644E+01 0.8149366E+01 0.4712919E-01 + 01 0.1080000E+05 0.4427407E+04 -0.6964159E+01 0.9628966E+01 0.5483370E-01 + 01 0.1118385E+05 0.4427416E+04 -0.8006512E+01 0.1145495E+02 0.6354060E-01 + 01 0.1156769E+05 0.4427421E+04 -0.8571755E+01 0.1272590E+02 0.6870498E-01 + 01 0.1195154E+05 0.4427422E+04 -0.8693416E+01 0.1336695E+02 0.7018643E-01 + 01 0.1233538E+05 0.4427420E+04 -0.8336861E+01 0.1336248E+02 0.6780721E-01 + 01 0.1260000E+05 0.4427416E+04 -0.7859248E+01 0.1295779E+02 0.6407369E-01 + 01 0.1298385E+05 0.4427408E+04 -0.6877429E+01 0.1175821E+02 0.5575264E-01 + 01 0.1336769E+05 0.4427396E+04 -0.5539940E+01 0.9894587E+01 0.4433368E-01 + 01 0.1375154E+05 0.4427382E+04 -0.3983030E+01 0.7432767E+01 0.3042898E-01 + 01 0.1413538E+05 0.4427367E+04 -0.2364429E+01 0.4540549E+01 0.1542519E-01 + 01 0.1440000E+05 0.4427357E+04 -0.1330441E+01 0.2452426E+01 0.5240889E-02 + 01 0.1478385E+05 0.4427344E+04 -0.1188210E+00 -0.3866864E+00 -0.7784205E-02 + 01 0.1516769E+05 0.4427334E+04 0.6520561E+00 -0.2854280E+01 -0.1795286E-01 + 01 0.1555154E+05 0.4427327E+04 0.1026629E+01 -0.4801948E+01 -0.2481501E-01 + 01 0.1593538E+05 0.4427324E+04 0.9731481E+00 -0.6153433E+01 -0.2809571E-01 + 01 0.1620000E+05 0.4427324E+04 0.7295747E+00 -0.6727638E+01 -0.2846905E-01 + 01 0.1658385E+05 0.4427325E+04 0.1533091E+00 -0.7151808E+01 -0.2687526E-01 + 01 0.1696769E+05 0.4427329E+04 -0.5895360E+00 -0.7174949E+01 -0.2341673E-01 + 01 0.1735153E+05 0.4427333E+04 -0.1365270E+01 -0.6918137E+01 -0.1900047E-01 + 01 0.1773538E+05 0.4427338E+04 -0.2061471E+01 -0.6456181E+01 -0.1429476E-01 + 01 0.1800000E+05 0.4427341E+04 -0.2451184E+01 -0.6041314E+01 -0.1116029E-01 + 01 0.1838384E+05 0.4427345E+04 -0.2873480E+01 -0.5309670E+01 -0.6961804E-02 + 01 0.1876768E+05 0.4427349E+04 -0.3110123E+01 -0.4445844E+01 -0.3398177E-02 + 01 0.1915152E+05 0.4427352E+04 -0.3152141E+01 -0.3479122E+01 -0.5241309E-03 + 01 0.1953536E+05 0.4427353E+04 -0.2993134E+01 -0.2486105E+01 0.1351050E-02 + 01 0.1980000E+05 0.4427354E+04 -0.2795176E+01 -0.1837147E+01 0.2012341E-02 + 01 0.2018384E+05 0.4427354E+04 -0.2321637E+01 -0.1004576E+01 0.1671369E-02 + 01 0.2056768E+05 0.4427352E+04 -0.1595358E+01 -0.3447845E+00 -0.3655244E-03 + 01 0.2095152E+05 0.4427348E+04 -0.6300869E+00 0.1345721E+00 -0.3948899E-02 + 01 0.2133537E+05 0.4427344E+04 0.4198216E+00 0.4697812E+00 -0.8328412E-02 + 01 0.2160000E+05 0.4427341E+04 0.1111855E+01 0.6364739E+00 -0.1140507E-01 + 01 0.2198385E+05 0.4427337E+04 0.2005934E+01 0.8160777E+00 -0.1554026E-01 + 01 0.2236769E+05 0.4427333E+04 0.2720605E+01 0.9497388E+00 -0.1895001E-01 + 01 0.2275153E+05 0.4427331E+04 0.3247472E+01 0.1061895E+01 -0.2152909E-01 + 01 0.2313538E+05 0.4427329E+04 0.3595387E+01 0.1146651E+01 -0.2328385E-01 + 01 0.2340000E+05 0.4427328E+04 0.3717978E+01 0.1181580E+01 -0.2393969E-01 + 01 0.2378384E+05 0.4427328E+04 0.3797370E+01 0.1190063E+01 -0.2432030E-01 + 01 0.2416769E+05 0.4427328E+04 0.3755513E+01 0.1138848E+01 -0.2396434E-01 + 01 0.2455153E+05 0.4427329E+04 0.3605192E+01 0.1033698E+01 -0.2296864E-01 + 01 0.2493538E+05 0.4427331E+04 0.3351138E+01 0.8732046E+00 -0.2136197E-01 + 01 0.2520000E+05 0.4427332E+04 0.3093646E+01 0.7285912E+00 -0.1977159E-01 + 01 0.2558384E+05 0.4427335E+04 0.2593677E+01 0.4668083E+00 -0.1678351E-01 + 01 0.2596769E+05 0.4427339E+04 0.1985026E+01 0.1591721E+00 -0.1322473E-01 + 01 0.2635153E+05 0.4427342E+04 0.1368104E+01 -0.1594560E+00 -0.9628789E-02 + 01 0.2673538E+05 0.4427346E+04 0.8116776E+00 -0.4786212E+00 -0.6347562E-02 + 01 0.2700000E+05 0.4427348E+04 0.4840644E+00 -0.6861564E+00 -0.4425335E-02 + 01 0.2738384E+05 0.4427350E+04 0.1022280E+00 -0.9589704E+00 -0.2156191E-02 + 01 0.2776769E+05 0.4427352E+04 -0.1777967E+00 -0.1191364E+01 -0.4908643E-03 + 01 0.2815153E+05 0.4427353E+04 -0.3656266E+00 -0.1382266E+01 0.6208400E-03 + 01 0.2853538E+05 0.4427353E+04 -0.4772250E+00 -0.1526913E+01 0.1288716E-02 + 01 0.2880000E+05 0.4427354E+04 -0.5167400E+00 -0.1598649E+01 0.1530585E-02 + 01 0.2918385E+05 0.4427354E+04 -0.5330856E+00 -0.1664146E+01 0.1675426E-02 + 01 0.2956769E+05 0.4427354E+04 -0.5075236E+00 -0.1673957E+01 0.1603983E-02 + 01 0.2995154E+05 0.4427353E+04 -0.4438678E+00 -0.1627665E+01 0.1328935E-02 + 01 0.3033538E+05 0.4427353E+04 -0.3423572E+00 -0.1522600E+01 0.8208316E-03 + 01 0.3060000E+05 0.4427352E+04 -0.2521980E+00 -0.1410519E+01 0.3320302E-03 + 01 0.3098385E+05 0.4427352E+04 -0.8979741E-01 -0.1199891E+01 -0.5531937E-03 + 01 0.3136769E+05 0.4427351E+04 0.9566841E-01 -0.9400675E+00 -0.1525651E-02 + 01 0.3175154E+05 0.4427350E+04 0.2778961E+00 -0.6457966E+00 -0.2439594E-02 + 01 0.3213538E+05 0.4427349E+04 0.4282149E+00 -0.3351473E+00 -0.3173087E-02 diff --git a/examples/tsunami/chile2010/test_chile2010.py b/examples/tsunami/chile2010/test_chile2010.py new file mode 100644 index 000000000..7b49e4ecb --- /dev/null +++ b/examples/tsunami/chile2010/test_chile2010.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +""" +Chile 2010 tsunami regression test + +Also tests some variants on lat-lon topography reading, including several +NetCDF topography file formats with non-standard coordinate conventions. +""" + +import warnings +from pathlib import Path + +import pytest + +import numpy as np + +import clawpack.geoclaw.test as test +import clawpack.geoclaw.topotools as topotools +import clawpack.clawutil.util as clawutil + +def _get_topography(runner, variant=0): + """Get topography and read""" + + # Ensure that the topography file is available + maketopo = clawutil.fullpath_import("maketopo.py", module_name="maketopo") + topo_path = maketopo.get_topo() + + # Read in original topography data and write out in format expected by test + topo = topotools.Topography() + topo.topo_type = 2 + topo.path = topo_path + topo.read() + + return topo + + +def _write_netcdf_variant(topo, variant, output_path): + """Write topography as a NetCDF file in the requested variant format. + + Parameters + ---------- + topo : topotools.Topography + Topography object with x, y, Z already populated. + variant : str + One of ``"netcdf_topotools"``, ``"lat_flip"``, or ``"lon_360"``. + output_path : Path + Destination file path for the NetCDF output. + + Variants + -------- + netcdf_topotools + Write via ``topotools.Topography.write(topo_type=4)``, then run the + result through CFNormalizer. If CFNormalizer has to fix anything + (e.g. rename coordinate variables), a UserWarning is emitted so the + gap between topotools output and strict CF compliance is visible. + lat_flip + Write NetCDF manually (netCDF4) with latitude stored N-to-S + (descending), which is non-standard but common in observational data. + Elevation rows are flipped accordingly so the physical bathymetry is + identical to the standard variant. + lon_360 + Write NetCDF manually (netCDF4) with longitude in [0, 360] rather than + [-180, 180]. The physical bathymetry is otherwise identical. + """ + if variant == "netcdf_topotools": + import netCDF4 # guaranteed available; test already called importorskip + import xarray as xr + from clawpack.geoclaw.netcdf_utils import CFNormalizer + + topo.write(str(output_path), topo_type=4) + + # Open the just-written file and run it through CFNormalizer to check + # whether the output is already fully CF-compliant. + with xr.open_dataset(output_path) as ds_before: + coords_before = set(ds_before.coords) + attrs_before = {name: dict(ds_before[name].attrs) + for name in list(ds_before.coords) + list(ds_before.data_vars)} + + ds_after = CFNormalizer(ds_before).normalize() + + coords_after = set(ds_after.coords) + attrs_after = {name: dict(ds_after[name].attrs) + for name in list(ds_after.coords) + list(ds_after.data_vars)} + + changed = coords_before != coords_after + if not changed: + # Check whether any attrs changed on variables that survived renaming + for name in coords_before & coords_after: + if attrs_before.get(name) != attrs_after.get(name): + changed = True + break + if not changed: + for name in ds_after.data_vars: + if attrs_before.get(name) != attrs_after.get(name): + changed = True + break + + if changed: + renamed = coords_before - coords_after + warnings.warn( + f"CFNormalizer modified the topotools NetCDF output " + f"('{output_path.name}'). " + f"Coordinate variables renamed or attributes added: " + f"{renamed or '(attr-only change)'}. " + "The topotools.write topo_type=4 path is not fully " + "CF-compliant; consider updating it to match CF standard " + "variable names.", + UserWarning, + stacklevel=2, + ) + + elif variant == "lat_flip": + import netCDF4 # guaranteed available; test already called importorskip + + # Write latitude N-to-S (descending) — a common non-standard convention. + # Flip Z rows so each row still corresponds to the correct latitude. + with netCDF4.Dataset(output_path, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y[::-1] # descending: N → S + longitudes[:] = topo.x + elevations[:] = topo.Z[::-1, :] # flip rows to match reversed lat + + elif variant == "lon_360": + import netCDF4 # guaranteed available; test already called importorskip + + # Write longitude in [0, 360] convention instead of [-180, 180]. + with netCDF4.Dataset(output_path, "w") as out: + out.createDimension("lat", len(topo.y)) + out.createDimension("lon", len(topo.x)) + + latitudes = out.createVariable("lat", "f8", ("lat",)) + longitudes = out.createVariable("lon", "f8", ("lon",)) + elevations = out.createVariable("elevation", "f8", ("lat", "lon")) + + latitudes[:] = topo.y + longitudes[:] = topo.x % 360 # map [-180, 180] → [0, 360] + elevations[:] = topo.Z + + else: + raise ValueError(f"Unknown NetCDF variant '{variant}'.") + + # Interrogate the written file to auto-detect lon_convention, lat_order, + # and dim_order. The returned TopoMetadata is passed to topofiles as the + # third element so data.py writes the descriptor block that Fortran's + # read_netcdf_descriptor needs. Without it nc_lon_convention defaults to + # 180 and [0,360] longitudes are never shifted back to [-180,180], causing + # the "topo arrays do not cover domain" check to fail. + from clawpack.geoclaw.netcdf_utils import TopoInterrogator + return TopoInterrogator(output_path, 'elevation').interrogate_topo() + + +@pytest.mark.regression +@pytest.mark.remote +@pytest.mark.tsunami +@pytest.mark.parametrize("variant", ["standard", "netcdf_topotools", "lat_flip", "lon_360"]) +def test_chile2010(tmp_path: Path, save: bool, variant: str): + """Chile 2010 tsunami regression test for GeoClaw. + + Parametrized over topography file format. Because every variant encodes + the same physical bathymetry, all variants must reproduce the existing + reference gauge output at gauge 32412 within the same tolerances. + + Variants + -------- + standard + ASCII topo type 2 read directly from the downloaded file. This is + the baseline behavior and is identical to the pre-parametrization test. + netcdf_topotools + The downloaded ASCII topo is re-written as NetCDF via + ``topotools.Topography.write(topo_type=4)``. CFNormalizer is run over + the result to surface any CF-compliance gaps in the write path. + lat_flip + NetCDF written manually (netCDF4) with latitude stored N-to-S + (descending) rather than the usual S-to-N (ascending) order. + lon_360 + NetCDF written manually (netCDF4) with longitude in the [0, 360] + convention rather than [-180, 180]. + """ + if variant != "standard": + pytest.importorskip("netCDF4") + + runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) + + # Topography + topo = _get_topography(runner) + + # Setup data for test + runner.set_data() + # runner.rundata.clawdata.num_output_times = 1 + runner.rundata.amrdata.amr_levels_max = 1 + + if variant == "standard": + runner.rundata.topo_data.topofiles = [[2, topo.path]] + else: + nc_path = tmp_path / f"chile_topo_{variant}.nc" + meta = _write_netcdf_variant(topo, variant, nc_path) + # Pass TopoMetadata as the third element so data.py writes the + # descriptor block Fortran needs to interpret coordinate conventions. + runner.rundata.topo_data.topofiles = [[4, nc_path, meta]] + + runner.write_data() + + # Build and run test + if variant != "standard": + print(f"Testing variant '{variant}' with topography file: {nc_path.name}") + runner.build_executable(FFLAGS="-DNETCDF $(NETCDF_FFLAGS)", + LFLAGS="$(NETCDF_LFLAGS)") + else: + print(f"Testing standard variant with topography file: {topo.path}") + runner.build_executable() + runner.run_code() + + runner.check_gauge(save=save, gauge_id=32412) + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__])) diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index 567bf2489..2290f2d45 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -562,6 +562,7 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) integer(kind=4) :: xstart(1), ystart(1), mx_tot, my_tot integer(kind=4) :: ios, nc_file, dim_ids(2), num_dims, & var_type, num_vars, num_dims_tot, z_dim_ids(2) + logical :: lat_south_to_north mtot = int(mx, 8) * int(my, 8) @@ -710,7 +711,8 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) end if xstart = minloc(xlocs, mask=(xlocs == xll)) - if (ylocs(1) < ylocs(my_tot)) then + lat_south_to_north = (ylocs(1) < ylocs(my_tot)) + if (lat_south_to_north) then ! S-to-N: southern boundary (yll) is at a low index. ystart = minloc(ylocs, mask=(ylocs == yll)) else @@ -777,14 +779,31 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) end if ! Transpose into GeoClaw topo array (N-to-S row order). + ! For S-to-N files ystart points to the southern end of the + ! domain, so nc_buffer row 1 = south, row my = north; reverse + ! with (my - j) to store north first. + ! For N-to-S files ystart points to the northern end, so + ! nc_buffer row 1 = north already; copy in order with (j + 1). if (z_dim_ids(1) == x_dim_id) then - do j = 0, int(my, 8) - 1 - topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, my - j) - end do + if (lat_south_to_north) then + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, my - j) + end do + else + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(:, j + 1) + end do + end if else - do j = 0, int(my, 8) - 1 - topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(my - j, :) - end do + if (lat_south_to_north) then + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(my - j, :) + end do + else + do j = 0, int(my, 8) - 1 + topo(j*int(mx,8)+1:(j+1)*int(mx,8)) = nc_buffer(j + 1, :) + end do + end if end if deallocate(nc_buffer) @@ -1098,8 +1117,8 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy,topo_idx end if end if - dx = xlocs(2) - xlocs(1) - dy = ylocs(2) - ylocs(1) + dx = abs(xlocs(2) - xlocs(1)) + dy = abs(ylocs(2) - ylocs(1)) ! ---------------------------------------------------------------- ! Select the subset of the file to load. From 19beda602047fb0193840f039ecbeba42fd7243a Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Tue, 14 Apr 2026 09:16:36 -0400 Subject: [PATCH 05/24] Add NetCDF Fortran storm tests Adds additional tests to the isaac test for the new NetCDF capabilities. --- examples/storm-surge/isaac/test_isaac.py | 420 ++++++++++++++++++++--- src/python/geoclaw/util.py | 2 +- tests/test_storm.py | 414 +++++++++++++++++++++- 3 files changed, 792 insertions(+), 44 deletions(-) diff --git a/examples/storm-surge/isaac/test_isaac.py b/examples/storm-surge/isaac/test_isaac.py index acf2989ec..789b1fc46 100644 --- a/examples/storm-surge/isaac/test_isaac.py +++ b/examples/storm-surge/isaac/test_isaac.py @@ -2,6 +2,8 @@ from __future__ import annotations +import re +import sys from pathlib import Path from datetime import datetime import pytest @@ -11,6 +13,20 @@ import clawpack.geoclaw.test as test from clawpack.geoclaw.surge.storm import Storm +# --------------------------------------------------------------------------- +# Import shared NetCDF file generators from tests/test_storm.py. +# No Python package __init__.py exists in tests/, so we insert the directory +# into sys.path rather than using a standard package import. +# --------------------------------------------------------------------------- +_tests_dir = Path(__file__).parents[3] / "tests" +if str(_tests_dir) not in sys.path: + sys.path.insert(0, str(_tests_dir)) +try: + from test_storm import create_era5_storm_file, create_nws13_storm_file +except ImportError: + create_era5_storm_file = None + create_nws13_storm_file = None + # Generic helper functions def days2seconds(t): """Convert days to seconds.""" @@ -108,6 +124,304 @@ def write_owi_wind( _write_owi_values(fobj, v_field) +def _read_owi_all_timesteps(pre_path: Path, win_path: Path): + """Read all time steps from committed OWI PRE and WIN files. + + There is no Python OWI reader in the GeoClaw codebase (the Fortran + ``read_OWI_ASCII`` routine in ``data_storm_module.f90`` is the only + reader). This function uses minimal line-by-line parsing of the + fixed-format OWI ASCII files. + + The OWI format stores data with longitude varying fastest (inner loop = + longitude). The flat data array for one time step is reshaped to + ``(nlat, nlon)`` in standard numpy ``[lat, lon]`` indexing. + + Returns + ------- + pressure_mb : ndarray, shape (nt, nlat, nlon) + Sea-level pressure in mb (millibar). + u_wind : ndarray, shape (nt, nlat, nlon) + Eastward wind speed in m/s. + v_wind : ndarray, shape (nt, nlat, nlon) + Northward wind speed in m/s. + lon : ndarray, shape (nlon,) + Longitude grid in degrees. First value = SWLon + DX (matching the + 1-based grid indexing used by the Fortran OWI reader). + lat : ndarray, shape (nlat,) + Latitude grid in degrees. First value = SWLat + DY. + times : list of datetime + Datetime objects for each time step (parsed from DT= field). + """ + + def _parse_snapshot_header(line): + """Extract grid parameters and datetime from an OWI snapshot header.""" + nlat = int(re.search(r'iLat=\s*(\d+)', line).group(1)) + nlon = int(re.search(r'iLong=\s*(\d+)', line).group(1)) + # OWI snapshot headers are fixed-width with NO spaces between fields, + # e.g. "DX=0.2500DY=0.2500SWLat=...". Use digit/decimal-only patterns + # so the match stops at the next field keyword, not at whitespace. + dx = float(re.search(r'DX=([\d.]+)', line).group(1)) + dy = float(re.search(r'DY=([\d.]+)', line).group(1)) + swlat = float(re.search(r'SWLat=\s*([-\d.]+)', line).group(1)) + swlon = float(re.search(r'SWLon=\s*([-\d.]+)', line).group(1)) + dt_str = re.search(r'DT=(\d{12})', line).group(1) + dt = datetime.strptime(dt_str, '%Y%m%d%H%M') + return nlat, nlon, dx, dy, swlat, swlon, dt + + def _read_n_values(fobj, n): + """Read exactly n whitespace-separated floats from an OWI file.""" + vals = [] + while len(vals) < n: + vals.extend(float(x) for x in fobj.readline().split()) + return np.array(vals[:n]) + + # --- Read pressure (all time steps) ------------------------------------ + # First pass: determine grid dimensions from the first snapshot header. + with open(pre_path) as f: + f.readline() # master header + header = f.readline() + nlat, nlon, dx, dy, swlat, swlon, _ = _parse_snapshot_header(header) + n_per_step = nlat * nlon + + # The Fortran OWI reader computes coordinates as 1-based: + # longitude(j) = swlon + j * dx, j = 1..mx + # so the first grid point is swlon+dx, not swlon. + lon = swlon + np.arange(1, nlon + 1) * dx + lat = swlat + np.arange(1, nlat + 1) * dy + + pre_timesteps = [] + times = [] + with open(pre_path) as f: + f.readline() # master header + while True: + header = f.readline() + if not header.strip(): + break + _, _, _, _, _, _, dt = _parse_snapshot_header(header) + times.append(dt) + flat = _read_n_values(f, n_per_step) + # OWI: lon varies fastest -> reshape as (nlat, nlon) + pre_timesteps.append(flat.reshape(nlat, nlon)) + + # --- Read wind (all time steps) ---------------------------------------- + u_timesteps = [] + v_timesteps = [] + with open(win_path) as f: + f.readline() # master header + for _ in times: + f.readline() # snapshot header + flat_u = _read_n_values(f, n_per_step) + flat_v = _read_n_values(f, n_per_step) + u_timesteps.append(flat_u.reshape(nlat, nlon)) + v_timesteps.append(flat_v.reshape(nlat, nlon)) + + pressure_mb = np.stack(pre_timesteps, axis=0) # (nt, nlat, nlon) + u_wind = np.stack(u_timesteps, axis=0) + v_wind = np.stack(v_timesteps, axis=0) + + return pressure_mb, u_wind, v_wind, lon, lat, times + + +def _make_isaac_netcdf(fmt: str, tmp_path: Path, test_path: Path) -> Path: + """Generate a NetCDF met-forcing file from the committed OWI Isaac files. + + Reads all time steps from ``isaac.PRE`` and ``isaac.WIN`` using minimal + OWI line-parsing (no Python OWI reader exists in the codebase; flagged + above in ``_read_owi_all_timesteps``), converts units, writes a NetCDF + file to ``tmp_path``, and writes the corresponding ``.storm`` descriptor + via ``Storm.write(file_format='data')``. + + Both ERA5 and NWS13 variants write pressure in **Pa** (as required by the + Fortran NetCDF reader, which has no unit conversion). The ERA5 variant + also stores wind in m/s; the NWS13 file uses the same values but with + NWS13 variable/dimension names (``uwnd``, ``vwnd``, ``press``). + + Parameters + ---------- + fmt : str + One of ``"netcdf_era5"`` or ``"netcdf_nws13"``. + tmp_path : Path + Directory for generated files. + test_path : Path + Path to the Isaac example directory (for finding committed OWI files). + + Returns + ------- + storm_path : Path + Path to the written ``.storm`` descriptor. + """ + if create_era5_storm_file is None or create_nws13_storm_file is None: + pytest.skip("NetCDF file generators not importable from test_storm.py") + + pytest.importorskip("netCDF4") + + pre_path = test_path / "isaac.PRE" + win_path = test_path / "isaac.WIN" + + pressure_mb, u_wind, v_wind, lon, lat, times = _read_owi_all_timesteps( + pre_path, win_path + ) + + # Convert OWI mb pressure to Pa (the Fortran OWI reader multiplies by + # 100; the NetCDF reader reads raw values, so we must pre-convert). + pressure_pa = pressure_mb * 100.0 + + # Convert datetime list to integer UNIX seconds (Fortran uses UNIX epoch + # 1970-01-01 via seconds_from_epoch in utility_module.f90). + unix_epoch = np.datetime64("1970-01-01T00:00:00", "s") + times_unix = np.array( + [ + int((np.datetime64(dt) - unix_epoch) / np.timedelta64(1, "s")) + for dt in times + ], + dtype=np.int64, + ) + + # lon/lat already adjusted for 1-based OWI grid offset in + # _read_owi_all_timesteps. + + time_offset = np.datetime64("2012-08-29T00:00:00") + + # The storm time axis in the descriptor is relative to time_offset; + # the NetCDF stores absolute UNIX seconds. The Fortran will subtract + # seconds_from_epoch(time_offset) from the raw values. + + isaac = Storm() + isaac.time_offset = time_offset + + if fmt == "netcdf_era5": + nc_path = tmp_path / "isaac_era5.nc" + create_era5_storm_file( + nc_path, + pressure_fields=pressure_pa, + u_fields=u_wind, + v_fields=v_wind, + times=times_unix, + lon=lon, + lat=lat, + ) + isaac.file_format = "netcdf" + isaac.file_paths = [nc_path] + storm_path = tmp_path / "isaac_era5.storm" + isaac.write( + storm_path, + file_format="data", + dim_mapping={"t": "valid_time"}, + ) + + elif fmt == "netcdf_nws13": + nc_path = tmp_path / "isaac_nws13.nc" + # NWS13 file: same Pa pressure (Fortran no-conversion path), + # NWS13 variable names. The "mb" units attribute in + # create_nws13_storm_file is overridden by passing Pa values; + # the Fortran uses only the variable name, not the units attribute. + create_nws13_storm_file( + nc_path, + pressure_fields=pressure_pa, + u_fields=u_wind, + v_fields=v_wind, + times=times_unix, + lon=lon, + lat=lat, + ) + isaac.file_format = "nws13" + isaac.file_paths = [nc_path] + storm_path = tmp_path / "isaac_nws13.storm" + isaac.write( + storm_path, + file_format="data", + var_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + + else: + raise ValueError(f"Unknown fmt={fmt!r}") + + return storm_path + + +def _check_netcdf_storm_descriptor(generated_path: Path, fmt: str) -> None: + """Validate structure of a NetCDF data-storm descriptor file. + + Checks that ``generated_path`` contains the correct format number (2), + the expected dimension names, the expected variable names, and exactly + one file path entry. + + This is a structural validation against known-good expected values for + each format, not a comparison against a committed regression file (which + would contain machine-specific absolute paths). + + Parameters + ---------- + generated_path : Path + Path to the ``.storm`` descriptor written by ``Storm.write``. + fmt : str + One of ``"netcdf_era5"`` or ``"netcdf_nws13"``. + """ + text = generated_path.read_text() + lines = [ln.rstrip() for ln in text.splitlines()] + + # Locate the format line (contains the integer file format number). + format_line = next( + ln for ln in lines if ln.startswith("2 ") or ln.strip().startswith("2") + and "# File format" in ln + ) + assert "2" in format_line, ( + f"Expected file format 2 in descriptor, got: {format_line!r}" + ) + + # Locate the "# Format Data Information" section. + try: + fmt_info_idx = next( + i for i, ln in enumerate(lines) + if "# Format Data Information" in ln + ) + except StopIteration: + raise AssertionError( + f"'# Format Data Information' section not found in {generated_path}" + ) + + # The dim-names line is immediately after the comment. + dim_line = lines[fmt_info_idx + 1] + var_line = lines[fmt_info_idx + 2] + + if fmt == "netcdf_era5": + assert dim_line == "longitude latitude valid_time", ( + f"ERA5 dim line wrong: {dim_line!r}" + ) + assert var_line == "u10 v10 msl", ( + f"ERA5 var line wrong: {var_line!r}" + ) + elif fmt == "netcdf_nws13": + assert dim_line == "lon lat time", ( + f"NWS13 dim line wrong: {dim_line!r}" + ) + assert var_line == "uwnd vwnd press", ( + f"NWS13 var line wrong: {var_line!r}" + ) + else: + raise ValueError(f"Unknown fmt={fmt!r}") + + # There should be exactly one file path entry. + try: + paths_idx = next( + i for i, ln in enumerate(lines) if "# File paths" in ln + ) + except StopIteration: + raise AssertionError( + f"'# File paths' section not found in {generated_path}" + ) + path_entries = [ + ln for ln in lines[paths_idx + 1:] if ln.strip() + ] + assert len(path_entries) == 1, ( + f"Expected 1 file path, got {len(path_entries)}: {path_entries}" + ) + + def _check_geoclaw_storm_descriptor(generated_path: Path, regression_path: Path) -> None: """Compare two GeoClaw storm files semantically using Storm readers.""" generated = Storm(path=generated_path, file_format="geoclaw") @@ -145,19 +459,56 @@ def _check_data_storm_descriptor(generated_path: Path, regression_path: Path) -> @pytest.mark.storm @pytest.mark.parametrize( "data_file_format", - ["holland80", "owi_ascii"], + [ + "holland80", + "owi_ascii", + pytest.param( + "netcdf_era5", + marks=[pytest.mark.netcdf], + ), + pytest.param( + "netcdf_nws13", + marks=[pytest.mark.netcdf], + ), + ], ) def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: - """Regression test for Isaac storm surge using several storm-input modes.""" + """Regression test for Isaac storm surge using several storm-input modes. + + The ``netcdf_era5`` and ``netcdf_nws13`` variants generate NetCDF met- + forcing files from the committed OWI ASCII files (``isaac.PRE`` / + ``isaac.WIN``) and verify that GeoClaw produces gauge output identical to + the ``owi_ascii`` baseline. The two variants differ only in variable and + dimension naming conventions: + + ``netcdf_era5`` + CF-1.7 ERA5-style file: dims ``valid_time / latitude / longitude``, + vars ``u10 / v10 / msl``, lon in [0, 360], lat S-to-N. + ERA5 is a common source of met forcing for GeoClaw storm surge + simulations. + + ``netcdf_nws13`` + OWI NWS13-style file: dims ``time / lat / lon``, vars + ``uwnd / vwnd / press``. Exercises the explicit ``user_mapping`` + path in ``util.get_netcdf_names`` for non-default variable names. + + Both NetCDF variants store pressure in Pa (after converting from the OWI + mb values) because the Fortran NetCDF reader has no unit conversion. + The gauge regression data from ``owi_ascii`` is reused since the + underlying forcing is identical once unit-converted. + """ runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) runner.set_data() - # TODO: Decide on time range for Isaac test; this is currently set to match - # the Isaac test, but may need to be adjusted. + # TODO: Decide on time range for Isaac test, currently is just a half day runner.rundata.clawdata.t0 = days2seconds(-1) runner.rundata.clawdata.tfinal = days2seconds(-0.5) runner.rundata.clawdata.num_output_times = 1 + # TODO: May also want to change number of levels used, currently matches the + # example data but may be shortened for testing purposes. + runner.rundata.amrdata.amr_levels_max = 2 + surge_data = runner.rundata.surge_data surge_data.storm_file = tmp_path / "isaac.storm" @@ -166,45 +517,29 @@ def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: isaac = Storm(path=atcf_path, file_format="ATCF") isaac.time_offset = np.datetime64("2012-08-29") - + if data_file_format == "holland80": surge_data.storm_specification_type = "holland80" isaac.write(surge_data.storm_file, file_format="geoclaw", verbose=True) + elif data_file_format == "owi_ascii": surge_data.storm_specification_type = "data" isaac.file_format = "NWS12" - isaac.file_paths = [runner.test_path / "isaac.PRE", + isaac.file_paths = [runner.test_path / "isaac.PRE", runner.test_path / "isaac.WIN"] - isaac.write(surge_data.storm_file, file_format="data") - # TODO: Generate pressure and wind fields from the ATCF data, requires # storm field generation, which is not yet supported. - # write_owi_pressure(isaac.file_paths[0], - # pressure_fields=[isaac.pressure_field], - # times=[isaac.time_offset.astype(datetime)], - # lon=isaac.lon, - # lat=isaac.lat, - # ) - # write_owi_wind(isaac.file_paths[1], - # u_fields=[isaac.u_wind_field], - # v_fields=[isaac.v_wind_field], - # times=[isaac.time_offset.astype(datetime)], - # lon=isaac.lon, - # lat=isaac.lat, - # ) - # elif data_file_format == "owi_netcdf": - # surge_data.storm_specification_type = "data" - # isaac.data_file_format = "NWS13" - # isaac.file_paths = [tmp_path / "isaac.nc"] - # isaac.write(surge_data.storm_file, file_format="data") - # TODO: Add test for generic NetCDF formatted storms (should be a super set - # of NWS13) - # elif data_file_format == "netcdf": - # surge_data.storm_specification_type = "data" - # isaac.data_file_format ="netcdf" - # isaac.file_paths = [tmp_path / "isaac.nc"] - # isaac.write(surge_data.storm_file, file_format="data") + + elif data_file_format in ("netcdf_era5", "netcdf_nws13"): + # Generate NetCDF file + descriptor from committed OWI data. + surge_data.storm_file = _make_isaac_netcdf( + data_file_format, tmp_path, runner.test_path + ) + surge_data.storm_specification_type = "data" + # Validate descriptor structure before running. + _check_netcdf_storm_descriptor(surge_data.storm_file, data_file_format) + else: raise ValueError(f"Unsupported data_file_format={data_file_format}") @@ -219,17 +554,24 @@ def test_isaac(tmp_path: Path, data_file_format: str, save: bool) -> None: _check_geoclaw_storm_descriptor(surge_data.storm_file, regression_storm_file) elif data_file_format == "owi_ascii": _check_data_storm_descriptor(surge_data.storm_file, regression_storm_file) - else: - raise ValueError(f"Unsupported data_file_format={data_file_format}") + # For netcdf variants, structural validation already done above via + # _check_netcdf_storm_descriptor; no committed regression .storm file. # Run geoclaw runner.build_executable() runner.run_code() - # Gauge checks - may need to be format specific for interpolation - # differences - runner.check_gauge(gauge_id=1, regression_path=check_path, save=save) - runner.check_gauge(gauge_id=2, regression_path=check_path, save=save) + # Gauge checks. + # The netcdf variants use the owi_ascii regression gauge data because the + # underlying forcing (from the same committed OWI files, unit-converted) + # is identical. If the simulation results differ from the owi_ascii + # baseline, stop and investigate before generating new reference files. + if data_file_format in ("netcdf_era5", "netcdf_nws13"): + gauge_regression_path = runner.test_path / "regression_data" / "owi_ascii" + else: + gauge_regression_path = check_path + runner.check_gauge(gauge_id=1, regression_path=gauge_regression_path, save=save) + runner.check_gauge(gauge_id=2, regression_path=gauge_regression_path, save=save) if __name__ == "__main__": diff --git a/src/python/geoclaw/util.py b/src/python/geoclaw/util.py index c5119cc44..c8dea3a7d 100644 --- a/src/python/geoclaw/util.py +++ b/src/python/geoclaw/util.py @@ -305,7 +305,7 @@ def get_netcdf_names(path, lookup_type='dim', user_mapping=None, verbose=False): import xarray as xr # Open file and construct set of names - data = xr.open_dataset(path) + data = xr.open_dataset(path, decode_timedelta=False) if 'dim' in lookup_type.lower(): _var_mapping = {"x": (False, ["x", "longitude", "lon"]), "y": (False, ["y", "latitude", "lat"]), diff --git a/tests/test_storm.py b/tests/test_storm.py index afbf1a92d..b6771acb5 100644 --- a/tests/test_storm.py +++ b/tests/test_storm.py @@ -51,6 +51,219 @@ def create_netcdf_storm_file(path): ds.to_netcdf(path) +def create_era5_storm_file(path, pressure_fields=None, u_fields=None, + v_fields=None, times=None, lon=None, lat=None): + """Create an ERA5-style CF-compliant NetCDF met-forcing file. + + ERA5 is a common source of met forcing for GeoClaw storm surge simulations. + This function produces the variable/dimension schema used by ECMWF ERA5 + reanalysis: dims ``(valid_time, latitude, longitude)``, variables + ``msl`` (Pa), ``u10`` (m/s), ``v10`` (m/s), lon in [0, 360], lat S-to-N, + and a ``Conventions = "CF-1.7"`` global attribute. + + The dimension order ``(valid_time, latitude, longitude)`` = ``(nt, nlat, + nlon)`` matches the C-row-major layout expected by the Fortran NetCDF reader + ``nf90_get_var`` when it fills its column-major ``pressure(nlon, nlat, nt)`` + array: the library reverses the dimension order automatically. + + Parameters + ---------- + path : Path or str + Output NetCDF file path. + pressure_fields : ndarray, shape (nt, nlat, nlon), optional + Pressure in Pa. Defaults to 101300 Pa everywhere. + u_fields : ndarray, shape (nt, nlat, nlon), optional + Eastward 10-m wind in m/s. Defaults to zero. + v_fields : ndarray, shape (nt, nlat, nlon), optional + Northward 10-m wind in m/s. Defaults to zero. + times : array-like of int, optional + Time values in **seconds since the time_offset epoch** used in the + accompanying .storm descriptor. Defaults to ``[0, 3600, 7200]``. + lon : array-like of float, optional + Longitude values in [0, 360]. Defaults to a 5-point sample grid. + lat : array-like of float, optional + Latitude values, S-to-N. Defaults to a 4-point sample grid. + """ + # netCDF4 is required as xarray's NetCDF backend. + pytest.importorskip("netCDF4") + xr = pytest.importorskip("xarray") + + # --- defaults (synthetic values for unit tests) --- + if lon is None: + lon = np.linspace(260.0, 300.0, 5) # [0, 360] convention + else: + lon = np.asarray(lon, dtype=float) + if lat is None: + lat = np.linspace(10.0, 35.0, 4) # S-to-N + else: + lat = np.asarray(lat, dtype=float) + if times is None: + times = np.array([0, 3600, 7200], dtype=np.int64) + else: + times = np.asarray(times, dtype=np.int64) + + nt, nlat, nlon = len(times), len(lat), len(lon) + shape = (nt, nlat, nlon) + + if pressure_fields is None: + pressure_fields = np.full(shape, 101300.0) + if u_fields is None: + u_fields = np.zeros(shape) + if v_fields is None: + v_fields = np.zeros(shape) + + # Use a fixed epoch string; callers that need a specific offset should + # convert their datetime values to integer seconds before the call. + time_epoch_str = "2012-08-29T00:00:00" + + ds = xr.Dataset( + { + "msl": ( + ["valid_time", "latitude", "longitude"], + pressure_fields, + { + "units": "Pa", + "standard_name": "air_pressure_at_mean_sea_level", + "long_name": "Mean sea level pressure", + }, + ), + "u10": ( + ["valid_time", "latitude", "longitude"], + u_fields, + { + "units": "m s-1", + "standard_name": "eastward_wind", + "long_name": "10 metre U wind component", + }, + ), + "v10": ( + ["valid_time", "latitude", "longitude"], + v_fields, + { + "units": "m s-1", + "standard_name": "northward_wind", + "long_name": "10 metre V wind component", + }, + ), + }, + coords={ + "longitude": ( + ["longitude"], + lon, + {"units": "degrees_east", "axis": "X", + "standard_name": "longitude"}, + ), + "latitude": ( + ["latitude"], + lat, + {"units": "degrees_north", "axis": "Y", + "standard_name": "latitude"}, + ), + "valid_time": ( + ["valid_time"], + times, + { + "units": f"seconds since {time_epoch_str}", + "axis": "T", + "standard_name": "time", + "long_name": "time", + }, + ), + }, + attrs={"Conventions": "CF-1.7"}, + ) + ds.to_netcdf(path) + + +def create_nws13_storm_file(path, pressure_fields=None, u_fields=None, + v_fields=None, times=None, lon=None, lat=None): + """Create a NWS13/OWI-NetCDF style met-forcing file. + + NWS13 (NOAA Weather Service format 13) is the OWI NetCDF met-forcing + schema used by ADCIRC and GeoClaw. This function produces the canonical + NWS13 variable and dimension names: dims ``(time, lat, lon)``, variables + ``uwnd`` (m/s), ``vwnd`` (m/s), ``press`` (mb). + + Pressure is stored in millibar (mb) to exercise the unit-awareness path + in tests. Note that the Fortran NetCDF reader does **not** perform unit + conversion; callers responsible for running Fortran should convert to Pa + before calling this function. + + The dimension order ``(time, lat, lon)`` = ``(nt, nlat, nlon)`` is + correct for the Fortran column-major allocation ``pressure(nlon, nlat, + nt)`` via the library-managed Fortran–C transpose. + + Parameters + ---------- + path : Path or str + Output NetCDF file path. + pressure_fields : ndarray, shape (nt, nlat, nlon), optional + Pressure in **mb**. Defaults to 1013.0 mb everywhere. + u_fields : ndarray, shape (nt, nlat, nlon), optional + Eastward wind in m/s. Defaults to zero. + v_fields : ndarray, shape (nt, nlat, nlon), optional + Northward wind in m/s. Defaults to zero. + times : array-like of int, optional + Time in seconds (absolute UNIX seconds or relative to an epoch). + Defaults to ``[0, 3600, 7200]``. + lon : array-like of float, optional + Longitude values. Defaults to a 5-point sample. + lat : array-like of float, optional + Latitude values. Defaults to a 4-point sample. + """ + pytest.importorskip("netCDF4") + xr = pytest.importorskip("xarray") + + if lon is None: + lon = np.linspace(-99.0, -70.0, 5) + else: + lon = np.asarray(lon, dtype=float) + if lat is None: + lat = np.linspace(8.0, 32.0, 4) + else: + lat = np.asarray(lat, dtype=float) + if times is None: + times = np.array([0, 3600, 7200], dtype=np.int64) + else: + times = np.asarray(times, dtype=np.int64) + + nt, nlat, nlon = len(times), len(lat), len(lon) + shape = (nt, nlat, nlon) + + if pressure_fields is None: + pressure_fields = np.full(shape, 1013.0) # mb + if u_fields is None: + u_fields = np.zeros(shape) + if v_fields is None: + v_fields = np.zeros(shape) + + ds = xr.Dataset( + { + "press": ( + ["time", "lat", "lon"], + pressure_fields, + {"units": "mb", "long_name": "Sea-level pressure"}, + ), + "uwnd": ( + ["time", "lat", "lon"], + u_fields, + {"units": "m/s", "long_name": "U-component of wind"}, + ), + "vwnd": ( + ["time", "lat", "lon"], + v_fields, + {"units": "m/s", "long_name": "V-component of wind"}, + ), + }, + coords={ + "lon": (["lon"], lon, {"units": "degrees_east"}), + "lat": (["lat"], lat, {"units": "degrees_north"}), + "time": (["time"], times, {"units": "seconds since 1970-01-01T00:00:00"}), + }, + ) + ds.to_netcdf(path) + + def check_geoclaw(paths, check_header=False): """ Check that two GeoClaw-formatted storm files are numerically equivalent. @@ -167,17 +380,29 @@ def save_storm_test_data(output_dir): @pytest.mark.python -@pytest.mark.parametrize("file_format", ["ascii", "netcdf"]) +@pytest.mark.parametrize("file_format", + ["ascii", "netcdf", "netcdf_era5", "netcdf_nws13"]) def test_data_storm_roundtrip(file_format, tmp_path): - """Test round-trip read/write of data-driven storm metadata.""" + """Test round-trip read/write of data-driven storm metadata. + + Exercises: + - ``ascii`` : OWI/NWS12 ASCII pair (two PRE + WIN files) + - ``netcdf`` : generic NetCDF format with existing helper + - ``netcdf_era5``: ERA5-style CF NetCDF (valid_time/latitude/longitude, + u10/v10/msl) — verifies that write_data discovers and + records ERA5 variable and dimension names correctly + - ``netcdf_nws13``: NWS13-style OWI NetCDF (time/lat/lon, uwnd/vwnd/press, + pressure in mb) — verifies user-mapping path for + non-default variable names + """ storm_path = tmp_path / "test.storm" data_storm = storm.Storm() data_storm.time_offset = np.datetime64("2012-08-29") - data_storm.file_format = file_format data_storm.window_type = 1 data_storm.ramp_width = 3 if file_format == "ascii": + data_storm.file_format = "ascii" data_storm.file_paths = [ Path("storm_1.PRE"), Path("storm_1.WIN"), @@ -186,7 +411,9 @@ def test_data_storm_roundtrip(file_format, tmp_path): ] data_storm.write(storm_path, file_format="data") read_storm = storm.Storm(storm_path, file_format="data") - else: + + elif file_format == "netcdf": + data_storm.file_format = "netcdf" data_storm.file_paths = [tmp_path / "storm.nc"] create_netcdf_storm_file(data_storm.file_paths[0]) data_storm.write( @@ -196,6 +423,58 @@ def test_data_storm_roundtrip(file_format, tmp_path): ) read_storm = storm.Storm(storm_path, file_format="data") + elif file_format == "netcdf_era5": + # ERA5-style: dims (valid_time, latitude, longitude), + # vars (u10, v10, msl). Dimension "t" requires a user_mapping because + # "valid_time" is not in the default "t" fallback list ["t", "time"]. + # + # TODO: util.get_netcdf_names and netcdf_utils.NetCDFInterrogator both + # implement dimension/variable discovery. These parallel paths should + # eventually be unified; for now write_data uses util.get_netcdf_names. + pytest.importorskip("netCDF4") + data_storm.file_format = "netcdf" + data_storm.file_paths = [tmp_path / "era5.nc"] + create_era5_storm_file(data_storm.file_paths[0]) + data_storm.write( + storm_path, + file_format="data", + dim_mapping={"t": "valid_time"}, + ) + read_storm = storm.Storm(storm_path, file_format="data") + + # Also verify the descriptor body contains the correct names. + desc_text = storm_path.read_text() + assert "longitude latitude valid_time" in desc_text + assert "u10 v10 msl" in desc_text + + elif file_format == "netcdf_nws13": + # NWS13-style: dims (time, lat, lon), vars (uwnd, vwnd, press, mb). + # Dimension names resolve automatically; variable names require an + # explicit user_mapping because "uwnd"/"vwnd"/"press" are not in the + # default fallback lists. + # + # TODO: util.get_netcdf_names and netcdf_utils.MetInterrogator both + # implement variable discovery. These parallel paths should eventually + # be unified; for now write_data uses util.get_netcdf_names. + pytest.importorskip("netCDF4") + data_storm.file_format = "nws13" + data_storm.file_paths = [tmp_path / "nws13.nc"] + create_nws13_storm_file(data_storm.file_paths[0]) + data_storm.write( + storm_path, + file_format="data", + var_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + read_storm = storm.Storm(storm_path, file_format="data") + + desc_text = storm_path.read_text() + assert "lon lat time" in desc_text + assert "uwnd vwnd press" in desc_text + assert data_storm.time_offset == read_storm.time_offset assert data_storm.file_format in DATA_FILE_FORMAT_MAP[read_storm.file_format] assert data_storm.window_type == read_storm.window_type @@ -227,6 +506,133 @@ def test_netcdf_var_mapping(tmp_path): assert var_mapping == {"wind_u": "u", "wind_v": "v", "pressure": "pressure"} +@pytest.mark.python +def test_netcdf_var_mapping_era5(tmp_path): + """Test ERA5-style CF NetCDF dimension and variable name discovery. + + ERA5 is a common source of met forcing for GeoClaw storm surge simulations. + This test verifies that ``util.get_netcdf_names`` correctly maps ERA5 + dimension names (``valid_time``, ``latitude``, ``longitude``) and variable + names (``u10``, ``v10``, ``msl``) to the GeoClaw roles ``t``/``x``/``y`` + and ``wind_u``/``wind_v``/``pressure``. + + ``valid_time`` requires an explicit ``user_mapping`` because it is not in + the default "t" fallback list ``["t", "time"]`` used by + ``util.get_netcdf_names``. ``u10``, ``v10``, and ``msl`` are resolved + automatically from the default variable fallback lists. + + TODO: ``util.get_netcdf_names`` and + ``netcdf_utils.NetCDFInterrogator._find_coord_name`` both implement + dimension/variable discovery (the latter is CF-first via axis/standard_name + attributes). These parallel implementations should eventually be unified; + they are kept separate for now and flagged here for future consolidation. + """ + pytest.importorskip("netCDF4") + + nc_path = tmp_path / "era5.nc" + create_era5_storm_file(nc_path) + + # "valid_time" is not in the default "t" fallback list; must be specified. + dim_mapping = util.get_netcdf_names( + nc_path, + lookup_type="dim", + user_mapping={"t": "valid_time"}, + ) + # u10, v10, msl are in the default variable fallback lists. + var_mapping = util.get_netcdf_names( + nc_path, + lookup_type="var", + ) + + assert dim_mapping["x"] == "longitude" + assert dim_mapping["y"] == "latitude" + assert dim_mapping["t"] == "valid_time" + assert var_mapping["wind_u"] == "u10" + assert var_mapping["wind_v"] == "v10" + assert var_mapping["pressure"] == "msl" + + +@pytest.mark.python +def test_netcdf_var_mapping_nws13(tmp_path): + """Test NWS13/OWI-NetCDF dimension and variable name discovery. + + NWS13 is the OWI NetCDF met-forcing format used by ADCIRC and GeoClaw. + This test verifies that ``util.get_netcdf_names`` correctly maps NWS13 + dimension names (``lon``, ``lat``, ``time``) and variable names + (``uwnd``, ``vwnd``, ``press``) when an explicit ``user_mapping`` is + provided for the variable names. + + ``lon``, ``lat``, and ``time`` are resolved automatically from the default + dimension fallback lists. ``uwnd``, ``vwnd``, and ``press`` are NOT in + the default variable fallback lists and require an explicit mapping. + + TODO: ``util.get_netcdf_names`` and + ``netcdf_utils.MetInterrogator`` (variable unit / role discovery) both + implement variable-name lookup. These parallel implementations should + eventually be unified; they are kept separate for now and flagged here for + future consolidation. + """ + pytest.importorskip("netCDF4") + + nc_path = tmp_path / "nws13.nc" + create_nws13_storm_file(nc_path) + + # lon, lat, time are auto-discovered from the default fallback lists. + dim_mapping = util.get_netcdf_names( + nc_path, + lookup_type="dim", + ) + # uwnd, vwnd, press require an explicit user_mapping. + var_mapping = util.get_netcdf_names( + nc_path, + lookup_type="var", + user_mapping={ + "wind_u": "uwnd", + "wind_v": "vwnd", + "pressure": "press", + }, + ) + + assert dim_mapping["x"] == "lon" + assert dim_mapping["y"] == "lat" + assert dim_mapping["t"] == "time" + assert var_mapping["wind_u"] == "uwnd" + assert var_mapping["wind_v"] == "vwnd" + assert var_mapping["pressure"] == "press" + + +@pytest.mark.python +@pytest.mark.skip( + reason=( + "WRF string-time axis and curvilinear grid require MetPreprocessor, " + "not yet implemented" + ) +) +def test_netcdf_wrf_stub(tmp_path): + """Stub for future WRF NetCDF met-forcing test (currently skipped). + + WRF output files differ from ERA5 and NWS13 in two ways that require + pre-processing before the GeoClaw Fortran reader can consume them: + + 1. **String time axis**: WRF stores time as ``char Times(Time, DateStrLen)`` + (ISO 8601 strings) rather than a numeric variable. GeoClaw's Fortran + reader expects integer seconds; a ``MetPreprocessor`` step is needed to + decode and convert the time axis. + + 2. **Curvilinear grid**: WRF uses a map-projected (Lambert conformal, + polar stereographic, or Mercator) curvilinear grid described by 2-D + ``XLAT``/``XLONG`` arrays rather than 1-D coordinate variables. + GeoClaw's reader expects regular (lon, lat) grids; regridding to a + regular lat/lon grid is required before use. + + Once a ``MetPreprocessor`` class (or equivalent) is implemented to handle + these two cases, this test slot should be filled in with a synthetic WRF + file (minimal curvilinear grid, string time) and an end-to-end round-trip + assertion. + """ + pass # placeholder — see docstring for implementation notes + + @pytest.mark.python @pytest.mark.parametrize( "speeds_knots, expected_categories", From 4bd2ebe2d9577cc1c413a8ef322f0405e46ffe4f Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Tue, 14 Apr 2026 11:26:52 -0400 Subject: [PATCH 06/24] Properly mark netcdf Chile 2010 tests --- examples/tsunami/chile2010/test_chile2010.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/tsunami/chile2010/test_chile2010.py b/examples/tsunami/chile2010/test_chile2010.py index 7b49e4ecb..d7e1f1791 100644 --- a/examples/tsunami/chile2010/test_chile2010.py +++ b/examples/tsunami/chile2010/test_chile2010.py @@ -11,8 +11,6 @@ import pytest -import numpy as np - import clawpack.geoclaw.test as test import clawpack.geoclaw.topotools as topotools import clawpack.clawutil.util as clawutil @@ -21,7 +19,8 @@ def _get_topography(runner, variant=0): """Get topography and read""" # Ensure that the topography file is available - maketopo = clawutil.fullpath_import("maketopo.py", module_name="maketopo") + maketopo = clawutil.fullpath_import(runner.test_path / "maketopo.py", + module_name="maketopo") topo_path = maketopo.get_topo() # Read in original topography data and write out in format expected by test @@ -153,12 +152,16 @@ def _write_netcdf_variant(topo, variant, output_path): from clawpack.geoclaw.netcdf_utils import TopoInterrogator return TopoInterrogator(output_path, 'elevation').interrogate_topo() +CASES = [pytest.param("standard", id="standard"), + pytest.param("netcdf_topotools", id="netcdf_topotools", marks=pytest.mark.netcdf), + pytest.param("lat_flip", id="lat_flip", marks=pytest.mark.netcdf), + pytest.param("lon_360", id="lon_360", marks=pytest.mark.netcdf)] @pytest.mark.regression @pytest.mark.remote @pytest.mark.tsunami -@pytest.mark.parametrize("variant", ["standard", "netcdf_topotools", "lat_flip", "lon_360"]) -def test_chile2010(tmp_path: Path, save: bool, variant: str): +@pytest.mark.parametrize("variant", CASES) +def test_chile2010(tmp_path: Path, save: bool, variant: dict): """Chile 2010 tsunami regression test for GeoClaw. Parametrized over topography file format. Because every variant encodes From 58d02e064caa7f46a96db93a0003caf0fc72a77b Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Tue, 14 Apr 2026 15:09:13 -0400 Subject: [PATCH 07/24] Fix Makefile LFLAGS in bowl-slosh-netcdf example --- examples/tsunami/bowl-slosh-netcdf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tsunami/bowl-slosh-netcdf/Makefile b/examples/tsunami/bowl-slosh-netcdf/Makefile index 7d99edd32..f78a9068f 100644 --- a/examples/tsunami/bowl-slosh-netcdf/Makefile +++ b/examples/tsunami/bowl-slosh-netcdf/Makefile @@ -26,7 +26,7 @@ ifeq ($(HAVE_NETCDF), no) $(error This example requires NetCDF support, but none was found.) endif FFLAGS += -DNETCDF $(NETCDF_FFLAGS) -LFLAGS += $(NETCDF_LFLAGS) +LFLAGS += $(NETCDF_LFLAGS) $(FFLAGS) # --------------------------------- # package sources for this program: From 810eb1f5cad9a085c21b0579c8a61095fe8cf8d6 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Wed, 15 Apr 2026 15:10:42 -0400 Subject: [PATCH 08/24] Put fencing up around xarray possible use of dask This allows to use dask if it is available, but then uses the generic NetCDF backend if it is not and does not do lazy loading. This is probably not generally an issue for any use cases I have seen. --- src/python/geoclaw/netcdf_utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py index 2a0b9a02d..ece7afddd 100644 --- a/src/python/geoclaw/netcdf_utils.py +++ b/src/python/geoclaw/netcdf_utils.py @@ -166,10 +166,16 @@ def __init__( ) -> None: self.path = Path(path) self.crop_bounds = crop_bounds - # Open with chunks={} to activate Dask-lazy mode. + # Activate Dask-lazy chunking if dask is available; fall back to + # netCDF4 native lazy loading so dask is an optional dependency. + try: + import dask # noqa: F401 + chunks: dict | str = {} + except ImportError: + chunks = None # mask_and_scale=True (default) so fill values appear as NaN. self.ds: xr.Dataset = xr.open_dataset( - self.path, chunks={}, mask_and_scale=True + self.path, chunks=chunks, mask_and_scale=True ) # ------------------------------------------------------------------ From cbd295f8adea2a972dba32cad072b574038a7eb7 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Wed, 15 Apr 2026 15:27:05 -0400 Subject: [PATCH 09/24] Add generation of dtopo to chile2010 test CI testing revealed that of course dtopo was not being generated for this test. Written to allow future testing of dtopo variants as well. --- examples/tsunami/chile2010/maketopo.py | 6 ++++-- examples/tsunami/chile2010/test_chile2010.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/examples/tsunami/chile2010/maketopo.py b/examples/tsunami/chile2010/maketopo.py index 9267457c0..8e40d6aab 100644 --- a/examples/tsunami/chile2010/maketopo.py +++ b/examples/tsunami/chile2010/maketopo.py @@ -44,7 +44,7 @@ def get_topo(path=scratch_dir, makeplots=False): -def make_dtopo(makeplots=False): +def make_dtopo(path=scratch_dir, makeplots=False): """ Create dtopo data file for deformation of sea floor due to earthquake. Uses the Okada model with fault parameters and mesh specified below. @@ -52,7 +52,7 @@ def make_dtopo(makeplots=False): from clawpack.geoclaw import dtopotools import numpy - dtopo_fname = os.path.join(scratch_dir, "dtopo_usgs100227.tt3") + dtopo_fname = os.path.join(path, "dtopo_usgs100227.tt3") # Specify subfault parameters for this simple fault model consisting # of a single subfault: @@ -109,6 +109,8 @@ def make_dtopo(makeplots=False): plt.savefig(fname) print("Created ",fname) + return dtopo_fname + if __name__=='__main__': get_topo(False) diff --git a/examples/tsunami/chile2010/test_chile2010.py b/examples/tsunami/chile2010/test_chile2010.py index d7e1f1791..86ceb1257 100644 --- a/examples/tsunami/chile2010/test_chile2010.py +++ b/examples/tsunami/chile2010/test_chile2010.py @@ -13,15 +13,17 @@ import clawpack.geoclaw.test as test import clawpack.geoclaw.topotools as topotools +import clawpack.geoclaw.dtopotools as dtopotools import clawpack.clawutil.util as clawutil def _get_topography(runner, variant=0): """Get topography and read""" - + # Ensure that the topography file is available maketopo = clawutil.fullpath_import(runner.test_path / "maketopo.py", module_name="maketopo") topo_path = maketopo.get_topo() + dtopo_path = maketopo.make_dtopo() # Read in original topography data and write out in format expected by test topo = topotools.Topography() @@ -29,7 +31,13 @@ def _get_topography(runner, variant=0): topo.path = topo_path topo.read() - return topo + # Read in dtopography data + dtopo = dtopotools.DTopography() + dtopo.dtopo_type = 3 + dtopo.path = dtopo_path + dtopo.read() + + return topo, dtopo def _write_netcdf_variant(topo, variant, output_path): @@ -190,7 +198,7 @@ def test_chile2010(tmp_path: Path, save: bool, variant: dict): runner = test.GeoClawTestRunner(tmp_path, test_path=Path(__file__).parent) # Topography - topo = _get_topography(runner) + topo, dtopo = _get_topography(runner) # Setup data for test runner.set_data() From 2f56a1e3d10ddc4c01f267b290f311d24ac3f55e Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 17 Apr 2026 19:28:38 -0400 Subject: [PATCH 10/24] Implement better support for netCDF met forcing This allows for more ways to specify variables and remove the duplicative aspects of the met forcing that was present. Much more robust time handling as well without assuming epochs. --- src/2d/shallow/surge/data_storm_module.f90 | 154 ++++++++++++++++++--- src/python/geoclaw/netcdf_utils.py | 1 - src/python/geoclaw/surge/storm.py | 153 ++++++++++++++++---- 3 files changed, 263 insertions(+), 45 deletions(-) diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 278328d73..8a480009e 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -94,6 +94,13 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) integer :: nc_fid, num_dims, num_vars, var_id character(len=16) :: name, dim_names(3), var_names(3) + ! NetCDF descriptor parsing (format 2) + real(kind=8) :: nc_time_offset + integer :: lon_convention + character(len=512) :: nc_line, nc_key, nc_val + integer :: nc_eq_pos, nc_in_file_info + integer :: nc_vn_start, nc_vn_end, nc_role_start, nc_role_end + if (.not.module_setup) then ! Open data file print *,'Reading storm data file ', storm_data_path @@ -138,17 +145,99 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) case(1) ! ASCII/OWI read(data_unit, *) read(data_unit, *) - case(2) ! NetCDF - read(data_unit, *) - read(data_unit, *) dim_names - read(data_unit, *) var_names - read(data_unit, *) - write(log_unit, "('dims = ',a)") dim_names - write(log_unit, "('vars = ',a)") var_names + case(2) ! NetCDF - &file_info / &variable_info namelist format + read(data_unit, *) ! "# Format Data Information" + nc_time_offset = 0.0d0 + lon_convention = 180 + nc_in_file_info = 0 + dim_names = '' + var_names = '' + + do + read(data_unit, '(a)', iostat=io_status) nc_line + if (io_status /= 0) exit + nc_line = adjustl(nc_line) + + ! Stop at "# File paths" — put it back for post-case read + if (nc_line(1:12) == '# File paths') then + backspace(data_unit) + exit + end if + + ! Start of &file_info block + if (nc_line(1:10) == '&file_info') then + nc_in_file_info = 1 + cycle + end if + + ! End of block (standalone /) + if (trim(nc_line) == '/') then + nc_in_file_info = 0 + cycle + end if + + ! &variable_info inline block + ! Format: &variable_info var_name=u10 geoclaw_role=wind_u / + if (nc_line(1:14) == '&variable_info') then + nc_vn_start = index(nc_line, 'var_name=') + 9 + nc_vn_end = nc_vn_start + do while (nc_vn_end <= len_trim(nc_line) .and. & + nc_line(nc_vn_end:nc_vn_end) /= ' ' .and. & + nc_line(nc_vn_end:nc_vn_end) /= '/') + nc_vn_end = nc_vn_end + 1 + end do + nc_val = nc_line(nc_vn_start:nc_vn_end-1) + + nc_role_start = index(nc_line, 'geoclaw_role=') + 13 + nc_role_end = nc_role_start + do while (nc_role_end <= len_trim(nc_line) .and. & + nc_line(nc_role_end:nc_role_end) /= ' ' .and. & + nc_line(nc_role_end:nc_role_end) /= '/') + nc_role_end = nc_role_end + 1 + end do + nc_key = nc_line(nc_role_start:nc_role_end-1) + + select case(trim(nc_key)) + case('wind_u') + var_names(1) = trim(nc_val) + case('wind_v') + var_names(2) = trim(nc_val) + case('pressure') + var_names(3) = trim(nc_val) + end select + cycle + end if + + ! key = value lines inside &file_info + if (nc_in_file_info == 1) then + nc_eq_pos = index(nc_line, '=') + if (nc_eq_pos > 0) then + nc_key = adjustl(trim(nc_line(1:nc_eq_pos-1))) + nc_val = adjustl(trim(nc_line(nc_eq_pos+1:))) + select case(trim(nc_key)) + case('lon_name') + dim_names(1) = trim(nc_val) + case('lat_name') + dim_names(2) = trim(nc_val) + case('time_name') + dim_names(3) = trim(nc_val) + case('lon_convention') + read(nc_val, *) lon_convention + case('time_offset') + read(nc_val, *) nc_time_offset + end select + end if + end if + end do + + write(log_unit, "('dims = ',3(1x,a))") dim_names + write(log_unit, "('vars = ',3(1x,a))") var_names + write(log_unit, "('lon_convention = ',i4)") lon_convention + write(log_unit, "('nc_time_offset = ',d16.8)") nc_time_offset if (DEBUG) then - print "('dims = ',a)", dim_names - print "('vars = ',a)", var_names + print *, "dims = ", dim_names + print *, "vars = ", var_names end if case default print *, "Storm data format", file_format," not available." @@ -252,17 +341,42 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) call check_netcdf_error(nf90_inq_varid(nc_fid, dim_names(3), var_ID)) call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%time)) - ! Convert time to seconds from offset - storm%time(:) = storm%time(:) - seconds_from_epoch(time(:, 1)) - - ! Wind - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(1), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_u)) - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(2), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_v)) - ! ! Pressure - call check_netcdf_error(nf90_inq_varid(nc_fid, var_names(3), var_ID)) - call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%pressure)) + ! Normalize [0,360] longitude to [-180,180] + if (lon_convention == 360) then + where (storm%longitude > 180.0d0) + storm%longitude = storm%longitude - 360.0d0 + end where + end if + + ! nc_time_offset = (first_file_time - storm_offset) in s. + ! Subtracting raw[0] handles files where time is stored as + ! absolute seconds (e.g. Unix epoch) rather than relative. + storm%time(:) = storm%time(:) - storm%time(1) & + + nint(nc_time_offset) + + ! Wind (optional — zero-fill if not in descriptor) + if (len_trim(var_names(1)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(1)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_u)) + else + write(log_unit, *) "Warning: wind_u not in descriptor, setting to zero." + storm%wind_u = 0.0d0 + end if + if (len_trim(var_names(2)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(2)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%wind_v)) + else + write(log_unit, *) "Warning: wind_v not in descriptor, setting to zero." + storm%wind_v = 0.0d0 + end if + ! Pressure + if (len_trim(var_names(3)) > 0) then + call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(3)), var_ID)) + call check_netcdf_error(nf90_get_var(nc_fid, var_ID, storm%pressure)) + else + write(log_unit, *) "Warning: pressure not in descriptor, setting to zero." + storm%pressure = 0.0d0 + end if ! Close file to stop corrupting the netcdf files call check_netcdf_error(nf90_close(nc_fid)) diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py index ece7afddd..287444373 100644 --- a/src/python/geoclaw/netcdf_utils.py +++ b/src/python/geoclaw/netcdf_utils.py @@ -1012,7 +1012,6 @@ def write_met_descriptor(f, meta: 'MetMetadata') -> None: Output of ``MetInterrogator.interrogate_met()``. """ f.write("&file_info\n") - f.write(f" source_file = {meta.source_file}\n") f.write(f" lon_name = {meta.lon_name}\n") f.write(f" lat_name = {meta.lat_name}\n") f.write(f" time_name = {meta.time_name}\n") diff --git a/src/python/geoclaw/surge/storm.py b/src/python/geoclaw/surge/storm.py index 2ae850f56..c1e30d796 100644 --- a/src/python/geoclaw/surge/storm.py +++ b/src/python/geoclaw/surge/storm.py @@ -226,6 +226,17 @@ def __init__(self, path=None, file_format="ATCF", **kwargs): self.window = None # If not provided for data files it # will be set to the file extents + # NetCDF met-forcing metadata (populated by read_data for file_format==2) + self.met_lon_name = None + self.met_lat_name = None + self.met_time_name = None + self.met_lon_convention = None + self.met_lat_order = None + self.met_fill_value = None + self.met_fill_action = None + self.met_time_offset = None + self.met_variable_map = {} + if path is not None: self.read(path, file_format=file_format, **kwargs) @@ -948,15 +959,64 @@ def read_data(self, path: Path, verbose: bool=False): data_file.readline() data_file.readline() - # We do not keep track of any of the format specific information if self.file_format == 1: + # Skip blank line then "# File paths" comment data_file.readline() data_file.readline() elif self.file_format == 2: - data_file.readline() - data_file.readline() - data_file.readline() - data_file.readline() + # Parse the &file_info / &variable_info descriptor produced + # by DescriptorWriter.write_met_descriptor. + file_info = {} + variable_infos = [] + + # Read &file_info block (lines until standalone "/") + line = data_file.readline() + if line.strip().startswith('&file_info'): + while True: + inner = data_file.readline() + if not inner: + break + s = inner.strip() + if s == '/': + break + if '=' in s and not s.startswith('&'): + key, _, val = s.partition('=') + file_info[key.strip()] = val.strip() + + # Read &variable_info lines until "# File paths" terminator + while True: + line = data_file.readline() + if not line: + break + s = line.strip() + if s.startswith('# File paths'): + break + if s.startswith('&variable_info'): + var_info = {} + content = s[len('&variable_info'):].rstrip('/').strip() + for pair in content.split(): + k, _, v = pair.partition('=') + if k and v: + var_info[k] = v + variable_infos.append(var_info) + + # Store fields on Storm object for Fortran to consume + self.met_lon_name = file_info.get('lon_name') + self.met_lat_name = file_info.get('lat_name') + self.met_time_name = file_info.get('time_name') + _conv = file_info.get('lon_convention') + self.met_lon_convention = int(_conv) if _conv is not None else None + self.met_lat_order = file_info.get('lat_order') + _fv = file_info.get('fill_value') + self.met_fill_value = float(_fv) if _fv is not None else None + self.met_fill_action = file_info.get('fill_action', 'warn') + _to = file_info.get('time_offset') + self.met_time_offset = float(_to) if _to is not None else None + self.met_variable_map = { + vi['geoclaw_role']: vi['var_name'] + for vi in variable_infos + if 'geoclaw_role' in vi and 'var_name' in vi + } else: raise TypeError(f"Unknown storm data file format type" + f" '{self.file_format}' provided.") @@ -1269,7 +1329,8 @@ def write_tcvitals(self, path, verbose=False): "implemented yet but is planned for a ", "future release.")) - def write_data(self, path, dim_mapping=None, var_mapping=None, verbose=False): + def write_data(self, path, dim_mapping=None, var_mapping=None, + met_interrogator=None, verbose=False): r""" """ @@ -1337,29 +1398,73 @@ def write_data(self, path, dim_mapping=None, var_mapping=None, verbose=False): "resolution provided.") data_file.write("\n") elif file_format == 2: - # Get dimension mapping - _dim_mapping = util.get_netcdf_names(self.file_paths[0], - lookup_type='dim', - user_mapping=dim_mapping, - verbose=verbose) - - # Get variable mapping - _var_mapping = util.get_netcdf_names(self.file_paths[0], - lookup_type='var', - user_mapping=var_mapping, - verbose=verbose) - data_file.write(f"{str(_dim_mapping['x'])} ") - data_file.write(f"{str(_dim_mapping['y'])} ") - data_file.write(f"{str(_dim_mapping['t'])}\n") - data_file.write(f"{str(_var_mapping['wind_u'])} ") - data_file.write(f"{str(_var_mapping['wind_v'])} ") - data_file.write(f"{str(_var_mapping['pressure'])}\n") - data_file.write("\n") + # Variable discovery: get_netcdf_names auto-discovers met + # roles from standard name lists, then any explicit var_mapping + # entries override specific roles. MetInterrogator then applies + # CF-aware coordinate discovery, unit validation, lon-convention + # detection, and fill-value resolution. + # TODO: consolidate get_netcdf_names with MetInterrogator if len(self.file_paths) != 1: raise ValueError(f"Expected 1 path for NetCDF format, " + f"got {len(self.file_paths)}") + from clawpack.geoclaw.netcdf_utils import ( + MetInterrogator, DescriptorWriter) + + if met_interrogator is None: + # Auto-discover met variable names, then apply explicit + # overrides from var_mapping. This lets callers supply + # only the non-standard names (e.g. a non-standard pressure + # field) while standard wind names are still found + # automatically. dim_mapping is handled automatically by + # MetInterrogator via CF axis/standard_name conventions; it + # is accepted for backwards compatibility but not forwarded. + _MET_ROLES = frozenset({'wind_u', 'wind_v', 'pressure'}) + variable_map = { + role: name + for role, name in util.get_netcdf_names( + self.file_paths[0], + lookup_type='var', + verbose=verbose, + ).items() + if role in _MET_ROLES + } + + if var_mapping is not None: + # Validate each user-specified name exists before + # merging to catch typos early with a clear message. + import xarray as xr + with xr.open_dataset(str(self.file_paths[0])) as _ds: + _available = set(_ds.data_vars) | set(_ds.coords) + _bad = {role: name + for role, name in var_mapping.items() + if name not in _available} + if _bad: + raise ValueError( + "The following names in var_mapping were not " + f"found in {self.file_paths[0]}:\n" + + "\n".join( + f" '{role}': '{name}'" + for role, name in _bad.items()) + + f"\nAvailable variables: " + + str(sorted(_available)) + ) + variable_map.update(var_mapping) + + with MetInterrogator(self.file_paths[0], + variable_map=variable_map, + time_reference=self.time_offset) as mi: + meta = mi.interrogate_met() + else: + meta = met_interrogator.interrogate_met() + + # Note: storm.window governs the Fortran forcing application + # domain (with ramp width); MetInterrogator.crop_bounds is a + # read-time spatial subset of the NetCDF file. These are + # conceptually distinct and should remain independent. + DescriptorWriter.write_met_descriptor(data_file, meta) + # Write paths data_file.write("# File paths\n") for path in self.file_paths: From e36657faf8df701b86ed298b11231829dc10629d Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 17 Apr 2026 20:02:12 -0400 Subject: [PATCH 11/24] Modify and add tests for new netCDF met forcing --- tests/netcdf/test_descriptor_writer.py | 7 ++--- tests/test_storm.py | 40 ++++++++++++++------------ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tests/netcdf/test_descriptor_writer.py b/tests/netcdf/test_descriptor_writer.py index fd908bdbf..beb743bec 100644 --- a/tests/netcdf/test_descriptor_writer.py +++ b/tests/netcdf/test_descriptor_writer.py @@ -250,9 +250,8 @@ def test_met_descriptor_file_info_block_present(met_file_factory): def test_met_descriptor_file_info_required_keys(met_file_factory): """ - &file_info block contains all mandatory keys: source_file, lon_name, - lat_name, time_name, dim_order, lon_convention, lat_order, - fill_action, time_offset. + &file_info block contains all mandatory keys: lon_name, lat_name, + time_name, dim_order, lon_convention, lat_order, fill_action, time_offset. """ meta = _get_met_meta(met_file_factory) buf = io.StringIO() @@ -260,7 +259,7 @@ def test_met_descriptor_file_info_required_keys(met_file_factory): parsed = _parse_met_descriptor(buf.getvalue()) fi = parsed["file_info"] - for key in ("source_file", "lon_name", "lat_name", "time_name", + for key in ("lon_name", "lat_name", "time_name", "dim_order", "lon_convention", "lat_order", "fill_action", "time_offset"): assert key in fi, f"Missing key '{key}' in &file_info block" diff --git a/tests/test_storm.py b/tests/test_storm.py index b6771acb5..7aa7c57dd 100644 --- a/tests/test_storm.py +++ b/tests/test_storm.py @@ -425,12 +425,9 @@ def test_data_storm_roundtrip(file_format, tmp_path): elif file_format == "netcdf_era5": # ERA5-style: dims (valid_time, latitude, longitude), - # vars (u10, v10, msl). Dimension "t" requires a user_mapping because - # "valid_time" is not in the default "t" fallback list ["t", "time"]. - # - # TODO: util.get_netcdf_names and netcdf_utils.NetCDFInterrogator both - # implement dimension/variable discovery. These parallel paths should - # eventually be unified; for now write_data uses util.get_netcdf_names. + # vars (u10, v10, msl). write_data uses MetInterrogator, which + # discovers "valid_time" via CF axis='T' automatically; dim_mapping + # is accepted for backwards compatibility but not required. pytest.importorskip("netCDF4") data_storm.file_format = "netcdf" data_storm.file_paths = [tmp_path / "era5.nc"] @@ -442,20 +439,21 @@ def test_data_storm_roundtrip(file_format, tmp_path): ) read_storm = storm.Storm(storm_path, file_format="data") - # Also verify the descriptor body contains the correct names. + # Verify the descriptor body contains the correct coordinate and + # variable names in the new &file_info / &variable_info format. desc_text = storm_path.read_text() - assert "longitude latitude valid_time" in desc_text - assert "u10 v10 msl" in desc_text + assert "lon_name = longitude" in desc_text + assert "lat_name = latitude" in desc_text + assert "time_name = valid_time" in desc_text + assert "var_name=u10" in desc_text + assert "var_name=v10" in desc_text + assert "var_name=msl" in desc_text elif file_format == "netcdf_nws13": # NWS13-style: dims (time, lat, lon), vars (uwnd, vwnd, press, mb). - # Dimension names resolve automatically; variable names require an - # explicit user_mapping because "uwnd"/"vwnd"/"press" are not in the - # default fallback lists. - # - # TODO: util.get_netcdf_names and netcdf_utils.MetInterrogator both - # implement variable discovery. These parallel paths should eventually - # be unified; for now write_data uses util.get_netcdf_names. + # write_data uses MetInterrogator; variable names require an explicit + # var_mapping because "uwnd"/"vwnd"/"press" are not in the default + # fallback lists used by get_netcdf_names. pytest.importorskip("netCDF4") data_storm.file_format = "nws13" data_storm.file_paths = [tmp_path / "nws13.nc"] @@ -471,9 +469,15 @@ def test_data_storm_roundtrip(file_format, tmp_path): ) read_storm = storm.Storm(storm_path, file_format="data") + # Verify the descriptor body contains the correct coordinate and + # variable names in the new &file_info / &variable_info format. desc_text = storm_path.read_text() - assert "lon lat time" in desc_text - assert "uwnd vwnd press" in desc_text + assert "lon_name = lon" in desc_text + assert "lat_name = lat" in desc_text + assert "time_name = time" in desc_text + assert "var_name=uwnd" in desc_text + assert "var_name=vwnd" in desc_text + assert "var_name=press" in desc_text assert data_storm.time_offset == read_storm.time_offset assert data_storm.file_format in DATA_FILE_FORMAT_MAP[read_storm.file_format] From cc82fcf707bc28752b1c363f764f2dbc417cbb51 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Sun, 19 Apr 2026 11:48:08 -0400 Subject: [PATCH 12/24] Fix isaac tests for new netCDF met forcing input --- examples/storm-surge/isaac/test_isaac.py | 84 +++++++++++++++++++----- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/examples/storm-surge/isaac/test_isaac.py b/examples/storm-surge/isaac/test_isaac.py index 789b1fc46..f1a4311f1 100644 --- a/examples/storm-surge/isaac/test_isaac.py +++ b/examples/storm-surge/isaac/test_isaac.py @@ -282,21 +282,24 @@ def _make_isaac_netcdf(fmt: str, tmp_path: Path, test_path: Path) -> Path: time_offset = np.datetime64("2012-08-29T00:00:00") - # The storm time axis in the descriptor is relative to time_offset; - # the NetCDF stores absolute UNIX seconds. The Fortran will subtract - # seconds_from_epoch(time_offset) from the raw values. - isaac = Storm() isaac.time_offset = time_offset if fmt == "netcdf_era5": nc_path = tmp_path / "isaac_era5.nc" + # create_era5_storm_file uses "seconds since 2012-08-29" as the time + # epoch (see time_epoch_str in test_storm.py). Convert Unix seconds + # to seconds relative to that epoch so xarray decodes them correctly + # and MetInterrogator computes the right nc_time_offset. + time_offset_s = int( + (time_offset - unix_epoch) / np.timedelta64(1, "s") + ) create_era5_storm_file( nc_path, pressure_fields=pressure_pa, u_fields=u_wind, v_fields=v_wind, - times=times_unix, + times=times_unix - time_offset_s, lon=lon, lat=lat, ) @@ -347,8 +350,9 @@ def _check_netcdf_storm_descriptor(generated_path: Path, fmt: str) -> None: """Validate structure of a NetCDF data-storm descriptor file. Checks that ``generated_path`` contains the correct format number (2), - the expected dimension names, the expected variable names, and exactly - one file path entry. + the expected coordinate names in the ``&file_info`` block, the expected + variable/role pairs in ``&variable_info`` blocks, and exactly one file + path entry. This is a structural validation against known-good expected values for each format, not a comparison against a committed regression file (which @@ -384,23 +388,67 @@ def _check_netcdf_storm_descriptor(generated_path: Path, fmt: str) -> None: f"'# Format Data Information' section not found in {generated_path}" ) - # The dim-names line is immediately after the comment. - dim_line = lines[fmt_info_idx + 1] - var_line = lines[fmt_info_idx + 2] + # Parse &file_info block for coordinate names. + file_info: dict = {} + in_file_info = False + for ln in lines[fmt_info_idx:]: + stripped = ln.strip() + if stripped == "&file_info": + in_file_info = True + continue + if in_file_info: + if stripped == "/": + break + if "=" in stripped: + key, _, val = stripped.partition("=") + file_info[key.strip()] = val.strip() + + # Parse &variable_info blocks for role→var_name mapping. + var_info: dict = {} + for ln in lines[fmt_info_idx:]: + stripped = ln.strip() + if stripped.startswith("&variable_info"): + kv = dict(re.findall(r'(\w+)=(\S+)', stripped)) + if "geoclaw_role" in kv and "var_name" in kv: + var_info[kv["geoclaw_role"]] = kv["var_name"].rstrip("/").strip() if fmt == "netcdf_era5": - assert dim_line == "longitude latitude valid_time", ( - f"ERA5 dim line wrong: {dim_line!r}" + assert file_info.get("lon_name") == "longitude", ( + f"ERA5 lon_name wrong: {file_info.get('lon_name')!r}" + ) + assert file_info.get("lat_name") == "latitude", ( + f"ERA5 lat_name wrong: {file_info.get('lat_name')!r}" + ) + assert file_info.get("time_name") == "valid_time", ( + f"ERA5 time_name wrong: {file_info.get('time_name')!r}" ) - assert var_line == "u10 v10 msl", ( - f"ERA5 var line wrong: {var_line!r}" + assert var_info.get("wind_u") == "u10", ( + f"ERA5 wind_u var wrong: {var_info.get('wind_u')!r}" + ) + assert var_info.get("wind_v") == "v10", ( + f"ERA5 wind_v var wrong: {var_info.get('wind_v')!r}" + ) + assert var_info.get("pressure") == "msl", ( + f"ERA5 pressure var wrong: {var_info.get('pressure')!r}" ) elif fmt == "netcdf_nws13": - assert dim_line == "lon lat time", ( - f"NWS13 dim line wrong: {dim_line!r}" + assert file_info.get("lon_name") == "lon", ( + f"NWS13 lon_name wrong: {file_info.get('lon_name')!r}" + ) + assert file_info.get("lat_name") == "lat", ( + f"NWS13 lat_name wrong: {file_info.get('lat_name')!r}" + ) + assert file_info.get("time_name") == "time", ( + f"NWS13 time_name wrong: {file_info.get('time_name')!r}" + ) + assert var_info.get("wind_u") == "uwnd", ( + f"NWS13 wind_u var wrong: {var_info.get('wind_u')!r}" + ) + assert var_info.get("wind_v") == "vwnd", ( + f"NWS13 wind_v var wrong: {var_info.get('wind_v')!r}" ) - assert var_line == "uwnd vwnd press", ( - f"NWS13 var line wrong: {var_line!r}" + assert var_info.get("pressure") == "press", ( + f"NWS13 pressure var wrong: {var_info.get('pressure')!r}" ) else: raise ValueError(f"Unknown fmt={fmt!r}") From c04848d8561c128ffcf57d0cf2f7841d6e93bb09 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 7 May 2026 10:39:18 -0400 Subject: [PATCH 13/24] Remove hard dim and var requirements for data storms Replaced with simply checking for minimal dims and vars instead --- src/2d/shallow/surge/data_storm_module.f90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 8a480009e..0a5cf2f35 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -302,7 +302,9 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) call check_netcdf_error(nf90_open(storm%paths(1), nf90_nowrite, nc_fid)) ! Check dim/var number call check_netcdf_error(nf90_inquire(nc_fid, num_dims, num_vars)) - if (num_dims /= 3 .and. num_vars /= 3) then + ! Check to make sure the mimimal number of dimansions and + ! variables are there to read in the data + if (num_dims < 3 .or. num_vars < 3) then print *, "Invalid number of dimensions/variables." print *, " num_dims = ", num_dims print *, " num_vars = ", num_vars From 7785bfcb247a14bc311ec1fc4b35440780c43518 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 7 May 2026 10:40:58 -0400 Subject: [PATCH 14/24] Remove the required bathy var name from topo interrogator Added a dict of common bathy variable names to fallback on if not provided, but still allows for override. --- src/python/geoclaw/netcdf_utils.py | 76 ++++++++++++++++++++++-- tests/netcdf/_helpers.py | 40 +++++++++++++ tests/netcdf/test_topo_interrogator.py | 82 ++++++++++++++++++++++++++ 3 files changed, 194 insertions(+), 4 deletions(-) diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py index 287444373..c694326ec 100644 --- a/src/python/geoclaw/netcdf_utils.py +++ b/src/python/geoclaw/netcdf_utils.py @@ -410,8 +410,13 @@ class TopoInterrogator(NetCDFInterrogator): ---------- path : str or Path Path to the NetCDF topography file. - var_name : str - Name of the elevation/bathymetry variable (e.g. 'z', 'elevation'). + var_name : str, optional + Name of the elevation/bathymetry variable (e.g. ``'z'``, + ``'elevation'``). If omitted, the variable is auto-detected by + searching for known CF ``standard_name`` values + (``surface_altitude``, ``height_above_mean_sea_level``, …) and + then falling back to common names (``z``, ``elevation``, ``topo``, + …). A ``ValueError`` is raised if no match is found. crop_bounds : tuple of four floats, optional (lon_min, lon_max, lat_min, lat_max). """ @@ -419,12 +424,73 @@ class TopoInterrogator(NetCDFInterrogator): def __init__( self, path: str | Path, - var_name: str, + var_name: Optional[str] = None, crop_bounds: Optional[tuple[float, float, float, float]] = None, ) -> None: super().__init__(path, crop_bounds) self.var_name = var_name + # ------------------------------------------------------------------ + # Variable auto-detection + # ------------------------------------------------------------------ + + #: CF standard_names that indicate an elevation / bathymetry variable, + #: ordered from most specific to most generic. + _TOPO_CF_STANDARD_NAMES: tuple[str, ...] = ( + 'surface_altitude', + 'height_above_mean_sea_level', + 'height_above_reference_ellipsoid', + 'bedrock_altitude', + 'altitude', + 'height', + 'sea_floor_depth_below_geoid', + ) + + #: Common variable names for elevation / bathymetry data. + _TOPO_FALLBACK_NAMES: tuple[str, ...] = ( + 'z', 'Z', + 'elevation', 'Elevation', 'ELEVATION', + 'topo', 'TOPO', + 'height', 'HEIGHT', + 'altitude', 'ALTITUDE', + 'depth', 'DEPTH', + 'Band1', + 'dem', 'DEM', + 'topography', 'bathymetry', 'bathy', + 'bedrock', + ) + + def _find_topo_var_name(self) -> str: + """ + Auto-detect the elevation/bathymetry variable. + + Search order: + 1. CF ``standard_name`` attribute against known elevation standard names. + 2. Variable name against ``_TOPO_FALLBACK_NAMES`` (case-sensitive). + + Returns the matched variable name. Raises ValueError if nothing is found. + """ + # 1. CF standard_name lookup across all data variables + for std_name in self._TOPO_CF_STANDARD_NAMES: + for var_name, var in self.ds.data_vars.items(): + if var.attrs.get('standard_name', '') == std_name: + return str(var_name) + + # 2. Name-based fallback + available = {str(k) for k in self.ds.data_vars} + for name in self._TOPO_FALLBACK_NAMES: + if name in available: + return name + + raise ValueError( + f"Cannot auto-detect elevation variable in '{self.path}'. " + f"No data variable matched known CF standard_names " + f"{self._TOPO_CF_STANDARD_NAMES} or fallback names " + f"{self._TOPO_FALLBACK_NAMES}. " + f"Pass var_name explicitly. " + f"Available data variables: {sorted(available)}" + ) + # ------------------------------------------------------------------ # Unit verification # ------------------------------------------------------------------ @@ -531,13 +597,15 @@ def interrogate_topo(self) -> TopoMetadata: """ Fully interrogate the topo file and return a TopoMetadata instance. """ + if self.var_name is None: + self.var_name = self._find_topo_var_name() base = self.interrogate(self.var_name) source_units = self._check_topo_units() self._check_fill_in_crop(base.lon_name, base.lat_name, base.lat_order) return TopoMetadata( **dataclasses.asdict(base), - var_name=self.var_name, + var_name = self.var_name, source_units=source_units, fill_action='abort', # fill in topo crop region is always fatal ) diff --git a/tests/netcdf/_helpers.py b/tests/netcdf/_helpers.py index 7e12ca43a..8f61c61c0 100644 --- a/tests/netcdf/_helpers.py +++ b/tests/netcdf/_helpers.py @@ -35,6 +35,7 @@ def make_topo_dataset( has_fill_in_data: bool = False, lon_axis_attr: bool = False, lat_axis_attr: bool = False, + var_standard_name: Optional[str] = None, ) -> xr.Dataset: """ Build a minimal topo xarray Dataset. @@ -69,6 +70,8 @@ def make_topo_dataset( var_attrs: dict = {} if units: var_attrs["units"] = units + if var_standard_name is not None: + var_attrs["standard_name"] = var_standard_name coords = { lon_name: xr.DataArray(lons, dims=[lon_name], attrs=lon_attrs), @@ -247,3 +250,40 @@ def make_met_dataset( pytest.param({"wind_units": "m/s", "expected_source": "m/s"}, id="m_s"), pytest.param({"wind_units": "knots", "expected_source": "knots"}, id="knots"), ] + +#: CF standard_name values recognised by TopoInterrogator._find_topo_var_name, +#: in priority order. +TOPO_CF_STANDARD_NAME_VARIANTS: list = [ + pytest.param("surface_altitude", id="surface_altitude"), + pytest.param("height_above_mean_sea_level", id="height_above_mean_sea_level"), + pytest.param("height_above_reference_ellipsoid", id="height_above_reference_ellipsoid"), + pytest.param("bedrock_altitude", id="bedrock_altitude"), + pytest.param("altitude", id="altitude"), + pytest.param("height", id="height"), + pytest.param("sea_floor_depth_below_geoid", id="sea_floor_depth_below_geoid"), +] + +#: Variable names in the fallback list recognised by +#: TopoInterrogator._find_topo_var_name. +TOPO_FALLBACK_NAME_VARIANTS: list = [ + pytest.param("z", id="z"), + pytest.param("Z", id="Z"), + pytest.param("elevation", id="elevation"), + pytest.param("Elevation", id="Elevation"), + pytest.param("ELEVATION", id="ELEVATION"), + pytest.param("topo", id="topo"), + pytest.param("TOPO", id="TOPO"), + pytest.param("height", id="height"), + pytest.param("HEIGHT", id="HEIGHT"), + pytest.param("altitude", id="altitude"), + pytest.param("ALTITUDE", id="ALTITUDE"), + pytest.param("depth", id="depth"), + pytest.param("DEPTH", id="DEPTH"), + pytest.param("Band1", id="Band1"), + pytest.param("dem", id="dem"), + pytest.param("DEM", id="DEM"), + pytest.param("topography", id="topography"), + pytest.param("bathymetry", id="bathymetry"), + pytest.param("bathy", id="bathy"), + pytest.param("bedrock", id="bedrock"), +] diff --git a/tests/netcdf/test_topo_interrogator.py b/tests/netcdf/test_topo_interrogator.py index 8dc01a007..c0a98c65d 100644 --- a/tests/netcdf/test_topo_interrogator.py +++ b/tests/netcdf/test_topo_interrogator.py @@ -14,7 +14,9 @@ from ._helpers import ( make_topo_dataset, COORD_VARIANTS, + TOPO_CF_STANDARD_NAME_VARIANTS, TOPO_DIM_ORDER_VARIANTS, + TOPO_FALLBACK_NAME_VARIANTS, ) pytestmark = [pytest.mark.python, pytest.mark.netcdf] @@ -203,3 +205,83 @@ def test_dim_order_variants(topo_file_factory, dim_kwargs): with TopoInterrogator(path, var_name="z") as intr: meta = intr.interrogate_topo() assert meta.dim_order == list(dim_kwargs["dim_order"]) + + +# ============================================================ +# Variable auto-detection (no var_name supplied) +# ============================================================ + +@pytest.mark.parametrize("std_name", TOPO_CF_STANDARD_NAME_VARIANTS) +def test_autodetect_by_cf_standard_name(topo_file_factory, std_name): + """ + When var_name is omitted, TopoInterrogator finds the elevation variable + by matching the CF standard_name attribute. The matched name is written + back into the returned metadata. + """ + ds = make_topo_dataset(var_name="topo_data", var_standard_name=std_name) + path = topo_file_factory(ds=ds) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + assert meta.var_name == "topo_data" + + +@pytest.mark.parametrize("var_name", TOPO_FALLBACK_NAME_VARIANTS) +def test_autodetect_by_fallback_name(topo_file_factory, var_name): + """ + When var_name is omitted, TopoInterrogator falls back to matching the + variable name against the built-in list of common elevation names. + """ + path = topo_file_factory(var_name=var_name) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert isinstance(meta, TopoMetadata) + assert meta.var_name == var_name + + +def test_autodetect_no_match_raises(topo_file_factory): + """ + When var_name is omitted and no variable name or CF standard_name matches + any known elevation identifier, interrogate_topo() raises ValueError with + a message that mentions auto-detect and lists available variables. + """ + path = topo_file_factory(var_name="completely_unknown_field") + with TopoInterrogator(path) as intr: + with pytest.raises(ValueError, match="auto-detect"): + intr.interrogate_topo() + + +def test_autodetect_cf_standard_name_takes_priority(topo_file_factory): + """ + CF standard_name detection takes priority over the fallback name list. + A dataset with both a CF-tagged variable and an 'elevation' variable must + return the CF-tagged one. + """ + import numpy as np + import xarray as xr + + lons = np.linspace(-10.0, 10.0, 5) + lats = np.linspace(-10.0, 10.0, 5) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + data = np.full((5, 5), -100.0, dtype=np.float32) + ds = xr.Dataset( + { + # CF-tagged variable: should be selected first + "cf_topo": xr.DataArray( + data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m", "standard_name": "surface_altitude"}, + ), + # Fallback-name variable: present but must not be selected + "elevation": xr.DataArray( + data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"}, + ), + } + ) + path = topo_file_factory(ds=ds) + with TopoInterrogator(path) as intr: + meta = intr.interrogate_topo() + assert meta.var_name == "cf_topo" From 7d7008e49391fd84b893761ab258ac1b3df33425 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 7 May 2026 10:56:26 -0400 Subject: [PATCH 15/24] Add MetInterrogator to data storms as an option --- src/python/geoclaw/surge/storm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/geoclaw/surge/storm.py b/src/python/geoclaw/surge/storm.py index c1e30d796..c52d45add 100644 --- a/src/python/geoclaw/surge/storm.py +++ b/src/python/geoclaw/surge/storm.py @@ -1333,7 +1333,6 @@ def write_data(self, path, dim_mapping=None, var_mapping=None, met_interrogator=None, verbose=False): r""" """ - # Only one format right now _data_file_format_mapping = {'ascii': 1, 'nws12': 1, "owi": 1, 'netcdf': 2, 'nws13': 2} From a95487dfe86662b8a6cba29ba65a38160ef742b3 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 7 May 2026 11:18:41 -0400 Subject: [PATCH 16/24] Trim paths for output --- src/2d/shallow/surge/data_storm_module.f90 | 4 ++-- src/2d/shallow/topo_module.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 0a5cf2f35..3c47992c4 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -103,7 +103,7 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) if (.not.module_setup) then ! Open data file - print *,'Reading storm data file ', storm_data_path + print *,'Reading storm data file ', trim(storm_data_path) open(unit=data_unit, file=storm_data_path, status='old', & action='read', iostat=io_status) if (io_status /= 0) then @@ -298,7 +298,7 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) #ifdef NETCDF ! Open file and get file ID ! :TODO: Only read in times that are between t0 and tfinal - print *, "Reading storm NetCDF file ", storm%paths(1) + print *, "Reading storm NetCDF file ", trim(storm%paths(1)) call check_netcdf_error(nf90_open(storm%paths(1), nf90_nowrite, nc_fid)) ! Check dim/var number call check_netcdf_error(nf90_inquire(nc_fid, num_dims, num_vars)) diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index 2290f2d45..cfb5dd8c0 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -567,7 +567,7 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) mtot = int(mx, 8) * int(my, 8) print *, ' ' - print *, 'Reading topography file ', fname + print *, 'Reading topography file ', trim(fname) select case(abs(topo_type)) ! ASCII file with x,y,z values on each line. From 61b32fd1b6ef00bdf9a049a379e120691f4bdda3 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 7 May 2026 21:32:10 -0400 Subject: [PATCH 17/24] Add topo_entries to netcdf interrogator This allows wrapping topo files in longitudinal direction --- examples/tsunami/chile2010/test_chile2010.py | 41 ++-- src/2d/shallow/topo_module.f90 | 44 +++-- src/python/geoclaw/netcdf_utils.py | 133 ++++++++++++- tests/netcdf/test_descriptor_writer.py | 28 ++- tests/netcdf/test_topo_interrogator.py | 186 ++++++++++++++++++- 5 files changed, 394 insertions(+), 38 deletions(-) diff --git a/examples/tsunami/chile2010/test_chile2010.py b/examples/tsunami/chile2010/test_chile2010.py index 86ceb1257..3397c538e 100644 --- a/examples/tsunami/chile2010/test_chile2010.py +++ b/examples/tsunami/chile2010/test_chile2010.py @@ -40,7 +40,7 @@ def _get_topography(runner, variant=0): return topo, dtopo -def _write_netcdf_variant(topo, variant, output_path): +def _write_netcdf_variant(topo, variant, output_path, crop_bounds=None): """Write topography as a NetCDF file in the requested variant format. Parameters @@ -51,6 +51,11 @@ def _write_netcdf_variant(topo, variant, output_path): One of ``"netcdf_topotools"``, ``"lat_flip"``, or ``"lon_360"``. output_path : Path Destination file path for the NetCDF output. + crop_bounds : tuple or None + (lon_min, lon_max, lat_min, lat_max) in domain coordinates. When + provided, ``topo_entries()`` is called to compute the correct + ``lon_offset`` for the descriptor; the returned list has the same + ``[ttype, path, meta]`` element format used by ``topo_data.topofiles``. Variants -------- @@ -151,14 +156,16 @@ def _write_netcdf_variant(topo, variant, output_path): else: raise ValueError(f"Unknown NetCDF variant '{variant}'.") - # Interrogate the written file to auto-detect lon_convention, lat_order, - # and dim_order. The returned TopoMetadata is passed to topofiles as the - # third element so data.py writes the descriptor block that Fortran's - # read_netcdf_descriptor needs. Without it nc_lon_convention defaults to - # 180 and [0,360] longitudes are never shifted back to [-180,180], causing - # the "topo arrays do not cover domain" check to fail. + # Interrogate the written file to auto-detect lat_order, dim_order, etc. + # When crop_bounds (domain coordinates) are given, topo_entries() is used + # so that the correct lon_offset is computed and written into the descriptor + # block that Fortran's read_netcdf_descriptor needs. from clawpack.geoclaw.netcdf_utils import TopoInterrogator - return TopoInterrogator(output_path, 'elevation').interrogate_topo() + with TopoInterrogator(output_path, 'elevation', crop_bounds=crop_bounds) as intr: + if crop_bounds is not None: + return intr.topo_entries() + else: + return [[4, output_path, intr.interrogate_topo()]] CASES = [pytest.param("standard", id="standard"), pytest.param("netcdf_topotools", id="netcdf_topotools", marks=pytest.mark.netcdf), @@ -209,10 +216,14 @@ def test_chile2010(tmp_path: Path, save: bool, variant: dict): runner.rundata.topo_data.topofiles = [[2, topo.path]] else: nc_path = tmp_path / f"chile_topo_{variant}.nc" - meta = _write_netcdf_variant(topo, variant, nc_path) - # Pass TopoMetadata as the third element so data.py writes the - # descriptor block Fortran needs to interpret coordinate conventions. - runner.rundata.topo_data.topofiles = [[4, nc_path, meta]] + clawdata = runner.rundata.clawdata + # lon_360 needs domain crop bounds so topo_entries() can compute the + # correct lon_offset (-360.0) for the descriptor. Other variants use + # interrogate_topo() (lon_offset=0.0 is correct for [-180,180] files). + crop = (clawdata.lower[0], clawdata.upper[0], + clawdata.lower[1], clawdata.upper[1]) if variant == "lon_360" else None + entries = _write_netcdf_variant(topo, variant, nc_path, crop_bounds=crop) + runner.rundata.topo_data.topofiles = entries runner.write_data() @@ -226,7 +237,11 @@ def test_chile2010(tmp_path: Path, save: bool, variant: dict): runner.build_executable() runner.run_code() - runner.check_gauge(save=save, gauge_id=32412) + # lon_360 coordinates undergo +360 then -360 shifts that are not a + # perfect floating-point round-trip for non-round values, so allow a + # slightly relaxed tolerance (still much smaller than 1% physically). + tol = {"atol": 1e-2} if variant == "lon_360" else {} + runner.check_gauge(save=save, gauge_id=32412, **tol) if __name__ == "__main__": raise SystemExit(pytest.main([__file__])) diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index cfb5dd8c0..b817cf7f6 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -47,7 +47,7 @@ module topo_module character(len=32), allocatable :: nc_lat_order(:) ! 'N_to_S' or 'S_to_N' character(len=32), allocatable :: nc_dim_order(:) ! e.g. 'lat,lon' character(len=8), allocatable :: nc_fill_action(:) ! 'abort' or 'warn' - integer, allocatable :: nc_lon_convention(:) ! 180 or 360 + real(kind=8), allocatable :: nc_lon_offset(:) ! scalar added to file coords: x_domain = x_file + offset real(kind=8), allocatable :: nc_fill_value(:) ! fill/nodata sentinel real(kind=8), allocatable :: nc_crop_bounds(:,:) ! (4, n): lon0,lon1,lat0,lat1 logical, allocatable :: nc_has_fill(:) ! fill_value was specified @@ -173,7 +173,7 @@ subroutine read_topo_settings(restart,file_name) allocate(nc_var_name(mtopofiles), nc_lon_name(mtopofiles)) allocate(nc_lat_name(mtopofiles), nc_lat_order(mtopofiles)) allocate(nc_dim_order(mtopofiles), nc_fill_action(mtopofiles)) - allocate(nc_lon_convention(mtopofiles), nc_fill_value(mtopofiles)) + allocate(nc_lon_offset(mtopofiles), nc_fill_value(mtopofiles)) allocate(nc_crop_bounds(4, mtopofiles)) allocate(nc_has_fill(mtopofiles), nc_has_crop(mtopofiles)) nc_var_name = '' @@ -182,7 +182,7 @@ subroutine read_topo_settings(restart,file_name) nc_lat_order = 'S_to_N' nc_dim_order = 'lat,lon' nc_fill_action = 'abort' - nc_lon_convention = 180 + nc_lon_offset = 0.0d0 nc_fill_value = 0.0d0 nc_crop_bounds = 0.0d0 nc_has_fill = .false. @@ -702,11 +702,12 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & start=(/ 1 /), count=(/ my_tot /))) - ! Apply lon convention shift so xstart matches the shifted xll - ! computed in read_topo_header. + ! Apply lon_offset to convert file coordinates to domain + ! coordinates, so xstart matches the domain-coord xll that + ! was computed by read_topo_header. if (present(topo_idx)) then - if (nc_lon_convention(topo_idx) == 360) then - where (xlocs > 180.0d0) xlocs = xlocs - 360.0d0 + if (nc_lon_offset(topo_idx) /= 0.0d0) then + xlocs = xlocs + nc_lon_offset(topo_idx) end if end if @@ -1110,22 +1111,17 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy,topo_idx call check_netcdf_error(nf90_get_var(nc_file, y_var_id, ylocs, & start=(/ 1 /), count=(/ my /))) - ! Shift 0-360 longitudes to -180/180 if descriptor says so. - if (present(topo_idx)) then - if (nc_lon_convention(topo_idx) == 360) then - where (xlocs > 180.0d0) xlocs = xlocs - 360.0d0 - end if - end if - dx = abs(xlocs(2) - xlocs(1)) dy = abs(ylocs(2) - ylocs(1)) ! ---------------------------------------------------------------- ! Select the subset of the file to load. - ! Descriptor path: use explicit crop_bounds. + ! Descriptor path: use explicit crop_bounds (in FILE coordinates). ! Fallback: clip to the AMR domain (with ghost-cell buffer). ! ---------------------------------------------------------------- if (present(topo_idx) .and. nc_has_crop(topo_idx)) then + ! crop_bounds are in FILE coordinates; compare against + ! file-coord xlocs before applying lon_offset. x_in_dom = (xlocs >= nc_crop_bounds(1, topo_idx)) .and. & (xlocs <= nc_crop_bounds(2, topo_idx)) y_in_dom = (ylocs >= nc_crop_bounds(3, topo_idx)) .and. & @@ -1137,6 +1133,16 @@ subroutine read_topo_header(fname,topo_type,mx,my,xll,yll,xhi,yhi,dx,dy,topo_idx (ylocs < (yupper + dy + hyposs(1)*nghost)) end if + ! Apply lon_offset AFTER crop_bounds selection: converts file + ! coordinates to domain coordinates. crop_bounds above were + ! compared against file-coord xlocs; xll/xhi below will be in + ! domain coordinates as GeoClaw expects. + if (present(topo_idx)) then + if (nc_lon_offset(topo_idx) /= 0.0d0) then + xlocs = xlocs + nc_lon_offset(topo_idx) + end if + end if + xll = minval(xlocs, mask=x_in_dom) yll = minval(ylocs, mask=y_in_dom) xhi = maxval(xlocs, mask=x_in_dom) @@ -1721,7 +1727,7 @@ end subroutine check_netcdf_error ! var_name = z ! lon_name = longitude ! lat_name = latitude - ! lon_convention = 180 + ! lon_offset = 0.0 ! lat_order = N_to_S ! dim_order = lat,lon ! fill_value = -9999.0 @@ -1767,8 +1773,8 @@ subroutine read_netcdf_descriptor(iunit, idx) nc_lon_name(idx) = trim(val) case ('lat_name') nc_lat_name(idx) = trim(val) - case ('lon_convention') - read(val, *) nc_lon_convention(idx) + case ('lon_offset') + read(val, *) nc_lon_offset(idx) case ('lat_order') nc_lat_order(idx) = trim(val) case ('dim_order') @@ -1793,7 +1799,7 @@ subroutine read_netcdf_descriptor(iunit, idx) write(GEO_PARM_UNIT, *) ' var_name = ', trim(nc_var_name(idx)) write(GEO_PARM_UNIT, *) ' lon_name = ', trim(nc_lon_name(idx)) write(GEO_PARM_UNIT, *) ' lat_name = ', trim(nc_lat_name(idx)) - write(GEO_PARM_UNIT, *) ' lon_convention = ', nc_lon_convention(idx) + write(GEO_PARM_UNIT, *) ' lon_offset = ', nc_lon_offset(idx) write(GEO_PARM_UNIT, *) ' lat_order = ', trim(nc_lat_order(idx)) write(GEO_PARM_UNIT, *) ' fill_action = ', trim(nc_fill_action(idx)) if (nc_has_fill(idx)) & diff --git a/src/python/geoclaw/netcdf_utils.py b/src/python/geoclaw/netcdf_utils.py index c694326ec..362e44140 100644 --- a/src/python/geoclaw/netcdf_utils.py +++ b/src/python/geoclaw/netcdf_utils.py @@ -120,6 +120,7 @@ class TopoMetadata(FileMetadata): var_name: str source_units: str # units as found in file fill_action: str # 'abort' (only value for topo; fill = fatal) + lon_offset: float = 0.0 # scalar Fortran adds to file coords: x_domain = x_file + lon_offset @dataclasses.dataclass @@ -605,11 +606,65 @@ def interrogate_topo(self) -> TopoMetadata: return TopoMetadata( **dataclasses.asdict(base), - var_name = self.var_name, + var_name=self.var_name, source_units=source_units, fill_action='abort', # fill in topo crop region is always fatal + lon_offset=0.0, ) + def topo_entries(self) -> list[list]: + """ + Return a list of ready-to-use topo entries for topofiles. + + Each entry is [4, filepath, TopoMetadata]. When no wrapping is needed + this returns a list of one entry. When crop_bounds straddle the file's + lon cut point, returns two entries pointing to the same file with + different lon_offset and crop_bounds values. + + crop_bounds on the interrogator are in domain coordinates; this method + converts them to file coordinates before storing in the returned + metadata. Fortran can then use crop_bounds directly against file + coordinate arrays before applying lon_offset. + """ + + # Interrogate without crop validation: self.crop_bounds is in domain + # coordinates, which may lie outside the file extent. + saved_crop = self.crop_bounds + self.crop_bounds = None + try: + meta = self.interrogate_topo() + finally: + self.crop_bounds = saved_crop + + if saved_crop is None: + return [[4, self.path, dataclasses.replace(meta, lon_offset=0.0)]] + + assert saved_crop is not None # narrowing hint: already returned above + lon_coords = self.ds[meta.lon_name].values + file_lon_min = float(lon_coords.min()) + file_lon_max = float(lon_coords.max()) + if len(lon_coords) > 1: + lon_resolution = float(abs(lon_coords[1] - lon_coords[0])) + else: + lon_resolution = 1e-10 # single-point file, no gap tolerance needed + crop_lon_min, crop_lon_max, crop_lat_min, crop_lat_max = saved_crop + + entries_spec = _compute_lon_entries( + file_lon_min, file_lon_max, crop_lon_min, crop_lon_max, + max_gap=lon_resolution, + ) + + result = [] + for file_crop_min, file_crop_max, lon_offset in entries_spec: + new_meta = dataclasses.replace( + meta, + crop_bounds=(file_crop_min, file_crop_max, crop_lat_min, crop_lat_max), + lon_offset=lon_offset, + ) + result.append([4, self.path, new_meta]) + + return result + # --------------------------------------------------------------------------- # Met interrogator @@ -1006,6 +1061,76 @@ def _resolve_fill_values(self) -> None: self.ds[var_name].attrs = attrs +# --------------------------------------------------------------------------- +# Longitude entry computation +# --------------------------------------------------------------------------- + +def _compute_lon_entries( + file_lon_min: float, + file_lon_max: float, + domain_lon_min: float, + domain_lon_max: float, + max_gap: float = 1e-10, +) -> list[tuple[float, float, float]]: + """ + Compute (file_crop_min, file_crop_max, lon_offset) tuples needed to cover + [domain_lon_min, domain_lon_max] from a file with lons in + [file_lon_min, file_lon_max]. + + lon_offset is the scalar Fortran adds to file coordinates to produce + domain coordinates: x_domain = x_file + lon_offset. + + Returns 1 tuple if a single offset suffices, 2 tuples if the domain + straddles the file's cut point. + + max_gap controls how much under-coverage is tolerated. The default + (1e-10) is tight enough to catch genuine gaps. Pass the file's grid + spacing to allow for the half-cell gap at the dateline that near-global + files (e.g. GEBCO) have between their last and first longitude columns. + + Raises ValueError if the file cannot cover the requested domain even + with all candidate offsets, or if the remaining uncovered gap exceeds + max_gap. + """ + candidate_offsets = [0.0, 360.0, -360.0] + entries: list[tuple[float, float, float]] = [] + total_coverage = 0.0 + + for offset in candidate_offsets: + shifted_min = file_lon_min + offset + shifted_max = file_lon_max + offset + intersect_min = max(domain_lon_min, shifted_min) + intersect_max = min(domain_lon_max, shifted_max) + width = intersect_max - intersect_min + if width > 1e-10: + file_crop_min = intersect_min - offset + file_crop_max = intersect_max - offset + # Clamp to file extent (guards against floating-point overshoot) + file_crop_min = max(file_crop_min, file_lon_min) + file_crop_max = min(file_crop_max, file_lon_max) + entries.append((file_crop_min, file_crop_max, offset)) + total_coverage += width + + if not entries: + raise ValueError( + f"File longitude range [{file_lon_min}, {file_lon_max}] cannot cover " + f"domain [{domain_lon_min}, {domain_lon_max}] with candidate offsets " + f"{candidate_offsets}." + ) + + # One-sided check: overcoverage is harmless; under-coverage beyond max_gap + # indicates that the file genuinely cannot cover the requested domain. + gap = (domain_lon_max - domain_lon_min) - total_coverage + if gap > max_gap: + raise ValueError( + f"File longitudes [{file_lon_min}, {file_lon_max}] cannot " + f"cover requested domain [{domain_lon_min}, {domain_lon_max}]. " + f"Gap of {gap:.6f} degrees exceeds tolerance {max_gap:.6f}." + ) + + return entries + + # --------------------------------------------------------------------------- # Descriptor writer # --------------------------------------------------------------------------- @@ -1049,10 +1174,14 @@ def write_topo_descriptor(f, meta: 'TopoMetadata') -> None: meta : TopoMetadata Output of ``TopoInterrogator.interrogate_topo()``. """ + # Fortran adds lon_offset to all file coordinate values after reading: + # x_domain = x_file + lon_offset + # crop_bounds written here are in FILE coordinates (converted from + # domain coordinates by topo_entries()). f.write(f"var_name = {meta.var_name}\n") f.write(f"lon_name = {meta.lon_name}\n") f.write(f"lat_name = {meta.lat_name}\n") - f.write(f"lon_convention = {meta.lon_convention}\n") + f.write(f"lon_offset = {meta.lon_offset!r}\n") f.write(f"lat_order = {meta.lat_order}\n") f.write(f"dim_order = {','.join(meta.dim_order)}\n") if meta.fill_value is not None: diff --git a/tests/netcdf/test_descriptor_writer.py b/tests/netcdf/test_descriptor_writer.py index beb743bec..a307e81de 100644 --- a/tests/netcdf/test_descriptor_writer.py +++ b/tests/netcdf/test_descriptor_writer.py @@ -124,14 +124,14 @@ def _get_met_meta(met_file_factory, **kwargs) -> MetMetadata: def test_topo_descriptor_required_keys_present(topo_file_factory): """ write_topo_descriptor emits all mandatory keys: var_name, lon_name, - lat_name, lon_convention, lat_order, dim_order, fill_action. + lat_name, lon_offset, lat_order, dim_order, fill_action. """ meta = _get_topo_meta(topo_file_factory) buf = io.StringIO() DescriptorWriter.write_topo_descriptor(buf, meta) parsed = _parse_topo_descriptor(buf.getvalue()) - for key in ("var_name", "lon_name", "lat_name", "lon_convention", + for key in ("var_name", "lon_name", "lat_name", "lon_offset", "lat_order", "dim_order", "fill_action"): assert key in parsed, f"Missing required key '{key}' in topo descriptor" @@ -151,7 +151,7 @@ def test_topo_descriptor_values_match_metadata(topo_file_factory): assert parsed["var_name"] == meta.var_name assert parsed["lon_name"] == meta.lon_name assert parsed["lat_name"] == meta.lat_name - assert int(parsed["lon_convention"]) == meta.lon_convention + assert float(parsed["lon_offset"]) == pytest.approx(meta.lon_offset) assert parsed["lat_order"] == meta.lat_order assert parsed["dim_order"] == ",".join(meta.dim_order) assert parsed["fill_action"] == meta.fill_action @@ -219,6 +219,28 @@ def test_topo_descriptor_crop_bounds_present_when_set(topo_file_factory): assert lat1 == pytest.approx(5.0) +def test_descriptor_writes_lon_offset_not_convention(topo_file_factory): + """ + Topo descriptor must contain 'lon_offset' as a parseable float and must + NOT contain 'lon_convention'. This verifies the format-version-2 change. + """ + import dataclasses + meta = _get_topo_meta(topo_file_factory) + # Exercise a non-zero offset to confirm the value round-trips. + meta = dataclasses.replace(meta, lon_offset=-360.0) + + buf = io.StringIO() + DescriptorWriter.write_topo_descriptor(buf, meta) + text = buf.getvalue() + parsed = _parse_topo_descriptor(text) + + assert "lon_offset" in parsed, "Descriptor must contain 'lon_offset'" + assert "lon_convention" not in parsed, ( + "Descriptor must NOT contain 'lon_convention' (superseded by lon_offset)" + ) + assert float(parsed["lon_offset"]) == pytest.approx(-360.0) + + def test_topo_descriptor_ends_with_blank_line(topo_file_factory): """ Descriptor block ends with a blank line so the Fortran parser can detect diff --git a/tests/netcdf/test_topo_interrogator.py b/tests/netcdf/test_topo_interrogator.py index c0a98c65d..1e804bc35 100644 --- a/tests/netcdf/test_topo_interrogator.py +++ b/tests/netcdf/test_topo_interrogator.py @@ -3,7 +3,7 @@ Covers: fill-in-crop detection, fill-outside-crop pass, unit verification, unit-conversion warn path, missing units warn path, descriptor key presence, -and all coordinate variant combinations. +all coordinate variant combinations, and topo_entries() lon wrapping. """ import warnings @@ -251,6 +251,190 @@ def test_autodetect_no_match_raises(topo_file_factory): intr.interrogate_topo() +# ============================================================ +# topo_entries() — longitude offset and wrap logic +# ============================================================ + +def test_topo_entries_no_wrap(topo_file_factory): + """ + File [-180, 180], crop (-90, 0, -45, 45): single entry, offset 0, + crop_bounds in metadata equal the requested file-coord crop. + """ + path = topo_file_factory(lon_min=-180.0, lon_max=180.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-90.0, 0.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 1 + ttype, fpath, meta = entries[0] + assert ttype == 4 + assert meta.lon_offset == pytest.approx(0.0) + assert meta.crop_bounds is not None + lon0, lon1, lat0, lat1 = meta.crop_bounds + assert lon0 == pytest.approx(-90.0) + assert lon1 == pytest.approx(0.0) + assert lat0 == pytest.approx(-45.0) + assert lat1 == pytest.approx(45.0) + + +def test_topo_entries_simple_shift(topo_file_factory): + """ + File [0, 360], crop (0, 180, -45, 45): single entry with lon_offset 0.0. + """ + path = topo_file_factory(lon_min=0.0, lon_max=360.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(0.0, 180.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 1 + assert entries[0][2].lon_offset == pytest.approx(0.0) + + +def test_topo_entries_simple_shift_negative(topo_file_factory): + """ + File [0, 360], crop (-180, 0, -45, 45): 1 or 2 entries are both valid; + combined domain coverage must equal 180 degrees. + """ + path = topo_file_factory(lon_min=0.0, lon_max=360.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-180.0, 0.0, -45.0, 45.0)) + entries = intr.topo_entries() + intr.close() + + assert 1 <= len(entries) <= 2 + # Total lon coverage across all entries must equal the requested domain width. + total = sum( + meta.crop_bounds[1] - meta.crop_bounds[0] + for _, _, meta in entries + if meta.crop_bounds is not None + ) + assert total == pytest.approx(180.0) + + +def test_topo_entries_wrap_required(topo_file_factory): + """ + File [-180, 180], domain (-360, 0): requires two entries. + Entry 1: file_crop [-180, 0], lon_offset 0.0. + Entry 2: file_crop [0, 180], lon_offset -360.0. + Combined domain coverage == 360 degrees. + """ + path = topo_file_factory(lon_min=-180.0, lon_max=180.0, + lat_min=-90.0, lat_max=90.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-360.0, 0.0, -90.0, 90.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 2 + + # Collect (file_crop_min, file_crop_max, lon_offset) pairs (order not mandated). + pairs = { + (meta.crop_bounds[0], meta.crop_bounds[1], meta.lon_offset) + for _, _, meta in entries + if meta.crop_bounds is not None + } + assert (-180.0, 0.0, 0.0) in pairs + assert (0.0, 180.0, -360.0) in pairs + + +def test_topo_entries_no_coverage(topo_file_factory): + """ + File [0, 90], domain [-180, -90]: no candidate offset can cover the + domain; _compute_lon_entries raises ValueError. + """ + path = topo_file_factory(lon_min=0.0, lon_max=90.0, + lat_min=-45.0, lat_max=45.0) + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-180.0, -90.0, -45.0, 45.0)) + with pytest.raises(ValueError): + intr.topo_entries() + intr.close() + + +def test_topo_entries_near_global_gap(topo_file_factory): + """ + Near-global file (e.g. GEBCO style) with lons [-179.99, 179.99] has a + gap of ~0.0042 degrees at the dateline — one grid cell. A domain that + straddles the dateline (crop [-211, -99, 40, 75]) must still produce two + entries without raising ValueError, since the gap is within one grid + spacing. + """ + import numpy as np + import xarray as xr + + # 121 points from -179.99 to 179.99 gives spacing ~3.0 degrees; the + # "missing" wrap column sits between 179.99 and -179.99 + 360 = 180.01. + lons = np.linspace(-179.99, 179.99, 121) + lats = np.linspace(40.0, 75.0, 10) + data = np.full((len(lats), len(lons)), -500.0, dtype=np.float32) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + ds = xr.Dataset( + {"z": xr.DataArray(data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"})} + ) + path = topo_file_factory(ds=ds) + + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-211.0, -99.0, 40.0, 75.0)) + entries = intr.topo_entries() + intr.close() + + assert len(entries) == 2, ( + f"Expected 2 entries for dateline-straddling crop, got {len(entries)}" + ) + # The seam gap (domain width minus combined file-coord coverage) must be + # within one grid spacing. crop_bounds widths equal domain-coord widths + # because lon_offset is a constant shift. + lon_spacing = float(lons[1] - lons[0]) + total_covered = sum( + meta.crop_bounds[1] - meta.crop_bounds[0] + for _, _, meta in entries + if meta.crop_bounds is not None + ) + domain_width = -99.0 - (-211.0) + gap = domain_width - total_covered + assert gap <= lon_spacing, ( + f"Seam gap {gap:.6f} exceeds grid spacing {lon_spacing:.6f}" + ) + + +def test_topo_entries_genuine_gap_still_errors(topo_file_factory): + """ + File [-90, 90] genuinely cannot cover domain [-200, -50]: the uncovered + gap (~60 degrees) is far larger than the grid spacing, so ValueError + must still be raised even with the near-global-gap tolerance. + """ + import numpy as np + import xarray as xr + + lons = np.linspace(-90.0, 90.0, 91) + lats = np.linspace(40.0, 75.0, 10) + data = np.full((len(lats), len(lons)), -500.0, dtype=np.float32) + coords = { + "lon": xr.DataArray(lons, dims=["lon"]), + "lat": xr.DataArray(lats, dims=["lat"]), + } + ds = xr.Dataset( + {"z": xr.DataArray(data, dims=["lat", "lon"], coords=coords, + attrs={"units": "m"})} + ) + path = topo_file_factory(ds=ds) + + intr = TopoInterrogator(path, var_name="z", + crop_bounds=(-200.0, -50.0, 40.0, 75.0)) + with pytest.raises(ValueError, match="[Gg]ap"): + intr.topo_entries() + intr.close() + + def test_autodetect_cf_standard_name_takes_priority(topo_file_factory): """ CF standard_name detection takes priority over the fallback name list. From fbfaa89230aa6ea0c5e32ae7bdc2edc06087154a Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 8 May 2026 20:29:41 -0400 Subject: [PATCH 18/24] Fix isaac.storm written during test Needed to remove the isaac.storm file being written directly during the run of setrun.py and instead only do so when called from `__main__`. --- examples/storm-surge/isaac/setrun.py | 68 ++++++++++++++---------- examples/storm-surge/isaac/test_isaac.py | 2 - 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/examples/storm-surge/isaac/setrun.py b/examples/storm-surge/isaac/setrun.py index 11d8ed79f..952c65bcf 100644 --- a/examples/storm-surge/isaac/setrun.py +++ b/examples/storm-surge/isaac/setrun.py @@ -409,34 +409,6 @@ def setgeo(rundata): data.storm_specification_type = 'holland80' # data.storm_specification_type = 'data' data.storm_file = (Path() / 'isaac.storm').resolve() - - isaac = Storm() - atcf_path = (Path(__file__).parent / "bal092012.dat").resolve() - isaac.read(path=atcf_path, file_format="ATCF") - # Calculate landfall time - Need to specify as the file above does not - # include this info (~2345 UTC - 6:45 p.m. CDT - on August 28) - isaac.time_offset = np.datetime64("2012-08-29T00:00") - - if data.storm_specification_type == 'holland80': - isaac.write(data.storm_file, file_format='geoclaw') - - elif data.storm_specification_type == "data": - # ASCII - isaac.file_format = 'NWS12' - isaac.file_paths = [] - isaac.file_paths.append((Path() / "isaac.PRE").resolve()) - isaac.file_paths.append((Path() / "isaac.WIN").resolve()) - - # NetCDF - # isaac.file_format = "NWS13" - # isaac.file_paths.append((Path() / "isaac.nc").resolve()) - # # For NetCDF we need to set this to the epoch or things do not work yet - # isaac.time_offset = np.datetime64("1970-01-01") - - isaac.write(data.storm_file, file_format='data') - - else: - raise ValueError("Invalid storm specification type.") # ======================= # Set Variable Friction @@ -463,6 +435,45 @@ def setgeo(rundata): # ---------------------- +def write_storm_file(rundata): + """Write the storm file configured in rundata.surge_data. + + Separated from setgeo so that importing/calling setrun() does not write + files as a side effect (important for test runners that call setrun() and + then substitute their own storm file). + """ + data = rundata.surge_data + + isaac = Storm() + atcf_path = (Path(__file__).parent / "bal092012.dat").resolve() + isaac.read(path=atcf_path, file_format="ATCF") + # Calculate landfall time - Need to specify as the file above does not + # include this info (~2345 UTC - 6:45 p.m. CDT - on August 28) + isaac.time_offset = np.datetime64("2012-08-29T00:00") + + if data.storm_specification_type == 'holland80': + isaac.write(data.storm_file, file_format='geoclaw') + + elif data.storm_specification_type == "data": + # ASCII + isaac.file_format = 'NWS12' + isaac.file_paths = [ + (Path(__file__).parent / "isaac.PRE").resolve(), + (Path(__file__).parent / "isaac.WIN").resolve(), + ] + + # NetCDF + # isaac.file_format = "NWS13" + # isaac.file_paths.append((Path() / "isaac.nc").resolve()) + # # For NetCDF we need to set this to the epoch or things do not work yet + # isaac.time_offset = np.datetime64("1970-01-01") + + isaac.write(data.storm_file, file_format='data') + + else: + raise ValueError("Invalid storm specification type.") + + if __name__ == '__main__': # Set up run-time parameters and write all data files. import sys @@ -471,4 +482,5 @@ def setgeo(rundata): else: rundata = setrun() + write_storm_file(rundata) rundata.write() diff --git a/examples/storm-surge/isaac/test_isaac.py b/examples/storm-surge/isaac/test_isaac.py index f1a4311f1..d3e38b88b 100644 --- a/examples/storm-surge/isaac/test_isaac.py +++ b/examples/storm-surge/isaac/test_isaac.py @@ -501,8 +501,6 @@ def _check_data_storm_descriptor(generated_path: Path, regression_path: Path) -> ] -# @pytest.mark.slow - @pytest.mark.regression @pytest.mark.storm @pytest.mark.parametrize( From f0799dd7a3554122c82bd56dc4ef511b19c62541 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Thu, 14 May 2026 13:56:02 -0400 Subject: [PATCH 19/24] Lower number of levels for slow ike test This was not finishing in a reasonable amount of time in the config tested. Lowered this to max levels = 4 for now. --- examples/storm-surge/ike/test_ike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/storm-surge/ike/test_ike.py b/examples/storm-surge/ike/test_ike.py index 55b91538f..0d5d9d6cf 100644 --- a/examples/storm-surge/ike/test_ike.py +++ b/examples/storm-surge/ike/test_ike.py @@ -17,7 +17,7 @@ ), pytest.param( {"num_cells": [29 * 4, 24 * 4], - "amr_levels_max": 6, + "amr_levels_max": 4, "num_output_times": 16}, id="fine", marks=pytest.mark.slow, From 8800c2ffd233684bce090146ae424677461305aa Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Mon, 18 May 2026 12:05:19 -0400 Subject: [PATCH 20/24] Adds time scaling for met forcing Adds parameter to storm objects that allow for the time scale of the storm to be changed. This is useful for testing and for running idealized cases of storms that may be slower or faster than provided in the input data. This also adds basic tests in the Python to support this functionality. --- examples/storm-surge/isaac/setrun.py | 4 + src/2d/shallow/surge/data_storm_module.f90 | 20 ++++- src/python/geoclaw/surge/storm.py | 14 +++- tests/test_storm.py | 1 + tests/test_storm_time_scale.py | 85 ++++++++++++++++++++++ 5 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 tests/test_storm_time_scale.py diff --git a/examples/storm-surge/isaac/setrun.py b/examples/storm-surge/isaac/setrun.py index 952c65bcf..e83e0dbc0 100644 --- a/examples/storm-surge/isaac/setrun.py +++ b/examples/storm-surge/isaac/setrun.py @@ -468,6 +468,10 @@ def write_storm_file(rundata): # # For NetCDF we need to set this to the epoch or things do not work yet # isaac.time_offset = np.datetime64("1970-01-01") + # Stretch (>1) or compress (<1) the storm time axis relative to the + # first time step. 1.0 = no change; 2.0 = storm moves twice as slow. + isaac.storm_time_scale = 1.0 + isaac.write(data.storm_file, file_format='data') else: diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 3c47992c4..4962c4d1b 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -47,6 +47,7 @@ module data_storm_module ! Run-time storm modification settings real(kind=8) :: wind_scaling, pressure_scaling + real(kind=8) :: storm_time_scale ! Ramping settings integer :: window_type @@ -132,7 +133,16 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) else read(data_unit, *) end if - read(data_unit, *) + + ! storm_time_scale stretches (>1) or compresses (<1) the time axis + ! to simulate slower or faster storm translation speeds. + ! A blank line here means a legacy file without the parameter; default to 1.0. + read(data_unit, *, iostat=io_status) storm_time_scale + if (io_status /= 0) then + storm_time_scale = 1.0d0 + write(log_unit, *) "Warning: storm_time_scale not in data file, defaulting to 1.0" + end if + write(log_unit, "('storm_time_scale = ',d16.8)") storm_time_scale if (DEBUG) then print "('time_offset = ',a)", storm%time_offset @@ -356,6 +366,14 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) storm%time(:) = storm%time(:) - storm%time(1) & + nint(nc_time_offset) + ! Stretch (>1) or compress (<1) the time axis relative to + ! the first time step (anchor). No-op when scale is 1.0. + if (storm_time_scale /= 1.0d0) then + storm%time(:) = storm%time(1) & + + nint(storm_time_scale & + * dble(storm%time(:) - storm%time(1))) + end if + ! Wind (optional — zero-fill if not in descriptor) if (len_trim(var_names(1)) > 0) then call check_netcdf_error(nf90_inq_varid(nc_fid, trim(var_names(1)), var_ID)) diff --git a/src/python/geoclaw/surge/storm.py b/src/python/geoclaw/surge/storm.py index c52d45add..9d9e5e0fa 100644 --- a/src/python/geoclaw/surge/storm.py +++ b/src/python/geoclaw/surge/storm.py @@ -219,6 +219,7 @@ def __init__(self, path=None, file_format="ATCF", **kwargs): # Run-time modifications for storm self.scaling = [1.0, 1.0] # Scaling of wind and pressure + self.storm_time_scale = 1.0 # >1 slower storm, <1 faster storm # Ramping information - only applies to data storms currently self.window_type = 0 @@ -955,9 +956,14 @@ def read_data(self, path: Path, verbose: bool=False): data_file.readline().partition("#")[0].rstrip()) self.ramp_width = float( data_file.readline().partition("#")[0].rstrip()) - data_file.readline() - data_file.readline() - data_file.readline() + data_file.readline() # window + _ts_str = data_file.readline().partition("#")[0].rstrip() + try: + self.storm_time_scale = float(_ts_str) + except ValueError: + # Legacy file: blank line consumed; storm_time_scale absent + self.storm_time_scale = 1.0 + data_file.readline() # "# Format Data Information" if self.file_format == 1: # Skip blank line then "# File paths" comment @@ -1387,7 +1393,7 @@ def write_data(self, path, dim_mapping=None, var_mapping=None, data_file.write(f"{str(self.window)[1:-1].ljust(20)} # Window\n") else: data_file.write(f"{str(None).ljust(20)} # Window\n") - data_file.write("\n") + data_file.write(f"{str(self.storm_time_scale).ljust(20)} # Storm time scale (>1 slower, <1 faster)\n") data_file.write("# Format Data Information\n") if file_format == 1: # Check number of file paths diff --git a/tests/test_storm.py b/tests/test_storm.py index 7aa7c57dd..2015139b3 100644 --- a/tests/test_storm.py +++ b/tests/test_storm.py @@ -483,6 +483,7 @@ def test_data_storm_roundtrip(file_format, tmp_path): assert data_storm.file_format in DATA_FILE_FORMAT_MAP[read_storm.file_format] assert data_storm.window_type == read_storm.window_type assert data_storm.ramp_width == read_storm.ramp_width + assert data_storm.storm_time_scale == read_storm.storm_time_scale assert len(data_storm.file_paths) == len(read_storm.file_paths) for i, path in enumerate(data_storm.file_paths): assert read_storm.file_paths[i] == path diff --git a/tests/test_storm_time_scale.py b/tests/test_storm_time_scale.py new file mode 100644 index 000000000..7d88fbbfe --- /dev/null +++ b/tests/test_storm_time_scale.py @@ -0,0 +1,85 @@ +""" +Self-contained test for the storm_time_scale time-axis arithmetic. + +No GeoClaw runtime or test framework is required — only numpy and assert. +Mirrors the Fortran arithmetic in data_storm_module.f90 case(2): + + t_ref = t[0] + t_scaled[i] = t_ref + round(scale * (t[i] - t_ref)) +""" + +import numpy as np + + +def apply_time_scale(t_raw, scale): + """Apply storm_time_scale to a raw time array. + + Parameters + ---------- + t_raw : array-like of float + Times in any consistent unit (e.g. seconds since epoch). + scale : float + storm_time_scale value. + + Returns + ------- + numpy.ndarray of int + Scaled integer times matching Fortran nint() rounding. + """ + t = np.asarray(t_raw, dtype=np.float64) + t_ref = t[0] + return np.rint(t_ref + scale * (t - t_ref)).astype(np.int64) + + +def _hourly_times(n=10): + """Return n hourly timestamps as float64 seconds since Unix epoch.""" + epoch = np.datetime64("1970-01-01T00:00:00", "s") + start = np.datetime64("2012-08-28T00:00:00", "s") + t0 = float((start - epoch) / np.timedelta64(1, "s")) + return np.array([t0 + i * 3600.0 for i in range(n)]) + + +def test_scale_one_is_noop(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 1.0) + expected = np.rint(t).astype(np.int64) + assert np.array_equal(scaled, expected), f"scale=1.0 changed values: {scaled - expected}" + + +def test_scale_two_doubles_duration(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 2.0) + # Duration from first to last should be doubled + orig_duration = t[-1] - t[0] + scaled_duration = scaled[-1] - scaled[0] + assert abs(scaled_duration - 2.0 * orig_duration) < 1, ( + f"Expected duration {2*orig_duration}, got {scaled_duration}" + ) + + +def test_anchor_invariant(): + t = _hourly_times(10) + t_ref = np.rint(t[0]).astype(np.int64) + for scale in [0.5, 1.0, 2.0, 3.7]: + scaled = apply_time_scale(t, scale) + assert scaled[0] == t_ref, ( + f"scale={scale}: anchor changed from {t_ref} to {scaled[0]}" + ) + + +def test_scale_half_compresses_duration(): + t = _hourly_times(10) + scaled = apply_time_scale(t, 0.5) + orig_duration = t[-1] - t[0] + scaled_duration = scaled[-1] - scaled[0] + assert abs(scaled_duration - 0.5 * orig_duration) < 1, ( + f"Expected duration {0.5*orig_duration}, got {scaled_duration}" + ) + + +if __name__ == "__main__": + test_scale_one_is_noop() + test_scale_two_doubles_duration() + test_anchor_invariant() + test_scale_half_compresses_duration() + print("All storm_time_scale arithmetic tests passed.") From 15b237e4fb32c37bc90c8238236698c250e15363 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Tue, 19 May 2026 15:02:28 -0400 Subject: [PATCH 21/24] Add gauge met forcing plotting helpers --- src/python/geoclaw/surge/plot.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/python/geoclaw/surge/plot.py b/src/python/geoclaw/surge/plot.py index ca7f54d7a..aa3415e99 100644 --- a/src/python/geoclaw/surge/plot.py +++ b/src/python/geoclaw/surge/plot.py @@ -107,25 +107,36 @@ def get_track(self, frame): # ========================================================================== # Gauge functions # ========================================================================== -def gauge_locations(current_data, gaugenos='all'): +def gauge_locations(current_data, gaugenos='all') -> None: gaugetools.plot_gauge_locations(current_data.plotdata, gaugenos=gaugenos, format_string='kx', add_labels=True, xoffset=0.02, yoffset=0.02) -def gauge_dry_regions(cd, dry_tolerance=1e-16): +def gauge_dry_regions(cd, dry_tolerance: float=1e-16): """Masked array of zeros where gauge is dry.""" return np.ma.masked_where(np.abs(cd.gaugesoln.q[0, :]) > dry_tolerance, np.zeros(cd.gaugesoln.q[0, :].shape)) -def gauge_surface(cd, dry_tolerance=1e-16): +def gauge_surface(cd, dry_tolerance: float=1e-16): """Sea surface at gauge masked when dry.""" return np.ma.masked_where(np.abs(cd.gaugesoln.q[0, :]) < dry_tolerance, cd.gaugesoln.q[3, :]) +def gauge_wind(cd, wind_x_idx: int=4, wind_y_idx: int=5): + """Wind speed at gauge""" + return np.sqrt(cd.gaugesoln.q[wind_x_idx, :]**2 + + cd.gaugesoln.q[wind_y_idx, :]**2) + + +def gauge_pressure(cd, pressure_idx: int=6): + """Wind speed at gauge""" + return cd.gaugesoln.q[pressure_idx, :] + + def plot_landfall_gauge(gauge, axes, landfall=0.0, style='b', kwargs={}): """Plot gauge data on the axes provided From 9b176892cfdc1923790b49de9fcb78093bdcb46d Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 22 May 2026 11:06:05 -0400 Subject: [PATCH 22/24] Add new line to path read messages --- src/2d/shallow/surge/data_storm_module.f90 | 11 +++++++---- src/2d/shallow/topo_module.f90 | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/2d/shallow/surge/data_storm_module.f90 b/src/2d/shallow/surge/data_storm_module.f90 index 4962c4d1b..8c94a6149 100644 --- a/src/2d/shallow/surge/data_storm_module.f90 +++ b/src/2d/shallow/surge/data_storm_module.f90 @@ -105,6 +105,7 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) if (.not.module_setup) then ! Open data file print *,'Reading storm data file ', trim(storm_data_path) + print * open(unit=data_unit, file=storm_data_path, status='old', & action='read', iostat=io_status) if (io_status /= 0) then @@ -138,8 +139,8 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) ! to simulate slower or faster storm translation speeds. ! A blank line here means a legacy file without the parameter; default to 1.0. read(data_unit, *, iostat=io_status) storm_time_scale - if (io_status /= 0) then - storm_time_scale = 1.0d0 + if (io_status /= 0) then + storm_time_scale = 1.0d0 write(log_unit, *) "Warning: storm_time_scale not in data file, defaulting to 1.0" end if write(log_unit, "('storm_time_scale = ',d16.8)") storm_time_scale @@ -298,8 +299,9 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) allocate(storm%time(mt)) ! Fill out variable data/info - print *, "Reading pressure file ", storm%paths(1) - print *, "Reading wind file ", storm%paths(2) + print *, "Reading pressure file ", trim(storm%paths(1)) + print *, "Reading wind file ", trim(storm%paths(2)) + print * call read_OWI_ASCII(storm%paths(2), storm%paths(1), & mx, my, mt, sw_lat, sw_lon, dy, dx, & storm, seconds_from_epoch(time(:, 1))) @@ -309,6 +311,7 @@ subroutine set_storm(storm_data_path, storm, storm_spec_type, log_unit) ! Open file and get file ID ! :TODO: Only read in times that are between t0 and tfinal print *, "Reading storm NetCDF file ", trim(storm%paths(1)) + print * call check_netcdf_error(nf90_open(storm%paths(1), nf90_nowrite, nc_fid)) ! Check dim/var number call check_netcdf_error(nf90_inquire(nc_fid, num_dims, num_vars)) diff --git a/src/2d/shallow/topo_module.f90 b/src/2d/shallow/topo_module.f90 index b817cf7f6..91659f402 100644 --- a/src/2d/shallow/topo_module.f90 +++ b/src/2d/shallow/topo_module.f90 @@ -568,6 +568,7 @@ subroutine read_topo_file(mx,my,topo_type,fname,xll,yll,topo,topo_idx) print *, ' ' print *, 'Reading topography file ', trim(fname) + print * select case(abs(topo_type)) ! ASCII file with x,y,z values on each line. From 4c09027a4cfee29c26c9fcaa06a8b17211b09da6 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 22 May 2026 11:24:51 -0400 Subject: [PATCH 23/24] Add example plotting gauge wind/pressure --- examples/storm-surge/isaac/setplot.py | 63 +++++++++++++++++++++++---- examples/storm-surge/isaac/setrun.py | 3 ++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/examples/storm-surge/isaac/setplot.py b/examples/storm-surge/isaac/setplot.py index 8fe715b15..29c5872ba 100644 --- a/examples/storm-surge/isaac/setplot.py +++ b/examples/storm-surge/isaac/setplot.py @@ -13,6 +13,7 @@ import clawpack.amrclaw.data as amrclaw import clawpack.geoclaw.data as geodata import clawpack.geoclaw.util as geoutil +import clawpack.geoclaw.surge.storm as stormtools import clawpack.geoclaw.surge.plot as surgeplot try: @@ -42,7 +43,8 @@ def setplot(plotdata=None): friction_data = geodata.FrictionData() friction_data.read(os.path.join(plotdata.outdir, 'friction.data')) - # Load storm track + # Load storm data + storm = stormtools.Storm(surge_data.storm_file, file_format="data") track = surgeplot.track_data(os.path.join(plotdata.outdir, 'fort.track')) # Set afteraxes function @@ -173,17 +175,19 @@ def friction_after_axes(cd): # ======================================================================== # Figures for gauges # ======================================================================== + # Map GeoClaw gauge number to NOAA gauge number and location/name + gauge_mapping = {1: [8761724, "Grand Isle, LA"], + 2: [8760922, 'Pilots Station East, SW Pass, LA']} + + # Storm Timepoints + landfall_time = np.datetime64(datetime.datetime(2012, 8, 29, 0)) + begin_date = datetime.datetime(2012, 8, 27) + end_date = datetime.datetime(2012, 8, 31) + def plot_observed(current_data): """Fetch and plot gauge data for gauges used.""" - # Map GeoClaw gauge number to NOAA gauge number and location/name - gauge_mapping = {1: [8761724, "Grand Isle, LA"], - 2: [8760922, 'Pilots Station East, SW Pass, LA']} - station_id, station_name = gauge_mapping[current_data.gaugesoln.id] - landfall_time = np.datetime64(datetime.datetime(2012, 8, 29, 0)) - begin_date = datetime.datetime(2012, 8, 27) - end_date = datetime.datetime(2012, 8, 31) # Fetch data if needed date_time, water_level, tide = geoutil.fetch_noaa_tide_data(station_id, @@ -227,6 +231,49 @@ def plot_observed(current_data): plotitem = plotaxes.new_plotitem(plot_type='1d_plot') plotitem.plot_var = surgeplot.gauge_dry_regions plotitem.kwargs = {"color":'lightcoral', "linewidth":5} + + + # Plot wind and pressure at each gauge + def storm_gauge_afteraxes(cd, storm): + # Map GeoClaw gauge number to NOAA gauge number and location/name + station_id, station_name = gauge_mapping[cd.gaugesoln.id] + + # Convert to seconds relative to landfall + # t = (cd.gaugesoln.t - landfall_time) / np.timedelta64(1, 's') + t = cd.gaugesoln.t + t /= (24 * 60**2) + + w_ax = plt.gca() + color ="tab:blue" + w_ax.plot(t, surgeplot.gauge_wind(cd), label="wind", color=color) + w_ax.set_ylabel("Wind (m/s)", color=color) + w_ax.set_ylim(wind_limits) + w_ax.tick_params(axis='y', labelcolor=color) + + P_ax = w_ax.twinx() + color = "tab:red" + P_ax.set_ylabel("Pressure (mbar)", color=color) + P_ax.plot(t, surgeplot.gauge_pressure(cd) * 1e-2, label="pressure", color=color) + P_ax.set_ylim(pressure_limits) + P_ax.tick_params(axis='y', labelcolor=color) + + w_ax.set_title(f"Storm Fileds at {station_name}") + + plt.gcf().tight_layout() + + plotfigure = plotdata.new_plotfigure(name='Gauge Wind', figno=301, + type='each_gauge') + plotfigure.show = True + plotfigure.clf_each_gauge = True + plotfigure.kwargs = {"figsize": [6.4 * 1.2, + 4.8 * 1.0]} + + plotaxes = plotfigure.new_plotaxes() + plotaxes.time_scale = 1 / (24 * 60**2) + plotaxes.grid = True + plotaxes.xlimits = [-2, 1.5] + plotaxes.time_label = "Days relative to 2012-12-26 00:00 UTC" + plotaxes.afteraxes = lambda cd: storm_gauge_afteraxes(cd, storm) # Gauge Location Plot def gauge_location_afteraxes(cd): diff --git a/examples/storm-surge/isaac/setrun.py b/examples/storm-surge/isaac/setrun.py index e83e0dbc0..632bc94ae 100644 --- a/examples/storm-surge/isaac/setrun.py +++ b/examples/storm-surge/isaac/setrun.py @@ -322,6 +322,9 @@ def setrun(claw_pkg='geoclaw'): rundata.gaugedata.gauges.append([2, -89.95826667, 29.26460167, rundata.clawdata.t0, rundata.clawdata.tfinal]) + + # Record the wind and pressure fields at the gauges + rundata.gaugedata.aux_out_fields = [4, 5, 6] # ------------------------------------------------------------------ # GeoClaw specific parameters: From a7d368fb651ea1cb337bd92cbcba8703ac38a487 Mon Sep 17 00:00:00 2001 From: Kyle Mandli Date: Fri, 22 May 2026 11:25:07 -0400 Subject: [PATCH 24/24] Add FFLAGS to LFLAGS in isaac --- examples/storm-surge/isaac/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/storm-surge/isaac/Makefile b/examples/storm-surge/isaac/Makefile index cd46dc5b3..707c82690 100644 --- a/examples/storm-surge/isaac/Makefile +++ b/examples/storm-surge/isaac/Makefile @@ -24,7 +24,7 @@ PLOTDIR = _plots # Directory for plots # FFLAGS += -DNETCDF -I$(NETCDF4_DIR)/include -L$(NETCDF4_DIR)/lib # LFLAGS += -I$(NETCDF4_DIR)/include -L$(NETCDF4_DIR)/lib -lnetcdff FFLAGS += -DNETCDF $(NETCDF_FFLAGS) -LFLAGS += $(NETCDF_LFLAGS) +LFLAGS += $(FFLAGS) $(NETCDF_LFLAGS) # --------------------------------- # package sources for this program: