Creating representative datasets from ncdump output #10773
-
TLDR: Jump to the bottom. I'm interested in Unidata metadata and conventions.
def from_xarray_dataset_dict(d) -> xr.Dataset:
    """Rebuild a zero-filled dataset from a data-free ``to_dict`` schema.

    Takes the output of ``xarray.Dataset.to_dict(data=False)`` and returns a
    dataset built from that schema with dummy (all-zero) data. This is useful
    in issues when helping users debug their dataset metadata: they can share
    the dataset schema and its associated metadata without sharing the data
    itself.

    The input dictionary is deep-copied before it is filled, so ``d`` itself
    is left unmodified.

    Example
    -------
    >>> import xarray as xr
    >>> from parcels._datasets.structured.generic import datasets
    >>> ds = datasets['ds_2d_left']
    >>> d = ds.to_dict(data=False)
    >>> ds2 = from_xarray_dataset_dict(d)
    """
    # Work on a private copy: the filling step mutates its argument in place.
    schema = copy.deepcopy(d)
    filled = _fill_with_dummy_data(schema)
    return xr.Dataset.from_dict(filled)
def _fill_with_dummy_data(d: dict[str, dict]):
assert isinstance(d, dict)
if "dtype" in d:
d["data"] = np.zeros(d["shape"], dtype=d["dtype"])
del d["dtype"]
del d["shape"]
for k in d:
if isinstance(d[k], dict):
d[k] = _fill_with_dummy_data(d[k])
return d If a user then does def load_dataset() -> xr.Dataset:
sgrid_roms = xr.Dataset()
sgrid_roms["grid"] = xr.DataArray(
0,
attrs=dict(
cf_role="grid_topology",
topology_dimension=2,
node_dimensions="xi_psi eta_psi",
face_dimensions="xi_rho: xi_psi (padding: both) eta_rho: eta_psi (padding: both)",
edge1_dimensions="xi_u: xi_psi eta_u: eta_psi (padding: both)",
edge2_dimensions="xi_v: xi_psi (padding: both) eta_v: eta_psi",
node_coordinates="lon_psi lat_psi",
face_coordinates="lon_rho lat_rho",
edge1_coordinates="lon_u lat_u",
edge2_coordinates="lon_v lat_v",
vertical_dimensions="s_rho: s_w (padding: none)",
),
)
sgrid_roms["u"] = (("xi_u", "eta_u"), np.ones((2, 2)), {"grid": "grid"})
return sgrid_roms
load_dataset().to_dict(data=False)

Then copy/pasting the (jsonified if you must) output in a GitHub issue:

{'coords': {}, 'attrs': {}, 'dims': {'xi_u': 2, 'eta_u': 2}, 'data_vars': {'grid': {'dims': (), 'attrs': {'cf_role': 'grid_topology', 'topology_dimension': 2, 'node_dimensions': 'xi_psi eta_psi', 'face_dimensions': 'xi_rho: xi_psi (padding: both) eta_rho: eta_psi (padding: both)', 'edge1_dimensions': 'xi_u: xi_psi eta_u: eta_psi (padding: both)', 'edge2_dimensions': 'xi_v: xi_psi (padding: both) eta_v: eta_psi', 'node_coordinates': 'lon_psi lat_psi', 'face_coordinates': 'lon_rho lat_rho', 'edge1_coordinates': 'lon_u lat_u', 'edge2_coordinates': 'lon_v lat_v', 'vertical_dimensions': 's_rho: s_w (padding: none)'}, 'dtype': 'int64', 'shape': ()}, 'u': {'dims': ('xi_u', 'eta_u'), 'attrs': {'grid': 'grid'}, 'dtype': 'float64', 'shape': (2, 2)}}}

We can do

d = ...
ds = from_xarray_dataset_dict(d)

This is great, but it requires the user to have access to the original dataset. Is there tooling that I can use to ingest ncdump output, such as the examples listed on the UGRID / SGRID websites, into a representative xarray dataset?
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
Beta Was this translation helpful? Give feedback.
ncgen can be used to parse ncdump (i.e., CDL) output