Skip to content

Commit

Permalink
Merge pull request #20 from nasa/DAS-1414-netcdf-assets
Browse files Browse the repository at this point in the history
DAS-1414 - Check input granule is NetCDF-4 via media type or extension.
  • Loading branch information
owenlittlejohns authored Mar 9, 2022
2 parents b953024 + a5aea0e commit 91ffa73
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 3 deletions.
15 changes: 13 additions & 2 deletions harmony_netcdf_to_zarr/stac_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pystac import Asset, Catalog, Item


# File extensions accepted as NetCDF-4/HDF-5 granules when an asset has no
# media type. Entries are lowercase because `is_netcdf_asset` lowercases the
# href before comparing, and this is a tuple because `str.endswith` only
# accepts a `str` or a tuple of `str`.
VALID_EXTENSIONS = (
    '.nc4',
    '.nc',
    '.h5',
    '.hdf5',
    '.hdf',
)

# Asset media types that identify a NetCDF-4/HDF-5 granule.
VALID_MEDIA_TYPES = [
    'application/x-hdf5',
    'application/x-netcdf',
    'application/x-netcdf4',
]

Expand All @@ -38,11 +39,21 @@ def get_item_url(item: Item) -> Optional[str]:
"""
return next((asset.href for asset in item.assets.values()
if 'data' in (asset.roles or [])
and asset.media_type in VALID_MEDIA_TYPES),
if 'data' in (asset.roles or []) and is_netcdf_asset(asset)),
None)


def is_netcdf_asset(asset: Asset) -> bool:
    """ Determine whether a `pystac.Asset` refers to a NetCDF-4 granule.

        A media type listed in `VALID_MEDIA_TYPES` is sufficient on its own.
        The extension of the asset href is only consulted (ignoring case)
        when the asset has no media type at all; a media type that is present
        but not recognised is never overridden by the extension.

    """
    if asset.media_type in VALID_MEDIA_TYPES:
        return True

    if asset.media_type is not None:
        # A media type was supplied but is not one of the accepted ones, so
        # the file extension must not be used as a fallback.
        return False

    # No media type available: fall back to the (lowercased) href extension.
    return asset.href.lower().endswith(VALID_EXTENSIONS)


def get_output_catalog(input_catalog: Catalog, zarr_root: str) -> Catalog:
""" Clone the input STAC catalog and add an item for the Zarr store output.
This item will need to have the correct spatial and temporal
Expand Down
32 changes: 31 additions & 1 deletion tests/unit/test_stac_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
get_output_bounding_box,
get_output_catalog,
get_output_date_range,
get_output_item)
get_output_item,
is_netcdf_asset)


class TestStacUtilities(TestCase):
Expand Down Expand Up @@ -271,3 +272,32 @@ def test_get_item_date_range(self):
with self.subTest('Start and end datetimes'):
self.assertTupleEqual(get_item_date_range(start_end_item),
(self.datetime_one, self.datetime_two))

def test_is_netcdf_asset(self):
    """ Ensure that a NetCDF-4 asset can be correctly identified via either
        the asset media type or the file extension. The check on the file
        extension should handle both uppercase and lowercase, and should
        only be applied when the asset has no media type.

    """
    # Each entry is: subtest description, asset media type, href extension.
    test_args = [['NetCDF-4 media type', 'application/x-netcdf4', '.h5'],
                 ['NetCDF media type', 'application/x-netcdf', '.nc4'],
                 ['HDF-5 media type', 'application/x-hdf5', '.h5'],
                 ['.nc4 extension', None, '.nc4'],
                 ['.nc extension', None, '.nc'],
                 ['.h5 extension', None, '.h5'],
                 ['.hdf5 extension', None, '.hdf5'],
                 ['.hdf extension', None, '.hdf'],
                 ['.HDF5 extension', None, '.HDF5']]

    for description, media_type, extension in test_args:
        with self.subTest(description):
            test_asset = Asset(f'test{extension}', media_type=media_type)
            self.assertTrue(is_netcdf_asset(test_asset))

    # The final case pins the precedence rule: a media type that is present
    # but unrecognised must not be rescued by a valid file extension.
    bad_args = [['Bad media-type', 'application/tiff', '.tiff'],
                ['Missing media-type, bad extension', None, '.tiff'],
                ['Bad media-type, valid extension',
                 'application/tiff', '.nc4']]

    for description, media_type, extension in bad_args:
        with self.subTest(description):
            test_asset = Asset(f'test{extension}', media_type=media_type)
            self.assertFalse(is_netcdf_asset(test_asset))

0 comments on commit 91ffa73

Please sign in to comment.