Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
check_order_of_images_unique,
)
from ._nwb_containers import (
check_dataset_not_empty,
check_empty_string_for_optional_attribute,
check_large_dataset_compression,
check_small_dataset_compression,
Expand Down Expand Up @@ -103,6 +104,7 @@
"check_order_of_images_unique",
"check_order_of_images_len",
"check_index_series_points_to_image",
"check_dataset_not_empty",
"check_empty_string_for_optional_attribute",
"check_small_dataset_compression",
"check_large_dataset_compression",
Expand Down
33 changes: 33 additions & 0 deletions src/nwbinspector/checks/_nwb_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,39 @@ def check_large_dataset_compression(
return None


@register_check(importance=Importance.CRITICAL, neurodata_type=NWBContainer)
def check_dataset_not_empty(nwb_container: NWBContainer) -> Optional[Iterable[InspectorMessage]]:
    """
    Check if any datasets in the container are empty (have zero elements).

    Empty datasets can cause issues with analysis and visualization tools, and generally indicate
    missing or incomplete data.

    Parameters
    ----------
    nwb_container: NWBContainer
        The NWB container to check for empty datasets.

    Returns
    -------
    Optional[Iterable[InspectorMessage]]
        Inspector messages for each empty dataset found, or None if no empty datasets are found.
    """
    for field_name, field in getattr(nwb_container, "fields", dict()).items():
        # Per review feedback, treat anything exposing both `shape` and `dtype` as array-like
        # data — this covers h5py.Dataset and zarr.Array as before, and additionally numpy
        # arrays and DataIO-wrapped data, which the isinstance check missed.
        if not (hasattr(field, "shape") and hasattr(field, "dtype")):
            continue

        shape = field.shape
        # A dataset is empty iff any dimension is zero. A scalar dataset (shape == ()) has one
        # element and is not flagged. `shape is None` can occur for wrappers whose backing data
        # shape is unknown — skip those rather than guess.
        if shape is not None and 0 in shape:
            # h5py/zarr datasets expose a path-like `.name`; keep the original basename-based
            # label for those, and fall back to the container field name for plain in-memory
            # arrays that have no `.name` attribute.
            if hasattr(field, "name"):
                dataset_name = os.path.split(field.name)[1]
            else:
                dataset_name = field_name
            yield InspectorMessage(
                severity=Severity.HIGH,
                message=f"The dataset '{dataset_name}' is empty (has zero elements). "
                f"Datasets should contain data.",
            )

    return None


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBContainer)
def check_small_dataset_compression(
nwb_container: NWBContainer,
Expand Down