diff --git a/processor/reader.py b/processor/reader.py
index e427fb7..fd28404 100644
--- a/processor/reader.py
+++ b/processor/reader.py
@@ -7,6 +7,16 @@
 
 log = logging.getLogger()
 
+# Conversion factors to microvolts (uV)
+UNIT_TO_UV = {
+    "volts": 1e6,
+    "v": 1e6,
+    "millivolts": 1e3,
+    "mv": 1e3,
+    "microvolts": 1,
+    "uv": 1,
+}
+
 
 class NWBElectricalSeriesReader:
     """
@@ -223,7 +233,7 @@ def get_chunk(self, start=None, end=None):
             end: End sample index (default: num_samples)
 
         Returns:
-            list of numpy arrays, one per channel, with scaling applied
+            list of numpy arrays, one per channel, with scaling applied in uV
         """
         # Single HDF5 read for all channels
         all_data = self.electrical_series.data[start:end, :]
@@ -239,5 +249,21 @@ def get_chunk(self, start=None, end=None):
         else:
             scaled_data = all_data * base_scale + offset
 
+        # Convert from the series' declared unit to microvolts (uV)
+        # This processor fixes the TimeSeriesChannel unit to uV.
+        # NWB conversion factor outputs values in the unit specified by electrical_series.unit (fixed to 'volts'; see schema excerpt below)
+        # From: https://github.com/NeurodataWithoutBorders/nwb-schema/blob/d65d42257003543c569ea7ac0cd6d7aee01c88d6/core/nwb.ecephys.yaml#L35-L42
+        #   - name: unit
+        #     dtype: text
+        #     value: volts
+        #     doc: Base unit of measurement for working with the data. This value is fixed to
+        #       'volts'. Actual stored values are not necessarily stored in these units. To
+        #       access the data in these units, multiply 'data' by 'conversion', followed by
+        #       'channel_conversion' (if present), and then add 'offset'.
+        unit = (getattr(self.electrical_series, "unit", None) or "volts").strip().lower()
+        if unit not in UNIT_TO_UV:
+            raise ValueError(f"Unknown unit '{unit}' - expected one of: {list(UNIT_TO_UV.keys())}")
+        scaled_data = scaled_data * UNIT_TO_UV[unit]
+
         # Split into list of per-channel arrays
         return [scaled_data[:, i] for i in range(self.num_channels)]
diff --git a/processor/timeseries_channel.py b/processor/timeseries_channel.py
index 62408fd..ff72fa1 100644
--- a/processor/timeseries_channel.py
+++ b/processor/timeseries_channel.py
@@ -1,4 +1,7 @@
 class TimeSeriesChannel:
+    # Unit is always microvolts - data is converted to uV during processing
+    UNIT = "uV"
+
     def __init__(
         self,
         index,
@@ -7,7 +10,6 @@ def __init__(
         start,
         end,
         type="CONTINUOUS",
-        unit="uV",
         group="default",
         last_annotation=0,
         properties=None,
@@ -27,7 +29,6 @@ def __init__(
 
         self.start = int(start)
         self.end = int(end)
-        self.unit = unit.strip()
         self.type = type.upper()
         self.group = group.strip()
         self.last_annotation = last_annotation
@@ -38,7 +39,7 @@ def as_dict(self):
             "name": self.name,
             "start": self.start,
             "end": self.end,
-            "unit": self.unit,
+            "unit": self.UNIT,
             "rate": self.rate,
             "type": self.type,
             "group": self.group,
@@ -53,11 +54,11 @@ def as_dict(self):
 
     @staticmethod
     def from_dict(channel, properties=None):
+        # Note: channel["unit"] is ignored - unit is always uV
         return TimeSeriesChannel(
             name=channel["name"],
             start=int(channel["start"]),
             end=int(channel["end"]),
-            unit=channel["unit"],
             rate=channel["rate"],
             type=channel.get("channelType", channel.get("type")),
             group=channel["group"],
diff --git a/processor/writer.py b/processor/writer.py
index 589d3ae..56cb688 100644
--- a/processor/writer.py
+++ b/processor/writer.py
@@ -89,8 +89,8 @@ def write_chunk(chunk, start_time, end_time, channel_index, output_dir):
 
         file_name = "channel-{}_{}_{}{}".format(
             "{index:05d}".format(index=channel_index),
-            int(start_time * 1e6),
-            int(end_time * 1e6),
+            round(start_time * 1e6),
+            round(end_time * 1e6),
             TIME_SERIES_BINARY_FILE_EXTENSION,
         )
         file_path = os.path.join(output_dir, file_name)
diff --git a/tests/conftest.py b/tests/conftest.py
index 5a1410b..bcf5916 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -94,6 +94,7 @@ def mock_electrical_series(sample_electrical_series_data, sample_timestamps):
     series.conversion = 1.0
     series.offset = 0.0
     series.channel_conversion = None
+    series.unit = "volts"
 
     # Mock electrodes table
     mock_electrodes = []
diff --git a/tests/test_reader.py b/tests/test_reader.py
index d49ca7b..8968eac 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -15,6 +15,7 @@ def create_mock_electrical_series(
     offset=0.0,
     channel_conversion=None,
     group_names=None,
+    unit="microvolts",
 ):
     """Helper to create mock ElectricalSeries objects."""
     series = Mock()
@@ -29,6 +30,7 @@ def create_mock_electrical_series(
     series.conversion = conversion
     series.offset = offset
     series.channel_conversion = channel_conversion
+    series.unit = unit
 
     # Create mock electrodes as a Mock object that can be iterated and has table attribute
     mock_electrode_list = []
@@ -307,3 +309,29 @@ def test_all_scaling_factors_combined(self):
         # Result: data * conversion * channel_conversion + offset
         np.testing.assert_array_equal(chunks[0], np.ones(10) * 7.0)  # 1 * 2 * 3 + 1 = 7
         np.testing.assert_array_equal(chunks[1], np.ones(10) * 9.0)  # 1 * 2 * 4 + 1 = 9
+
+    def test_volts_to_microvolts_conversion(self):
+        """Test that data in volts is converted to microvolts."""
+        series = create_mock_electrical_series(10, 2, rate=1000.0, unit="volts")
+        series.data = np.ones((10, 2)) * 1e-6  # 1 microvolt in volts
+        session_start = datetime(2023, 1, 1, 12, 0, 0)
+
+        reader = NWBElectricalSeriesReader(series, session_start)
+        chunks = reader.get_chunk()
+
+        # 1e-6 V * 1e6 = 1 uV
+        for chunk in chunks:
+            np.testing.assert_array_almost_equal(chunk, np.ones(10) * 1.0)
+
+    def test_millivolts_to_microvolts_conversion(self):
+        """Test that data in millivolts is converted to microvolts."""
+        series = create_mock_electrical_series(10, 2, rate=1000.0, unit="millivolts")
+        series.data = np.ones((10, 2)) * 0.001  # 1 microvolt in millivolts
+        session_start = datetime(2023, 1, 1, 12, 0, 0)
+
+        reader = NWBElectricalSeriesReader(series, session_start)
+        chunks = reader.get_chunk()
+
+        # 0.001 mV * 1e3 = 1 uV
+        for chunk in chunks:
+            np.testing.assert_array_almost_equal(chunk, np.ones(10) * 1.0)
diff --git a/tests/test_timeseries_channel.py b/tests/test_timeseries_channel.py
index 5a816b8..2ef7eb0 100644
--- a/tests/test_timeseries_channel.py
+++ b/tests/test_timeseries_channel.py
@@ -15,7 +15,7 @@ def test_basic_initialization(self):
         assert channel.start == 1000000
         assert channel.end == 2000000
         assert channel.type == "CONTINUOUS"
-        assert channel.unit == "uV"
+        assert TimeSeriesChannel.UNIT == "uV"
         assert channel.group == "default"
         assert channel.last_annotation == 0
         assert channel.properties == []
@@ -30,7 +30,6 @@ def test_initialization_with_all_parameters(self):
             start=500000,
             end=1500000,
             type="UNIT",
-            unit=" mV ",
             group=" electrode_group ",
             last_annotation=100,
             properties=[{"key": "value"}],
@@ -43,7 +42,7 @@ def test_initialization_with_all_parameters(self):
         assert channel.start == 500000
         assert channel.end == 1500000
         assert channel.type == "UNIT"  # should be uppercased
-        assert channel.unit == "mV"  # should be stripped
+        assert TimeSeriesChannel.UNIT == "uV"  # unit is always uV
         assert channel.group == "electrode_group"  # should be stripped
         assert channel.last_annotation == 100
         assert channel.properties == [{"key": "value"}]
@@ -124,7 +123,7 @@ def test_from_dict_with_type_key(self, sample_channel_dict):
         assert channel.name == "Channel 1"
         assert channel.start == 1000000
         assert channel.end == 2000000
-        assert channel.unit == "uV"
+        assert TimeSeriesChannel.UNIT == "uV"  # unit is always uV
         assert channel.rate == 30000.0
         assert channel.type == "CONTINUOUS"
         assert channel.group == "default"
@@ -281,7 +280,6 @@ def test_as_dict_from_dict_round_trip(self):
             start=500000,
             end=1500000,
             type="UNIT",
-            unit="mV",
             group="test_group",
             last_annotation=50,
             properties=[{"key": "value"}],
@@ -296,7 +294,7 @@ def test_as_dict_from_dict_round_trip(self):
         assert restored.start == original.start
         assert restored.end == original.end
         assert restored.type == original.type
-        assert restored.unit == original.unit
+        assert serialized["unit"] == "uV"  # unit is always uV in serialized output
         assert restored.group == original.group
         assert restored.last_annotation == original.last_annotation
         assert restored.properties == original.properties
diff --git a/tests/test_timeseries_client.py b/tests/test_timeseries_client.py
index 6171431..3dac1bf 100644
--- a/tests/test_timeseries_client.py
+++ b/tests/test_timeseries_client.py
@@ -76,7 +76,7 @@ def test_create_channel_sends_correct_body(self, mock_session_manager):
         client = TimeSeriesClient("https://api.test.com", mock_session_manager)
 
         channel = TimeSeriesChannel(
-            index=0, name="Ch1", rate=1000.0, start=0, end=1000, type="UNIT", unit="mV", group="test_group"
+            index=0, name="Ch1", rate=1000.0, start=0, end=1000, type="UNIT", group="test_group"
         )
         client.create_channel("pkg-123", channel)
 
@@ -86,7 +86,7 @@ def test_create_channel_sends_correct_body(self, mock_session_manager):
         assert body["rate"] == 1000.0
         assert body["channelType"] == "UNIT"  # 'type' should be renamed to 'channelType'
         assert "type" not in body  # Original 'type' key should be removed
-        assert body["unit"] == "mV"
+        assert body["unit"] == "uV"  # unit is always uV
         assert body["group"] == "test_group"
 
     @responses.activate
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 9578459..67c00f2 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -34,9 +34,9 @@ def test_write_chunk_creates_file(self, temp_output_dir):
 
         TimeSeriesChunkWriter.write_chunk(chunk, start_time, end_time, 0, temp_output_dir)
 
-        # Check file was created
+        # Check file was created (use round() to match writer behavior)
         expected_filename = (
-            f"channel-00000_{int(start_time * 1e6)}_{int(end_time * 1e6)}{TIME_SERIES_BINARY_FILE_EXTENSION}"
+            f"channel-00000_{round(start_time * 1e6)}_{round(end_time * 1e6)}{TIME_SERIES_BINARY_FILE_EXTENSION}"
         )
         file_path = os.path.join(temp_output_dir, expected_filename)
         assert os.path.exists(file_path)
@@ -116,7 +116,7 @@ def test_write_channel_creates_metadata_file(self, temp_output_dir, session_star
         writer = TimeSeriesChunkWriter(session_start_time, temp_output_dir, 1000)
 
         channel = TimeSeriesChannel(
-            index=5, name="Test Channel", rate=30000.0, start=1000000, end=2000000, unit="mV", group="electrode_group"
+            index=5, name="Test Channel", rate=30000.0, start=1000000, end=2000000, group="electrode_group"
         )
         writer.write_channel(channel)
 
@@ -135,7 +135,6 @@ def test_write_channel_json_content(self, temp_output_dir, session_start_time):
             rate=30000.0,
             start=1000000,
             end=2000000,
-            unit="mV",
             type="CONTINUOUS",
             group="test_group",
             last_annotation=100,
@@ -153,7 +152,7 @@ def test_write_channel_json_content(self, temp_output_dir, session_start_time):
         assert data["rate"] == 30000.0
         assert data["start"] == 1000000
         assert data["end"] == 2000000
-        assert data["unit"] == "mV"
+        assert data["unit"] == "uV"  # unit is always uV
         assert data["type"] == "CONTINUOUS"
         assert data["group"] == "test_group"
         assert data["lastAnnotation"] == 100