diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py
index e3010690e..8c0de1d93 100644
--- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py
+++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py
@@ -128,7 +128,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str)
         # Then read the header
         self._skip_rows(fp, config_format.skip_rows_before_header)
         reader = csv.reader(fp, dialect=dialect_name)  # type: ignore
-        headers = list(next(reader))
+        headers = [header.strip() for header in next(reader)]
         fp.seek(0)
         return headers
 
@@ -209,7 +209,7 @@ async def infer_schema(
                 failure_type=FailureType.config_error,
             )
         schema = {
-            header.strip(): {"type": type_inferred.infer()}
+            header: {"type": type_inferred.infer()}
             for header, type_inferred in type_inferrer_by_field.items()
         }
         data_generator.close()
diff --git a/unit_tests/sources/file_based/file_types/test_csv_parser.py b/unit_tests/sources/file_based/file_types/test_csv_parser.py
index 0b49dd66d..7470b6e33 100644
--- a/unit_tests/sources/file_based/file_types/test_csv_parser.py
+++ b/unit_tests/sources/file_based/file_types/test_csv_parser.py
@@ -658,6 +658,20 @@ def test_read_data_with_encoding_error(self) -> None:
         assert "encoding" in ate.value.message
         assert self._csv_reader._get_headers.called
 
+    def test_read_data_strips_leading_and_trailing_whitespace_in_header(self) -> None:
+        self._stream_reader.open_file.return_value = (
+            CsvFileBuilder()
+            .with_data(
+                [
+                    "header1 ,\theader2",
+                    "1,2",
+                ]
+            )
+            .build()
+        )
+        data_generator = self._read_data()
+        assert list(data_generator) == [{"header1": "1", "header2": "2"}]
+
     def _read_data(self) -> Generator[Dict[str, str], None, None]:
         data_generator = self._csv_reader.read_data(
             self._config,