diff --git a/src/power_grid_model_io/data_stores/excel_file_store.py b/src/power_grid_model_io/data_stores/excel_file_store.py
index 5e2daf2b..86e9b0b7 100644
--- a/src/power_grid_model_io/data_stores/excel_file_store.py
+++ b/src/power_grid_model_io/data_stores/excel_file_store.py
@@ -88,11 +88,16 @@ def load(self) -> TabularData:
 
         def lazy_sheet_loader(xls_file: pd.ExcelFile, xls_sheet_name: str):
             def sheet_loader():
-                sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows)
+                # Read only the header first: the column labels decide which columns must be parsed as text.
+                preview = xls_file.parse(xls_sheet_name, header=self._header_rows, nrows=0)
+                # Force every column whose label contains "Name" to str, so name values are not parsed as numbers.
+                dtype = {col: str for col in preview.columns if "Name" in str(col)}
+                sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows, dtype=dtype)
                 sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
                 sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name)
                 sheet_data = self._process_uuid_columns(data=sheet_data, sheet_name=xls_sheet_name)
                 sheet_data = self._update_column_names(data=sheet_data)
+
                 return sheet_data
 
             return sheet_loader
diff --git a/tests/unit/data_stores/test_vision_excel_file_store.py b/tests/unit/data_stores/test_vision_excel_file_store.py
index cdb3ddc6..ba08cc7c 100644
--- a/tests/unit/data_stores/test_vision_excel_file_store.py
+++ b/tests/unit/data_stores/test_vision_excel_file_store.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from unittest.mock import MagicMock, mock_open, patch
 
+import pandas as pd
+
 from power_grid_model_io.data_stores.vision_excel_file_store import VisionExcelFileStore
 
 
@@ -19,4 +21,61 @@ def test_header_rows(mock_excel_file: MagicMock):
     data["foo"]
 
     # Assert
-    mock_excel_file.return_value.parse.assert_called_once_with("foo", header=[0, 1])
+    # The sheet is parsed twice: a header-only preview, then the actual load; both must use the header rows
+    parse_calls = mock_excel_file.return_value.parse.call_args_list
+    assert len(parse_calls) == 2
+    assert all(c[0] == ("foo",) and c[1]["header"] == [0, 1] for c in parse_calls)
+
+
+@patch("power_grid_model_io.data_stores.excel_file_store.pd.ExcelFile")
+@patch("power_grid_model_io.data_stores.excel_file_store.Path.open", mock_open())
+def test_name_column_dtype_conversion(mock_excel_file: MagicMock):
+    """Columns whose label contains "Name" must be requested from pandas with dtype=str."""
+    store = VisionExcelFileStore(file_path=Path("dummy.xlsx"))
+    mock_excel_file.return_value.sheet_names = ["test_sheet"]
+
+    preview_df = pd.DataFrame(columns=["Mock.Name", "Other.Column", "ID"])
+
+    def mock_parse(*args, **kwargs):
+        if kwargs.get("nrows") == 0:
+            return preview_df
+        else:
+            actual_data = {
+                "Mock.Name": [12345678900000000000, 987.654],
+                "Other.Column": ["value1", "value2"],
+                "ID": [1, 2],
+                "ratio": [0.1, 0.2],
+            }
+            df = pd.DataFrame(actual_data)
+
+            # Mimic the effect of dtype=str on the fake sheet: stringify the requested columns
+            if "dtype" in kwargs:
+                for col, dtype_val in kwargs["dtype"].items():
+                    if col in df.columns and dtype_val is str:
+                        df[col] = df[col].apply(lambda x: str(int(x)) if float(x).is_integer() else str(x))
+
+            return df
+
+    mock_excel_file.return_value.parse.side_effect = mock_parse
+
+    data = store.load()
+    result_df = data["test_sheet"]
+
+    assert mock_excel_file.return_value.parse.call_count == 2
+
+    first_call = mock_excel_file.return_value.parse.call_args_list[0]
+    assert first_call[1]["nrows"] == 0
+
+    second_call = mock_excel_file.return_value.parse.call_args_list[1]
+    assert "dtype" in second_call[1]
+    assert "Mock.Name" in second_call[1]["dtype"]
+    assert second_call[1]["dtype"]["Mock.Name"] is str
+
+    assert result_df["Mock.Name"][0] == "12345678900000000000"  # Long int as string
+    assert result_df["Mock.Name"][1] == "987.654"  # Float as string
+    assert result_df["Other.Column"][0] == "value1"
+    assert result_df["Other.Column"][1] == "value2"
+    assert result_df["ID"][0] == 1
+    assert result_df["ID"][1] == 2
+    assert result_df["ratio"][0] == 0.1
+    assert result_df["ratio"][1] == 0.2