Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/power_grid_model_io/data_stores/excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,18 @@ def load(self) -> TabularData:

def lazy_sheet_loader(xls_file: pd.ExcelFile, xls_sheet_name: str):
    """Create a deferred loader for one sheet of an Excel file.

    Args:
        xls_file: The Excel file object the sheet is parsed from.
        xls_sheet_name: Name of the sheet to parse once the loader is invoked.

    Returns:
        A zero-argument callable that parses the sheet, post-processes it and
        returns the resulting data frame. Nothing is read until it is called.
    """

    def sheet_loader():
        # Header-only pass (nrows=0): discover the column labels without
        # reading any data rows, so the dtype map can be built up front.
        preview = xls_file.parse(xls_sheet_name, header=self._header_rows, nrows=0)

        # Columns whose label mentions "Name" are requested as str so their
        # cells are read as text instead of having a numeric dtype inferred.
        # With multiple header rows the labels are tuples; the first level is
        # checked explicitly as well.
        dtype = {
            col: str
            for col in preview.columns
            if "Name" in str(col) or (isinstance(col, tuple) and "Name" in str(col[0]))
        }

        # The data rows are parsed exactly once, with the dtype overrides applied.
        sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows, dtype=dtype)
        sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
        sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name)
        sheet_data = self._process_uuid_columns(data=sheet_data, sheet_name=xls_sheet_name)
        sheet_data = self._update_column_names(data=sheet_data)

        return sheet_data

    return sheet_loader
Expand Down
56 changes: 55 additions & 1 deletion tests/unit/data_stores/test_vision_excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from pathlib import Path
from unittest.mock import MagicMock, mock_open, patch

import pandas as pd

from power_grid_model_io.data_stores.vision_excel_file_store import VisionExcelFileStore


Expand All @@ -19,4 +21,56 @@ def test_header_rows(mock_excel_file: MagicMock):
data["foo"]

# Assert
mock_excel_file.return_value.parse.assert_called_once_with("foo", header=[0, 1])
assert mock_excel_file.return_value.parse.call_count == 2


@patch("power_grid_model_io.data_stores.excel_file_store.pd.ExcelFile")
@patch("power_grid_model_io.data_stores.excel_file_store.Path.open", mock_open())
def test_name_column_dtype_conversion(mock_excel_file: MagicMock):
    """Columns labelled with "Name" are requested as str and loaded as strings.

    The Excel file is replaced by a mock whose ``parse`` returns a header-only
    frame for the preview call (``nrows=0``) and a small data frame otherwise,
    emulating the ``dtype=str`` conversion for the requested columns.
    """
    # Arrange
    store = VisionExcelFileStore(file_path=Path("dummy.xlsx"))
    parse_mock = mock_excel_file.return_value.parse
    mock_excel_file.return_value.sheet_names = ["test_sheet"]

    header_only = pd.DataFrame(columns=["Mock.Name", "Other.Column", "ID"])

    def stringify(value):
        # Whole numbers are rendered without a fractional part.
        return str(int(value)) if float(value).is_integer() else str(value)

    def fake_parse(*args, **kwargs):
        # The preview call only needs the column labels.
        if kwargs.get("nrows") == 0:
            return header_only

        frame = pd.DataFrame(
            {
                "Mock.Name": [12345678900000000000, 987.654],
                "Other.Column": ["value1", "value2"],
                "ID": [1, 2],
                "ratio": [0.1, 0.2],
            }
        )

        # Emulate the string conversion for every column requested as str.
        if "dtype" in kwargs:
            for label, requested in kwargs["dtype"].items():
                if requested is str and label in frame.columns:
                    frame[label] = frame[label].apply(stringify)

        return frame

    parse_mock.side_effect = fake_parse

    # Act
    result_df = store.load()["test_sheet"]

    # Assert: one header-only preview call followed by one full parse
    assert parse_mock.call_count == 2

    preview_kwargs = parse_mock.call_args_list[0][1]
    assert preview_kwargs["nrows"] == 0

    full_kwargs = parse_mock.call_args_list[1][1]
    assert "dtype" in full_kwargs
    assert "Mock.Name" in full_kwargs["dtype"]
    assert full_kwargs["dtype"]["Mock.Name"] is str

    # Assert: only the "Name" column was turned into strings
    name_column = result_df["Mock.Name"]
    assert name_column[0] == "12345678900000000000"  # Long int as string
    assert name_column[1] == "987.654"  # Float as string

    other_column = result_df["Other.Column"]
    assert other_column[0] == "value1"
    assert other_column[1] == "value2"

    id_column = result_df["ID"]
    assert id_column[0] == 1
    assert id_column[1] == 2

    ratio_column = result_df["ratio"]
    assert ratio_column[0] == 0.1
    assert ratio_column[1] == 0.2
Loading