Skip to content

Commit

Permalink
first attempt at going to pgm-1.6.x
Browse files Browse the repository at this point in the history
Signed-off-by: Martijn Govers <[email protected]>
  • Loading branch information
mgovers committed Oct 4, 2023
1 parent 4245735 commit 11bae32
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies = [
"numpy>=1.20",
"openpyxl",
"pandas",
"power_grid_model>=1.4, <1.6",
"power_grid_model>=1.6",
"pyyaml",
"structlog",
"tqdm",
Expand Down
69 changes: 39 additions & 30 deletions src/power_grid_model_io/converters/pgm_json_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,16 @@
Power Grid Model 'Converter': Load and store power grid model data in the native PGM JSON format.
"""

import json
from pathlib import Path
from typing import Optional, Union, cast
from typing import Any, Dict, Optional, Tuple, Union, cast
import warnings

import numpy as np
from power_grid_model.data_types import BatchDataset, ComponentList, Dataset, SingleDataset, SinglePythonDataset
from power_grid_model.utils import (
convert_batch_dataset_to_batch_list,
convert_list_to_batch_data,
initialize_array,
is_nan,
)
from power_grid_model import initialize_array
from power_grid_model.utils import json_deserialize, json_serialize
from power_grid_model._utils import is_nan

from power_grid_model_io.converters.base_converter import BaseConverter
from power_grid_model_io.data_stores.json_file_store import JsonFileStore
Expand Down Expand Up @@ -65,14 +64,38 @@ def _parse_data(self, data: StructuredData, data_type: str, extra_info: Optional
"""
self._log.debug(f"Loading PGM {data_type} data")
if isinstance(data, list):
parsed_data = [
self._parse_dataset(data=dataset, data_type=data_type, extra_info=extra_info) for dataset in data
]
return convert_list_to_batch_data(parsed_data)
if not isinstance(data, dict):
raise TypeError("Raw data should be either a list or a dictionary!")
return self._parse_dataset(data=data, data_type=data_type, extra_info=extra_info)

result = json_deserialize(
json.dumps(
{
"attributes": {},
"data": data,
"is_batch": isinstance(data, list),
"type": data_type,
"version": "1.0",
}
)
)

if extra_info is not None:
# extract extra info
reserialized = self._serialize_data(data=result, extra_info=None)
if len(data) != len(reserialized):
warnings.warn("The extra info cannot be determined.")
return {}

extra_info: ExtraInfo = {}

for entry in data:
entry_id = entry["id"]
reserialized_entry = next(filter(lambda x, desired=entry_id: x["id"] == desired, reserialized), None)
if reserialized_entry is None:
warnings.warn(f"The extra info cannot be determined for ID {entry_id}")
for key, value in entry.items():
if key not in reserialized_entry:
extra_info[entry_id] = value

return result

def _parse_dataset(
self, data: SinglePythonDataset, data_type: str, extra_info: Optional[ExtraInfo]
Expand Down Expand Up @@ -161,21 +184,7 @@ def _serialize_data(self, data: Dataset, extra_info: Optional[ExtraInfo]) -> Str
the function returns a structured dataset
"""
# Check if the dataset is a single dataset or batch dataset
# It is batch dataset if it is 2D array or a indptr/data structure

# If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
if self._is_batch(data=data):
if extra_info is not None:
self._log.warning("Extra info is not supported for batch data export")
# We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
data = cast(BatchDataset, data)
list_data = convert_batch_dataset_to_batch_list(data)
return [self._serialize_dataset(data=x) for x in list_data]

# We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
data = cast(SingleDataset, data)
return self._serialize_dataset(data=data, extra_info=extra_info)
return json.loads(json_serialize(data))["data"]

@staticmethod
def _is_batch(data: Dataset) -> bool:
Expand Down
6 changes: 4 additions & 2 deletions tests/unit/converters/test_pgm_json_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest
from power_grid_model import initialize_array
from power_grid_model.data_types import BatchDataset, SingleDataset
from power_grid_model.errors import PowerGridSerializationError
from structlog.testing import capture_logs

from power_grid_model_io.converters.pgm_json_converter import PgmJsonConverter
Expand Down Expand Up @@ -60,7 +61,7 @@ def pgm_sparse_batch_data():


def test_parse_data(converter: PgmJsonConverter, structured_input_data, structured_batch_data):
with pytest.raises(TypeError, match="Raw data should be either a list or a dictionary!"):
with pytest.raises(PowerGridSerializationError, match="Wrong data type for key data in the root level dictionary!"):
converter._parse_data(data="str", data_type="input", extra_info=None) # type: ignore

# test for input dataset
Expand Down Expand Up @@ -118,7 +119,8 @@ def test_serialize_data(converter: PgmJsonConverter, pgm_input_data: SingleDatas
with capture_logs() as cap_log:
structured_batch_data = converter._serialize_data(data=pgm_batch_data, extra_info={})
assert structured_batch_data == [{"line": [{}, {}]}, {"line": [{}, {}]}, {"line": [{}, {}]}]
assert_log_match(cap_log[0], "warning", "Extra info is not supported for batch data export")
# TODO(mgovers): re-add extra info
# assert_log_match(cap_log[0], "warning", "Extra info is not supported for batch data export")


def test_is_batch(
Expand Down
25 changes: 20 additions & 5 deletions tests/validation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from functools import lru_cache
from pathlib import Path
from typing import Generator, List, Mapping, Tuple
import warnings

import numpy as np
import pandas as pd
from power_grid_model import power_grid_meta_data
from power_grid_model.data_types import SingleDataset, SinglePythonDataset
from power_grid_model.utils import convert_python_single_dataset_to_single_dataset
from power_grid_model.errors import PowerGridSerializationError
from power_grid_model.utils import import_json_data, json_deserialize_from_file

from power_grid_model_io.data_types import ExtraInfo, StructuredData

Expand Down Expand Up @@ -172,10 +174,23 @@ def load_json_single_dataset(file_path: Path, data_type: str) -> Tuple[SingleDat
Returns: A native pgm dataset and an extra info lookup table
"""
raw_data = load_json_file(file_path)
assert isinstance(raw_data, dict)
dataset = convert_python_single_dataset_to_single_dataset(data=raw_data, data_type=data_type, ignore_extra=True)
extra_info = extract_extra_info(raw_data, data_type=data_type)
try:
dataset = json_deserialize_from_file(file_path=file_path)
except PowerGridSerializationError as actual_error:
try:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dataset = import_json_data(json_file=file_path, data_type=data_type, ignore_extra=True)

warnings.warn(
DeprecationWarning,
"Provided file path is in a deprecated format. This is a temporary backwards-compatibility measure. "
"Please upgrade to use_deprecated_format=False or json_serialize_to_file as soon as possible.",
)
except PowerGridSerializationError as fallback_error:
raise actual_error

extra_info = extract_extra_info(file_path, data_type=data_type)
return dataset, extra_info


Expand Down

0 comments on commit 11bae32

Please sign in to comment.