From 11bae32682358df09c16ee93726c8a627e9198e6 Mon Sep 17 00:00:00 2001 From: Martijn Govers Date: Wed, 4 Oct 2023 09:00:10 +0200 Subject: [PATCH] first attempt at going to pgm-1.6.x Signed-off-by: Martijn Govers --- pyproject.toml | 2 +- .../converters/pgm_json_converter.py | 69 +++++++++++-------- .../converters/test_pgm_json_converter.py | 6 +- tests/validation/utils.py | 25 +++++-- 4 files changed, 64 insertions(+), 38 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 040791f1..53ecb3a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "numpy>=1.20", "openpyxl", "pandas", - "power_grid_model>=1.4, <1.6", + "power_grid_model>=1.6", "pyyaml", "structlog", "tqdm", diff --git a/src/power_grid_model_io/converters/pgm_json_converter.py b/src/power_grid_model_io/converters/pgm_json_converter.py index ecca7c2c..10214f01 100644 --- a/src/power_grid_model_io/converters/pgm_json_converter.py +++ b/src/power_grid_model_io/converters/pgm_json_converter.py @@ -5,17 +5,16 @@ Power Grid Model 'Converter': Load and store power grid model data in the native PGM JSON format. """ +import json from pathlib import Path -from typing import Optional, Union, cast +from typing import Any, Dict, Optional, Tuple, Union, cast +import warnings import numpy as np from power_grid_model.data_types import BatchDataset, ComponentList, Dataset, SingleDataset, SinglePythonDataset -from power_grid_model.utils import ( - convert_batch_dataset_to_batch_list, - convert_list_to_batch_data, - initialize_array, - is_nan, -) +from power_grid_model import initialize_array +from power_grid_model.utils import json_deserialize, json_serialize +from power_grid_model._utils import is_nan from power_grid_model_io.converters.base_converter import BaseConverter from power_grid_model_io.data_stores.json_file_store import JsonFileStore @@ -65,14 +64,38 @@ def _parse_data(self, data: StructuredData, data_type: str, extra_info: Optional """ self._log.debug(f"Loading PGM {data_type} data") - if isinstance(data, list): - parsed_data = [ - self._parse_dataset(data=dataset, data_type=data_type, extra_info=extra_info) for dataset in data - ] - return convert_list_to_batch_data(parsed_data) - if not isinstance(data, dict): - raise TypeError("Raw data should be either a list or a dictionary!") - return self._parse_dataset(data=data, data_type=data_type, extra_info=extra_info) + + result = json_deserialize( + json.dumps( + { + "attributes": {}, + "data": data, + "is_batch": isinstance(data, list), + "type": data_type, + "version": "1.0", + } + ) + ) + + if extra_info is not None: + # extract extra info + reserialized = self._serialize_data(data=result, extra_info=None) + if len(data) != len(reserialized): + warnings.warn("The extra info cannot be determined.") + return {} + + extra_info: ExtraInfo = {} + + for entry in data: + entry_id = entry["id"] + reserialized_entry = next(filter(lambda x, desired=entry_id: x["id"] == desired, reserialized), None) + if reserialized_entry is None: + warnings.warn(f"The extra info cannot be determined for ID {entry_id}") + for key, value in entry.items(): + if key not in reserialized_entry: + extra_info[entry_id] = value + + return result def _parse_dataset( self, data: SinglePythonDataset, data_type: str, extra_info: Optional[ExtraInfo] @@ -161,21 +184,7 @@ def _serialize_data(self, data: Dataset, extra_info: Optional[ExtraInfo]) -> Str the function returns a structured dataset """ - # Check if the dataset is a single dataset or batch dataset - # It is batch dataset if it is 2D array or a indptr/data structure - - # If it is a batch, convert the batch data to a list of batches, then convert each batch individually. - if self._is_batch(data=data): - if extra_info is not None: - self._log.warning("Extra info is not supported for batch data export") - # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset - data = cast(BatchDataset, data) - list_data = convert_batch_dataset_to_batch_list(data) - return [self._serialize_dataset(data=x) for x in list_data] - - # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset - data = cast(SingleDataset, data) - return self._serialize_dataset(data=data, extra_info=extra_info) + return json.loads(json_serialize(data))["data"] @staticmethod def _is_batch(data: Dataset) -> bool: diff --git a/tests/unit/converters/test_pgm_json_converter.py b/tests/unit/converters/test_pgm_json_converter.py index b47ab09f..9ff96749 100644 --- a/tests/unit/converters/test_pgm_json_converter.py +++ b/tests/unit/converters/test_pgm_json_converter.py @@ -6,6 +6,7 @@ import pytest from power_grid_model import initialize_array from power_grid_model.data_types import BatchDataset, SingleDataset +from power_grid_model.errors import PowerGridSerializationError from structlog.testing import capture_logs from power_grid_model_io.converters.pgm_json_converter import PgmJsonConverter @@ -60,7 +61,7 @@ def pgm_sparse_batch_data(): def test_parse_data(converter: PgmJsonConverter, structured_input_data, structured_batch_data): - with pytest.raises(TypeError, match="Raw data should be either a list or a dictionary!"): + with pytest.raises(PowerGridSerializationError, match="Wrong data type for key data in the root level dictionary!"): converter._parse_data(data="str", data_type="input", extra_info=None) # type: ignore # test for input dataset @@ -118,7 +119,8 @@ def test_serialize_data(converter: PgmJsonConverter, pgm_input_data: SingleDatas with capture_logs() as cap_log: structured_batch_data = converter._serialize_data(data=pgm_batch_data, extra_info={}) assert structured_batch_data == [{"line": [{}, {}]}, {"line": [{}, {}]}, {"line": [{}, {}]}] - assert_log_match(cap_log[0], "warning", "Extra info is not supported for batch data export") + # TODO(mgovers): re-add extra info + # assert_log_match(cap_log[0], "warning", "Extra info is not supported for batch data export") def test_is_batch( diff --git a/tests/validation/utils.py b/tests/validation/utils.py index f85c36b7..f736e81f 100644 --- a/tests/validation/utils.py +++ b/tests/validation/utils.py @@ -6,12 +6,14 @@ from functools import lru_cache from pathlib import Path from typing import Generator, List, Mapping, Tuple +import warnings import numpy as np import pandas as pd from power_grid_model import power_grid_meta_data from power_grid_model.data_types import SingleDataset, SinglePythonDataset -from power_grid_model.utils import convert_python_single_dataset_to_single_dataset +from power_grid_model.errors import PowerGridSerializationError +from power_grid_model.utils import import_json_data, json_deserialize_from_file from power_grid_model_io.data_types import ExtraInfo, StructuredData @@ -172,10 +174,23 @@ def load_json_single_dataset(file_path: Path, data_type: str) -> Tuple[SingleDat Returns: A native pgm dataset and an extra info lookup table """ - raw_data = load_json_file(file_path) - assert isinstance(raw_data, dict) - dataset = convert_python_single_dataset_to_single_dataset(data=raw_data, data_type=data_type, ignore_extra=True) - extra_info = extract_extra_info(raw_data, data_type=data_type) + try: + dataset = json_deserialize_from_file(file_path=file_path) + except PowerGridSerializationError as actual_error: + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + dataset = import_json_data(json_file=file_path, data_type=data_type, ignore_extra=True) + + warnings.warn( + DeprecationWarning, + "Provided file path is in a deprecated format. This is a temporary backwards-compatibility measure. " + "Please upgrade to use_deprecated_format=False or json_serialize_to_file as soon as possible.", + ) + except PowerGridSerializationError as fallback_error: + raise actual_error + + extra_info = extract_extra_info(file_path, data_type=data_type) return dataset, extra_info