diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/_module_source_parser.py b/cognite_toolkit/_cdf_tk/commands/build_v2/_module_source_parser.py
index f340778833..b1674beaea 100644
--- a/cognite_toolkit/_cdf_tk/commands/build_v2/_module_source_parser.py
+++ b/cognite_toolkit/_cdf_tk/commands/build_v2/_module_source_parser.py
@@ -1,4 +1,6 @@
+from collections import defaultdict
 from collections.abc import Iterable
+from itertools import groupby
 from pathlib import Path
 from typing import Any, cast
 
@@ -11,7 +13,7 @@
     RelativeFilePath,
 )
 from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._module import BuildVariable
-from cognite_toolkit._cdf_tk.constants import EXCL_FILES
+from cognite_toolkit._cdf_tk.constants import EXCL_FILES, MODULES
 from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA, ResourceTypes
 
 
@@ -36,6 +38,14 @@ def parse(self, yaml_files: list[RelativeFilePath], variables: dict[str, Any]) -
         if errors:
             self.errors.extend(errors)
             return []
+        return self._create_module_sources(selected_modules, source_by_module_id, build_variables)
+
+    def _create_module_sources(
+        self,
+        selected_modules: list[Path],
+        source_by_module_id: dict[Path, ModuleSource],
+        build_variables: dict[Path, list[BuildVariable]],
+    ) -> list[ModuleSource]:
         module_sources: list[ModuleSource] = []
         for module in selected_modules:
             source = source_by_module_id[module]
@@ -110,10 +120,145 @@ def _select_modules(
         ]
 
     @classmethod
-    def _parse_variables(
+    def _parse_module_variables(
         cls,
         variables: dict[str, Any],
         available_modules: set[RelativeDirPath],
         selected_modules: set[RelativeDirPath],
-    ) -> tuple[dict[RelativeDirPath, list[list[BuildVariable]]], list[ModelSyntaxError]]:
-        return {}, []
+    ) -> tuple[dict[RelativeDirPath, dict[int, list[BuildVariable]]], list[ModelSyntaxError]]:
+        """Parse the variables and organize them by selected module and iteration.
+
+        Variables can be defined at the root, at a module path, or at any parent
+        directory of a module.
+
+        Args:
+            variables: Nested mapping of variables, keyed by path part or variable name.
+            available_modules: Relative paths of all available modules.
+            selected_modules: Relative paths of the selected modules.
+
+        Returns:
+            The variables of each selected module grouped by iteration (0 is used for
+            variables outside an iteration), and the errors found while parsing.
+        """
+        all_available_paths = (
+            {Path("")} | available_modules | {parent for module in available_modules for parent in module.parents}
+        )
+        selected_paths = (
+            {Path("")} | selected_modules | {parent for module in selected_modules for parent in module.parents}
+        )
+        parsed_variables, errors = cls._parse_variables(variables, all_available_paths, selected_paths)
+        variable_by_module = cls._organize_variables_by_module(parsed_variables, selected_modules)
+        return variable_by_module, errors
+
+    @classmethod
+    def _parse_variables(
+        cls, variables: dict[str, Any], available_paths: set[RelativeDirPath], selected_paths: set[RelativeDirPath]
+    ) -> tuple[dict[RelativeDirPath, list[BuildVariable]], list[ModelSyntaxError]]:
+        """Flatten the nested variables into a list of variables per path they are defined at.
+
+        Scalars and lists of scalars are variables. A dictionary is a sub-path, and a list of
+        dictionaries is a sub-path with one iteration per item, numbered from 1. Every
+        sub-path must be one of the available paths, otherwise an error is reported.
+
+        Args:
+            variables: Nested mapping of variables, keyed by path part or variable name.
+            available_paths: All paths variables can be defined at.
+            selected_paths: The paths whose variables are marked as selected.
+
+        Returns:
+            The variables by the path they are defined at, and the errors found while parsing.
+
+        Raises:
+            NotImplementedError: If a value is not a scalar, a dictionary, or a list.
+        """
+        variables_by_path: dict[RelativeDirPath, list[BuildVariable]] = defaultdict(list)
+        errors: list[ModelSyntaxError] = []
+        # Stack of (path, iteration, variables at that path) still to parse; it is popped from the end.
+        to_check: list[tuple[RelativeDirPath, int | None, dict[str, Any]]] = [(Path(""), None, variables)]
+        while to_check:
+            path, iteration, subdict = to_check.pop()
+            for key, value in subdict.items():
+                subpath = path / key
+                if isinstance(value, str | float | int | bool):
+                    variables_by_path[path].append(
+                        BuildVariable(id=subpath, value=value, is_selected=path in selected_paths, iteration=iteration)
+                    )
+                elif isinstance(value, dict):
+                    if subpath in available_paths:
+                        to_check.append((subpath, iteration, value))
+                    else:
+                        errors.append(cls._invalid_variable_path_error(subpath))
+                elif isinstance(value, list):
+                    if all(isinstance(item, str | float | int | bool) for item in value):
+                        variables_by_path[path].append(
+                            BuildVariable(
+                                id=subpath, value=value, is_selected=path in selected_paths, iteration=iteration
+                            )
+                        )
+                    elif all(isinstance(item, dict) for item in value):
+                        # A list of dictionaries is a sub-path as well, so it is validated like a dictionary.
+                        if subpath in available_paths:
+                            to_check.extend((subpath, idx, item) for idx, item in enumerate(value, start=1))
+                        else:
+                            errors.append(cls._invalid_variable_path_error(subpath))
+                    else:
+                        errors.append(
+                            ModelSyntaxError(
+                                code=cls.VARIABLE_ERROR_CODE,
+                                message=f"Invalid variable type in list for variable {'.'.join(subpath.parts)}.",
+                                fix="Ensure that all items in the list are of the same supported type either (str, int, float, bool) or dict.",
+                            )
+                        )
+                else:
+                    raise NotImplementedError(f"Unsupported variable type: {type(value)} for variable {subpath}")
+        return variables_by_path, errors
+
+    @classmethod
+    def _invalid_variable_path_error(cls, subpath: RelativeDirPath) -> ModelSyntaxError:
+        """Create the error for a variable path that does not match the module folder structure."""
+        return ModelSyntaxError(
+            code=cls.VARIABLE_ERROR_CODE,
+            message=f"Invalid variable path: {'.'.join(subpath.parts)}. This does not correspond to the "
+            f"folder structure inside the {MODULES} directory.",
+            fix="Ensure that the variable paths correspond to the folder structure inside the modules directory.",
+        )
+
+    @classmethod
+    def _organize_variables_by_module(
+        cls, variables_by_path: dict[RelativeDirPath, list[BuildVariable]], selected_modules: set[RelativeDirPath]
+    ) -> dict[RelativeDirPath, dict[int, list[BuildVariable]]]:
+        """Collect the variables that apply to each selected module, grouped by iteration.
+
+        A variable applies to a module when it is defined at the module path or at one of
+        its parent paths.
+
+        Args:
+            variables_by_path: The variables by the path they are defined at.
+            selected_modules: Relative paths of the selected modules.
+
+        Returns:
+            The variables of each selected module by iteration, where variables without an
+            iteration are stored under 0. Modules without any variables are left out.
+        """
+
+        def iteration_of(variable: BuildVariable) -> int:
+            return variable.iteration or 0
+
+        applicable_paths_by_module: dict[RelativeDirPath, frozenset[RelativeDirPath]] = {
+            module: frozenset((module, *module.parents)) for module in selected_modules
+        }
+        variables_by_module: dict[RelativeDirPath, dict[int, list[BuildVariable]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+        for variable_path, variables in variables_by_path.items():
+            # The grouping is the same for every module, so it is done once per path.
+            # groupby only merges adjacent items, hence the sort on the same key.
+            variables_by_iteration = [
+                (iteration, list(group))
+                for iteration, group in groupby(sorted(variables, key=iteration_of), key=iteration_of)
+            ]
+            for module, applicable_paths in applicable_paths_by_module.items():
+                if variable_path in applicable_paths:
+                    for iteration, group in variables_by_iteration:
+                        variables_by_module[module][iteration].extend(group)
+        return dict(variables_by_module)
diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_v2.py
b/cognite_toolkit/_cdf_tk/commands/build_v2/build_v2.py index 91265a5090..6897cd7881 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_v2.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_v2.py @@ -1,7 +1,7 @@ import os import sys from collections import defaultdict -from collections.abc import Iterable +from collections.abc import Iterable, Sequence from pathlib import Path from pydantic import JsonValue, ValidationError @@ -13,9 +13,9 @@ from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand from cognite_toolkit._cdf_tk.commands.build_v2._module_source_parser import ModuleSourceParser from cognite_toolkit._cdf_tk.commands.build_v2.data_classes import ( - BuildFiles, BuildFolder, BuildParameters, + BuildSourceFiles, BuiltModule, ConfigYAML, InsightList, @@ -134,11 +134,8 @@ def _create_suggested_command(cls, display_path: Path, user_args: list[str]) -> suggestion.append(f"-o {display_path}") return f"'{' '.join(suggestion)}'" - def _parse_module_sources(self, build: BuildFiles) -> list[ModuleSource]: - parser = ModuleSourceParser( - build.selected_modules, - build.organization_dir, - ) + def _parse_module_sources(self, build: BuildSourceFiles) -> list[ModuleSource]: + parser = ModuleSourceParser(build.selected_modules, build.organization_dir) module_sources = parser.parse(build.yaml_files, build.variables) if parser.errors: # Todo: Nicer way of formatting errors. 
@@ -148,7 +145,7 @@ def _parse_module_sources(self, build: BuildFiles) -> list[ModuleSource]: return module_sources @classmethod - def _read_file_system(cls, parameters: BuildParameters) -> BuildFiles: + def _read_file_system(cls, parameters: BuildParameters) -> BuildSourceFiles: """Reads the file system to find the YAML files to build along with config..yaml if it exists.""" selected: set[RelativeDirPath | str] = { parameters.modules_directory.relative_to(parameters.organization_dir) @@ -184,7 +181,7 @@ def _read_file_system(cls, parameters: BuildParameters) -> BuildFiles: yaml_file.relative_to(parameters.organization_dir) for yaml_file in parameters.modules_directory.rglob("*.y*ml") ] - return BuildFiles( + return BuildSourceFiles( yaml_files=yaml_files, selected_modules=selected, variables=variables, @@ -227,7 +224,7 @@ def _parse_user_selection( return selected, errors def _build_modules( - self, module_sources: Iterable[ModuleSource], build_dir: Path, max_workers: int = 1 + self, module_sources: Sequence[ModuleSource], build_dir: Path, max_workers: int = 1 ) -> BuildFolder: folder: BuildFolder = BuildFolder(path=build_dir) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/__init__.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/__init__.py index 6373853bd7..36e951f948 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/__init__.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/__init__.py @@ -1,14 +1,15 @@ -from ._build import BuildFiles, BuildFolder, BuildParameters, BuiltModule +from ._build import BuildFolder, BuildParameters, BuildSourceFiles, BuiltModule from ._config import ConfigYAML from ._insights import ConsistencyError, Insight, InsightList, ModelSyntaxError, Recommendation -from ._module import Module, ModuleSource, ResourceType +from ._module import BuildVariable, Module, ModuleSource, ResourceType from ._types import AbsoluteDirPath, RelativeDirPath, RelativeFilePath, ValidationType 
__all__ = [ "AbsoluteDirPath", - "BuildFiles", "BuildFolder", "BuildParameters", + "BuildSourceFiles", + "BuildVariable", "BuiltModule", "ConfigYAML", "ConsistencyError", diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_build.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_build.py index 121958afa8..127fee2d2d 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_build.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_build.py @@ -29,7 +29,7 @@ def modules_directory(self) -> Path: return self.organization_dir / MODULES -class BuildFiles(BaseModel): +class BuildSourceFiles(BaseModel): """Intermediate format used when parsing modules""" yaml_files: list[RelativeFilePath] = Field( diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_module.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_module.py index 7a31a1a2e5..c8085bb684 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_module.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_module.py @@ -7,7 +7,15 @@ from ._types import AbsoluteFilePath, RelativeDirPath -class BuildVariable(BaseModel): ... 
+class BuildVariable(BaseModel):
+    id: RelativeDirPath  # Path of the variable; the last part is the variable name.
+    value: str | bool | int | float | list[str | bool | int | float]
+    is_selected: bool  # ModuleSourceParser sets this when the defining path is among the selected paths.
+    iteration: int | None = None  # ModuleSourceParser numbers the items of a list of dicts from 1.
+
+    @property
+    def name(self) -> str:
+        return self.id.name  # Last part of the id path.
 
 
 class ModuleSource(BaseModel):
diff --git a/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_command.py b/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_command.py
index efc7bbf0eb..db60df957a 100644
--- a/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_command.py
+++ b/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_command.py
@@ -281,8 +281,8 @@ def test_happy_path(self, tmp_path: Path) -> None:
             config_yaml_name="dev",
             user_selected_modules=["module1", "module2"],
         )
-        parse_input = BuildV2Command._read_file_system(parameters)
-        assert parse_input.model_dump() == {
+        build_files = BuildV2Command._read_file_system(parameters)
+        assert build_files.model_dump() == {
             "yaml_files": [resource_file.relative_to(tmp_path)],
             # Since user_selected_modules are provided, they should be used instead of config selected modules.
             "selected_modules": {"module1", "module2"},
diff --git a/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_module_source_parser.py b/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_module_source_parser.py
index c9ebd2226e..0739c42450 100644
--- a/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_module_source_parser.py
+++ b/tests/test_unit/test_cdf_tk/test_commands/test_buildv2/test_module_source_parser.py
@@ -1,8 +1,10 @@
 from pathlib import Path
+from typing import Any
 
 import pytest
 
 from cognite_toolkit._cdf_tk.commands.build_v2._module_source_parser import ModuleSourceParser
+from cognite_toolkit._cdf_tk.commands.build_v2.data_classes import BuildVariable
 from cognite_toolkit._cdf_tk.constants import DEFAULT_CONFIG_FILE
 
 
@@ -70,3 +72,176 @@ def test_find_modules(
         actual_orphans = [file.as_posix() for file in orphans]
         assert actual_modules == expected_modules
         assert actual_orphans == expected_orphans
+
+    @pytest.mark.parametrize(
+        "variables, available_paths, selected_paths, expected_variables, error_messages",
+        [
+            pytest.param(
+                {"var1": "value1", "var2": "value2", "modules": {"moduleB": {"var3": "value3"}}},
+                {"", "modules", "modules/moduleA", "modules/moduleB"},
+                {"", "modules", "modules/moduleA"},
+                {
+                    ".": [
+                        BuildVariable(id=Path("var1"), value="value1", is_selected=True, iteration=None),
+                        BuildVariable(id=Path("var2"), value="value2", is_selected=True, iteration=None),
+                    ],
+                    "modules/moduleB": [
+                        BuildVariable(
+                            id=Path("modules/moduleB/var3"), value="value3", is_selected=False, iteration=None
+                        )
+                    ],
+                },
+                [],
+                id="Simple string variables at root level",
+            ),
+            pytest.param(
+                {"modules": {"moduleA": {"var1": "value1"}}},
+                {"", "modules", "modules/moduleA"},
+                {"", "modules", "modules/moduleA"},
+                {
+                    "modules/moduleA": [
+                        BuildVariable(id=Path("modules/moduleA/var1"), value="value1", is_selected=True, iteration=None)
+                    ],
+                },
+                [],
+                id="Nested variables in module path",
+            ),
+            pytest.param(
+                {"modules": {"nonexistent": {"var1": "value1"}}},
+                {"", "modules", "modules/moduleA"},
+                {"", "modules", "modules/moduleA"},
+                {},
+                [
+                    "Invalid variable path: modules.nonexistent. This does not correspond to the "
+                    "folder structure inside the modules directory."
+                ],
+                id="Invalid nested path returns error",
+            ),
+            pytest.param(
+                {"list_var": ["a", "b", "c"]},
+                {""},
+                {""},
+                {
+                    ".": [BuildVariable(id=Path("list_var"), value=["a", "b", "c"], is_selected=True, iteration=None)],
+                },
+                [],
+                id="List of strings as single variable",
+            ),
+            pytest.param(
+                {"modules": {"moduleA": [{"var1": "a"}, {"var1": "b"}]}},
+                {"", "modules", "modules/moduleA"},
+                {"", "modules", "modules/moduleA"},
+                {
+                    "modules/moduleA": [
+                        # Pending dictionaries are popped from the end of a stack, so iteration 2 is parsed first.
+                        BuildVariable(id=Path("modules/moduleA/var1"), value="b", is_selected=True, iteration=2),
+                        BuildVariable(id=Path("modules/moduleA/var1"), value="a", is_selected=True, iteration=1),
+                    ],
+                },
+                [],
+                id="List of dicts creates iterations",
+            ),
+            pytest.param(
+                {"mixed_list": ["a", {"key": "value"}]},
+                {"", "mixed_list"},
+                {"", "mixed_list"},
+                {},
+                ["Invalid variable type in list for variable mixed_list."],
+                id="Mixed list types returns error",
+            ),
+        ],
+    )
+    def test_parse_variables(
+        self,
+        variables: dict[str, Any],
+        available_paths: set[str],
+        selected_paths: set[str],
+        expected_variables: dict[str, list[BuildVariable]],
+        error_messages: list[str],
+    ) -> None:
+        build_variables, errors = ModuleSourceParser._parse_variables(
+            variables, {Path(path) for path in available_paths}, {Path(path) for path in selected_paths}
+        )
+        actual_error_messages = [error.message for error in errors]
+        assert actual_error_messages == error_messages
+        actual_variables = {path.as_posix(): var_list for path, var_list in build_variables.items()}
+        assert actual_variables == expected_variables
+
+    @pytest.mark.parametrize(
+        "variables, available_modules, selected_modules, expected_variables, error_messages",
+        [
+            pytest.param(
+                {"var1": "value1"},
+                {"modules/moduleA"},
+                {"modules/moduleA"},
+                {
+                    "modules/moduleA": {
+                        0: [BuildVariable(id=Path("var1"), value="value1", is_selected=True, iteration=None)],
+                    },
+                },
+                [],
+                id="Root variable applied to single module",
+            ),
+            pytest.param(
+                {"modules": {"moduleA": {"var1": "value1"}}},
+                {"modules/moduleA"},
+                {"modules/moduleA"},
+                {
+                    "modules/moduleA": {
+                        0: [
+                            BuildVariable(
+                                id=Path("modules/moduleA/var1"), value="value1", is_selected=True, iteration=None
+                            )
+                        ],
+                    },
+                },
+                [],
+                id="Module-specific variable",
+            ),
+            pytest.param(
+                {"var1": "root", "modules": {"moduleA": {"var2": "moduleA_value"}}},
+                {"modules/moduleA", "modules/moduleB"},
+                {"modules/moduleA"},
+                {
+                    "modules/moduleA": {
+                        0: [
+                            BuildVariable(id=Path("var1"), value="root", is_selected=True, iteration=None),
+                            BuildVariable(
+                                id=Path("modules/moduleA/var2"), value="moduleA_value", is_selected=True, iteration=None
+                            ),
+                        ],
+                    },
+                },
+                [],
+                id="Root and module-specific variables combined",
+            ),
+            pytest.param(
+                {"modules": {"moduleA": [{"var1": "a"}, {"var1": "b"}]}},
+                {"modules/moduleA"},
+                {"modules/moduleA"},
+                {
+                    "modules/moduleA": {
+                        1: [BuildVariable(id=Path("modules/moduleA/var1"), value="a", is_selected=True, iteration=1)],
+                        2: [BuildVariable(id=Path("modules/moduleA/var1"), value="b", is_selected=True, iteration=2)],
+                    },
+                },
+                [],
+                id="Module with iterations",
+            ),
+        ],
+    )
+    def test_parse_module_variables(
+        self,
+        variables: dict[str, Any],
+        available_modules: set[str],
+        selected_modules: set[str],
+        expected_variables: dict[str, dict[int, list[BuildVariable]]],
+        error_messages: list[str],
+    ) -> None:
+        module_variables, errors = ModuleSourceParser._parse_module_variables(
+            variables, {Path(path) for path in available_modules}, {Path(path) for path in selected_modules}
+        )
+        actual_error_messages = [error.message for error in errors]
+        assert actual_error_messages == error_messages
+        actual_variables = {path.as_posix(): var_list for path, var_list in module_variables.items()}
+        assert actual_variables == expected_variables