Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from collections import defaultdict
from collections.abc import Iterable
from pathlib import Path
from typing import Any
from typing import Any, cast

from cognite_toolkit._cdf_tk.commands.build_v2.data_classes import (
AbsoluteDirPath,
InsightList,
ModelSyntaxError,
ModuleSource,
RelativeDirPath,
RelativeFilePath,
)
from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._module import BuildVariable
from cognite_toolkit._cdf_tk.constants import EXCL_FILES
from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA
from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA, ResourceTypes


class ModuleSourceParser:
Expand All @@ -23,62 +24,64 @@ def __init__(self, selected_modules: set[RelativeDirPath | str], organization_di
self.organization_dir = organization_dir
self.errors = InsightList()

def parse(self, yaml_files: list[RelativeFilePath], variables: dict[str, Any]) -> list[ModuleSource]:
    """Create the module sources for the selected modules.

    The YAML files are grouped into modules, the modules are validated against
    the selection, and the build variables are parsed. If validation or variable
    parsing reports errors, they are appended to ``self.errors`` and an empty
    list is returned.

    Args:
        yaml_files: YAML file paths to organize into modules.
        variables: The raw build variables.

    Returns:
        One ModuleSource per selected module. A module that has build variables
        yields one copy per variable set, with ``iteration`` counting from 1.
    """
    source_by_module_id, orphans = self._find_modules(yaml_files, self.organization_dir)
    module_ids = list(source_by_module_id.keys())

    errors = self._validate_modules(module_ids, self.selected_modules, orphans)
    if errors:
        self.errors.extend(errors)
        return []

    selected_modules = self._select_modules(module_ids, self.selected_modules)
    build_variables, errors = self._parse_variables(variables, set(module_ids), set(selected_modules))
    if errors:
        self.errors.extend(errors)
        return []

    module_sources: list[ModuleSource] = []
    for module in selected_modules:
        source = source_by_module_id[module]
        module_build_variables = build_variables.get(module, [])
        if not module_build_variables:
            module_sources.append(source)
            continue
        # One copy of the source per variable set.
        for iteration, module_variable in enumerate(module_build_variables, start=1):
            module_sources.append(
                source.model_copy(update={"variables": module_variable, "iteration": iteration})
            )
    return module_sources

@classmethod
def _find_modules(
    cls, yaml_files: list[RelativeFilePath], organization_dir: Path
) -> tuple[dict[RelativeDirPath, ModuleSource], list[RelativeDirPath]]:
    """Organize YAML files by module and by resource folder within the module.

    A file belongs to a module when one of its parent directories is a known
    resource folder; the module is the directory containing that folder.
    Files whose name is in EXCL_FILES are skipped.

    Args:
        yaml_files: YAML file paths to organize.
        organization_dir: Directory that the module path and the resource file
            paths are joined onto.

    Returns:
        A tuple of (module sources keyed by relative module path, files that
        are not located inside any known resource folder).
    """
    source_by_module_id: dict[RelativeDirPath, ModuleSource] = {}
    orphan_files: list[RelativeDirPath] = []
    for yaml_file in yaml_files:
        if yaml_file.name in EXCL_FILES:
            continue
        relative_module_path, resource_folder = cls._get_module_path_from_resource_file_path(yaml_file)
        if not (relative_module_path and resource_folder):
            orphan_files.append(yaml_file)
            continue
        source = source_by_module_id.get(relative_module_path)
        if source is None:
            source = ModuleSource(
                path=organization_dir / relative_module_path,
                id=relative_module_path,
            )
            source_by_module_id[relative_module_path] = source
        source.resource_files_by_folder.setdefault(resource_folder, []).append(organization_dir / yaml_file)
    return source_by_module_id, orphan_files

@staticmethod
def _get_module_path_from_resource_file_path(resource_file: Path) -> tuple[Path | None, ResourceTypes | None]:
    """Locate the module directory and resource folder of a resource file.

    Walks the parents of ``resource_file`` from the closest one outwards and
    stops at the first directory whose name is a key in
    CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA.

    Returns:
        ``(module_path, resource_folder_name)`` where ``module_path`` is the
        directory containing the resource folder, or ``(None, None)`` when no
        parent directory is a known resource folder.
    """
    for parent in resource_file.parents:
        if parent.name in CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA:
            # We know that all keys in CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA are valid ResourceTypes,
            # so this cast is safe.
            return parent.parent, cast(ResourceTypes, parent.name)
    return None, None

@classmethod
def _validate_modules(
Expand All @@ -96,11 +99,11 @@ def _validate_modules(

@classmethod
def _select_modules(
cls, files_by_module: dict[RelativeDirPath, list[RelativeDirPath]], selection: set[RelativeDirPath | str]
cls, module_paths: Iterable[RelativeDirPath], selection: set[RelativeDirPath | str]
) -> list[RelativeDirPath]:
return [
module_path
for module_path in files_by_module.keys()
for module_path in module_paths
if module_path in selection
or module_path.name in selection
or any(parent in selection for parent in module_path.parents)
Expand Down
190 changes: 161 additions & 29 deletions cognite_toolkit/_cdf_tk/commands/build_v2/build_v2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import sys
from collections import defaultdict
from collections.abc import Iterable
from pathlib import Path

Expand All @@ -26,13 +25,18 @@
ValidationType,
)
from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._insights import ModelSyntaxError, Recommendation
from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._module import (
BuildVariable,
FailedReadResource,
ReadResource,
SuccessfulReadResource,
)
from cognite_toolkit._cdf_tk.constants import HINT_LEAD_TEXT, MODULES
from cognite_toolkit._cdf_tk.cruds import RESOURCE_CRUD_BY_FOLDER_NAME
from cognite_toolkit._cdf_tk.cruds import RESOURCE_CRUD_BY_FOLDER_NAME, ResourceCRUD
from cognite_toolkit._cdf_tk.cruds._resource_cruds.datamodel import DataModelCRUD
from cognite_toolkit._cdf_tk.exceptions import ToolkitFileNotFoundError, ToolkitNotADirectoryError, ToolkitValueError
from cognite_toolkit._cdf_tk.resource_classes import ToolkitResource
from cognite_toolkit._cdf_tk.utils import read_yaml_file, safe_write
from cognite_toolkit._cdf_tk.utils.file import relative_to_if_possible, yaml_safe_dump
from cognite_toolkit._cdf_tk.utils import calculate_hash, humanize_collection, safe_write
from cognite_toolkit._cdf_tk.utils.file import read_yaml_content, relative_to_if_possible, safe_read, yaml_safe_dump
from cognite_toolkit._cdf_tk.validation import humanize_validation_error


Expand Down Expand Up @@ -140,7 +144,7 @@ def _parse_module_sources(self, build: BuildFiles) -> list[ModuleSource]:
)
module_sources = parser.parse(build.yaml_files, build.variables)
if parser.errors:
# Todo: Nicer way of formatting errors.
# Todo: Nicer way of formatting errors. Jira CDF-27107
raise ToolkitValueError(
"Errors encountered while parsing modules:\n" + "\n".join(f"- {error!s}" for error in parser.errors)
)
Expand Down Expand Up @@ -229,7 +233,6 @@ def _build_modules(
self, module_sources: Iterable[ModuleSource], build_dir: Path, max_workers: int = 1
) -> BuildFolder:
folder: BuildFolder = BuildFolder(path=build_dir)

for source in module_sources:
# Inside this loop, do not raise exceptions.
module = self._import_module(source) # Syntax validation
Expand All @@ -245,31 +248,160 @@ def _build_modules(

return folder

def _import_module(self, source: ModuleSource) -> Module:
    """Read and syntax-validate all resource files of a module.

    For each resource folder in ``source.resource_files_by_folder`` the CRUD
    class is picked from the kind suffix of the file name (the text after the
    last dot in the file stem). Failures are collected as FailedReadResource
    entries instead of being raised.

    Args:
        source: The module source listing the resource files per resource folder.

    Returns:
        A Module holding the source and every read resource, successful or failed.
    """
    resources: list[ReadResource] = []
    for resource_folder, resource_files in source.resource_files_by_folder.items():
        crud_classes = RESOURCE_CRUD_BY_FOLDER_NAME.get(resource_folder)
        if not crud_classes:
            # This is handled in the module parsing phase.
            continue
        class_by_kind = {crud_class.kind: crud_class for crud_class in crud_classes}
        for resource_file in resource_files:
            if "." not in resource_file.stem:
                # Todo: Discuss whether this should be an error or silently ignored.
                # Reason for an error: the user intended to set a kind but forgot to.
                # Reason for silently ignoring: the user has, for example, a YAML file as part of
                # their function code, and it is not meant to be a resource file.
                continue
            kind = resource_file.stem.rsplit(".", maxsplit=1)[-1]
            crud_class = class_by_kind.get(kind)
            if not crud_class:
                resources.append(
                    self._create_failed_read_resource_for_invalid_kind(
                        resource_file, kind, resource_folder, class_by_kind.keys()
                    )
                )
                continue
            content_or_error = self._read_resource_file(resource_file)
            if isinstance(content_or_error, ModelSyntaxError):
                resources.append(FailedReadResource(source_path=resource_file, errors=[content_or_error]))
                continue
            # Pass the CRUD class itself (the parser reads yaml_cls, folder_name and kind from it)
            # and the file path, which the parser stores as source_path on every result.
            resources.extend(
                self._parse_resource_file(content_or_error, crud_class, source.variables, resource_file)
            )

    return Module(source=source, resources=resources)

def _create_failed_read_resource_for_invalid_kind(
    self, resource_file: Path, kind: str, resource_folder: str, available_kinds: Iterable[str]
) -> FailedReadResource:
    """Create the failed-read result for a file whose kind suffix is unknown in its folder.

    Args:
        resource_file: The resource file with the unknown kind.
        kind: The kind taken from the file name.
        resource_folder: Name of the resource folder the file is located in.
        available_kinds: The kinds that are known for ``resource_folder``.

    Returns:
        A FailedReadResource carrying a single ModelSyntaxError.
    """
    # NOTE(review): this code uses hyphens while the other codes in this file use
    # underscores (e.g. RESOURCE_FILE_READ_ERROR) - confirm which convention is intended.
    error = ModelSyntaxError(
        code="UNKNOWN-RESOURCE-KIND",
        # The path is quoted once; combining literal quotes with !r rendered it as ''path''.
        message=(
            f"Resource file '{resource_file.as_posix()}' has unknown resource kind '{kind}' "
            f"for folder '{resource_folder}'"
        ),
        fix=(
            "Make sure the file name ends with a known resource kind for the folder. "
            f"Expected kinds for folder '{resource_folder}' are: {humanize_collection(available_kinds)}"
        ),
    )
    return FailedReadResource(source_path=resource_file, errors=[error])

def _read_resource_file(self, resource_file: Path) -> str | ModelSyntaxError:
    """Read a resource file, returning an error object instead of raising.

    Args:
        resource_file: Path to the resource file.

    Returns:
        The file content, or a ModelSyntaxError describing why the read failed.
    """
    try:
        return safe_read(resource_file)
    except Exception as e:
        # Deliberately broad: any read failure is reported as an error object
        # rather than propagated to the caller.
        return ModelSyntaxError(
            code="RESOURCE_FILE_READ_ERROR",
            # The path is quoted once; combining literal quotes with !r rendered it as ''path''.
            message=f"Failed to read resource file '{resource_file.as_posix()}': {e!s}",
            fix="Make sure the file is a valid YAML file and is accessible.",
        )

def _parse_resource_file(
    self, file_content: str, io: type[ResourceCRUD], variables: list[BuildVariable], file_path: Path
) -> list[ReadResource]:
    """Parses the content of a resource file into one or more read resources.

    1. Substitute variables in the content, if any variables are given.
    2. Parse the YAML content.
    3. Validate each resource against the YAML model of the CRUD class, first
       forbidding unknown fields and then, as a fallback, ignoring them.

    Args:
        file_content: The content of the resource YAML file.
        io: The CRUD class for the resource. Its ``yaml_cls`` is the model the content
            is validated against; ``folder_name`` and ``kind`` set the resource type.
        variables: The variables to substitute in the YAML content.
        file_path: Path of the resource file, stored as ``source_path`` on every result.

    Returns:
        One ReadResource per resource in the file (a top-level YAML list yields one
        per item). Content that fails to parse as YAML yields a single
        FailedReadResource. A resource that validates only when unknown fields are
        ignored is returned as successful, with the unknown fields as recommendations.
    """
    # The hash is calculated from the content before variable substitution.
    file_hash = calculate_hash(file_content, shorten=True)
    results: list[ReadResource] = []
    if variables:
        substituted_content = self._substitute_variables_in_content(file_content, variables)
    else:
        substituted_content = file_content

    try:
        parsed = read_yaml_content(substituted_content)
    except Exception as e:
        # Todo Look for variables not replaced in the content and add fix suggestion to the error.
        results.append(
            FailedReadResource(
                source_path=file_path,
                errors=[
                    ModelSyntaxError(
                        code="YAML_PARSE_ERROR",
                        message=f"Failed to parse YAML content: {e!s}",
                        fix="Make sure the YAML content is valid.",
                    )
                ],
            )
        )
        return results

    listed_resources = parsed if isinstance(parsed, list) else [parsed]
    for resource_dict in listed_resources:
        try:
            resource = io.yaml_cls.model_validate(resource_dict, extra="forbid")
        except ValidationError as forbid_errors:
            # The strict errors are reported either as syntax errors or as
            # recommendations, depending on whether the fallback succeeds.
            messages = humanize_validation_error(forbid_errors)
            try:
                # Fallback to handle unknown fields.
                resource = io.yaml_cls.model_validate(resource_dict, extra="ignore")
            except ValidationError:
                # It is still failing, so we have syntax errors that we want to report.
                results.append(
                    FailedReadResource(
                        source_path=file_path,
                        errors=[ModelSyntaxError(code="SYNTAX_ERROR", message=message) for message in messages],
                    )
                )
                continue
            # Fallback succeeded, so we have insights to report about the unknown fields.
            results.append(
                SuccessfulReadResource(
                    source_path=file_path,
                    source_hash=file_hash,
                    resource_type=ResourceType(resource_folder=io.folder_name, kind=io.kind),
                    resource=resource,
                    insights=[Recommendation(code="UNKNOWN_FIELDS", message=message) for message in messages],
                )
            )
            continue
        results.append(
            SuccessfulReadResource(
                source_path=file_path,
                source_hash=file_hash,
                resource_type=ResourceType(resource_folder=io.folder_name, kind=io.kind),
                resource=resource,
            )
        )
    return results

def _substitute_variables_in_content(self, content: str, variables: list[BuildVariable]) -> str:
    """Substitute build variables in the content of a resource file.

    Not implemented yet: calling this method always raises NotImplementedError.

    Args:
        content: The content of the resource file.
        variables: The variables to substitute in the content.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()

def _export_module(self, module: Module, build_dir: Path) -> list[Path]:
build_dir.mkdir(parents=True, exist_ok=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ._build import BuildFiles, BuildFolder, BuildParameters, BuiltModule
from ._config import ConfigYAML
from ._insights import ConsistencyError, Insight, InsightList, ModelSyntaxError, Recommendation
from ._module import Module, ModuleSource, ResourceType
from ._module import FailedReadResource, Module, ModuleSource, ReadResource, ResourceType, SuccessfulReadResource
from ._types import AbsoluteDirPath, RelativeDirPath, RelativeFilePath, ValidationType

__all__ = [
Expand All @@ -12,14 +12,17 @@
"BuiltModule",
"ConfigYAML",
"ConsistencyError",
"FailedReadResource",
"Insight",
"InsightList",
"ModelSyntaxError",
"Module",
"ModuleSource",
"ReadResource",
"Recommendation",
"RelativeDirPath",
"RelativeFilePath",
"ResourceType",
"SuccessfulReadResource",
"ValidationType",
]
Loading
Loading