diff --git a/CHANGELOG.md b/CHANGELOG.md index b5dce7f..c24d3fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog -## 1.1.0 (unreleased) +## 1.2.0 (unreleased) + +- New method `node.parent_iterator()`. +- New option `Tree(..., check_dag=True)` to enforce DAG compliance. + +## 1.1.0 (2025-02-09) - DEPRECATE: `TypedTree.iter_by_type()`. Use `iterator(.., kind)`instead. - New methods `TypedTree.iterator(..., kind=ANY_KIND)`, diff --git a/nutree/__init__.py b/nutree/__init__.py index eed4834..17468fa 100644 --- a/nutree/__init__.py +++ b/nutree/__init__.py @@ -20,11 +20,14 @@ from nutree.common import ( AmbiguousMatchError, + CycleDetectedError, DictWrapper, + DuplicateNodeIdError, IterMethod, SelectBranch, SkipBranch, StopTraversal, + StructureError, TreeError, UniqueConstraintError, ) @@ -35,17 +38,20 @@ from nutree.typed_tree import TypedNode, TypedTree __all__ = [ # type: ignore - Tree, - Node, AmbiguousMatchError, + CycleDetectedError, + DictWrapper, diff_node_formatter, DiffClassification, - DictWrapper, + DuplicateNodeIdError, IterMethod, load_tree_from_fs, + Node, SelectBranch, SkipBranch, StopTraversal, + StructureError, + Tree, TreeError, TypedNode, TypedTree, diff --git a/nutree/common.py b/nutree/common.py index 42ac785..69e5aac 100644 --- a/nutree/common.py +++ b/nutree/common.py @@ -52,8 +52,40 @@ class TreeError(RuntimeError): """Base class for all `nutree` errors.""" -class UniqueConstraintError(TreeError): - """Thrown when trying to add the same node_id to the same parent""" +class StructureError(TreeError): + """Base class for errors thrown when the tree structure is invalid.""" + + def __init__(self, message: str): + super().__init__(message) + + +class DuplicateNodeIdError(StructureError): + """Thrown when trying to add the same node_id to the tree.""" + + +class UniqueConstraintError(StructureError): + """Thrown when trying to add the same data_id to the same parent. + + This would violate the constraint of the tree being a 'SIMPLE directed + acyclic graph'. + Note that the tree allows to add the same data_id to different parent nodes. + In TypedTrees, the data_id may be added to the same parent twice, as long as + it has a different kind. + + Pass `check_dag=False` to the tree constructor to suppress this restriction. + """ + + +class CycleDetectedError(StructureError): + """Thrown when trying to add the same data_id to the same ancestor chain. + + This would violate the constraint of the tree being a 'simple directed + ACYCLIC graph' and create a cycle. + In TypedTrees, the data_id may be added to the same ancestor chain more than + once, as long as it has a different kind. + + Pass `check_dag=False` to the tree constructor to suppress this restriction. + """ class AmbiguousMatchError(TreeError): diff --git a/nutree/node.py b/nutree/node.py index 07fbfd5..4792eda 100644 --- a/nutree/node.py +++ b/nutree/node.py @@ -554,16 +554,36 @@ def get_common_ancestor(self, other: Self) -> Self | None: return None def get_parent_list(self, *, add_self=False, bottom_up=False) -> list[Self]: - """Return ordered list of all parent nodes.""" + """Return an ordered list of all parent nodes (top-down by default).""" res = [] parent = self if add_self else self._parent while parent is not None and parent._parent is not None: res.append(parent) parent = parent._parent if not bottom_up: + # Note: it is more efficient to reverse the list than to append + # in reverse order! res.reverse() return res + def parent_iterator(self, *, add_self=False, bottom_up=True) -> Iterator[Self]: + """Generator that walks the parent chain bottom-up. + + Note: top-down requires to convert to a list and revert anyway, + so it can also be implemented as + ```py + for p in self.get_parent_list(add_self=add_self, bottom_up=False): + ``` + """ + if not bottom_up: + yield from self.get_parent_list(add_self=add_self, bottom_up=False) + return + # Bottom-up iteration: + parent = self if add_self else self._parent + while parent is not None and parent._parent is not None: + yield parent + parent = parent._parent + def get_path( self, *, add_self: bool = True, separator: str = "/", repr: str = "{node.name}" ) -> str: diff --git a/nutree/tree.py b/nutree/tree.py index 275e12e..31e206f 100644 --- a/nutree/tree.py +++ b/nutree/tree.py @@ -35,8 +35,10 @@ ROOT_NODE_ID, AmbiguousMatchError, CalcIdCallbackType, + CycleDetectedError, DataIdType, DeserializeMapperType, + DuplicateNodeIdError, FlatJsonDictType, IterMethod, KeyMapType, @@ -107,6 +109,10 @@ class Tree(Generic[TData, TNode]): Set `forward_attrs` to true, to enable aliasing of node attributes, i.e. make `node.data.NAME` accessible as `node.NAME`. |br| **Note:** Use with care, see also :ref:`forward-attributes`. + + `check_dag` enables validations to ensure that the node structure is + compliant with Directed Acyclic Graphs (DAG). This means that no nodes + with the same data_id cannot be added as descendants of each other. """ node_factory: type[TNode] = cast(type[TNode], Node) @@ -129,6 +135,7 @@ def __init__( *, calc_data_id: CalcIdCallbackType | None = None, forward_attrs: bool = False, + check_dag: bool = True, ): self._lock = threading.RLock() #: Tree name used for logging @@ -139,8 +146,10 @@ def __init__( # Optional callback that calculates data_ids from data objects # hash(data) is used by default self._calc_data_id_hook: CalcIdCallbackType | None = calc_data_id - # Enable aliasing when accessing node instances. + #: Enable aliasing when accessing node instances. self._forward_attrs: bool = forward_attrs + #: Enable cycle detection in add_child() + self.check_dag = check_dag def __repr__(self): return f"{self.__class__.__name__}<{self.name!r}>" @@ -222,27 +231,45 @@ def calc_data_id(self, data: Any) -> DataIdType: return self._calc_data_id_hook(self, data) # type: ignore return hash(data) + def _check_insert(self, node: TNode): + """Raise error if inserting a node would violate DAG restrictions.""" + # We can assume that node.parent is set and that node already has at + # least one clone registered in self._nodes_by_data_id, when this is + # called from _register() + data_id = node._data_id + if node._parent._children: + for sibling in node._parent._children: + if sibling._data_id == data_id: + raise UniqueConstraintError( + f"Node with data_id {data_id} already exists under same parent" + "Pass `check_dag=False` to the tree constructor to suppress " + "this restriction." + ) + for n in self._nodes_by_data_id[data_id]: + if node.is_descendant_of(n): + raise CycleDetectedError( + f"Inserting {node} would create a cycle with {n}" + "Pass `check_dag=False` to the tree constructor to suppress " + "this restriction." + ) + def _register(self, node: TNode) -> None: assert node._tree is self - # node._tree = self - assert node._node_id and node._node_id not in self._node_by_id, f"{node}" - self._node_by_id[node._node_id] = node + assert node._node_id is not None + if node._node_id in self._node_by_id: + raise DuplicateNodeIdError( + f"Node ID already registered: {node}" + ) # pragma: no cover + try: clone_list = self._nodes_by_data_id[node._data_id] # may raise KeyError - for clone in clone_list: - if clone.parent is node.parent: - is_same_kind = getattr(clone, "kind", None) == getattr( - node, "kind", None - ) - if is_same_kind: - del self._node_by_id[node._node_id] - raise UniqueConstraintError( - f"Node.data already exists in parent: {clone=}, " - f"{clone.parent=}" - ) + # if we get here, we are adding a clone and should check DAG compliance + if self.check_dag and node._parent: + self._check_insert(node) clone_list.append(node) except KeyError: self._nodes_by_data_id[node._data_id] = [node] + self._node_by_id[node._node_id] = node def _unregister(self, node: TNode, *, clear: bool = True) -> None: """Unlink node from this tree (children must be unregistered first).""" diff --git a/nutree/typed_tree.py b/nutree/typed_tree.py index a4b37d2..d9d9bfa 100644 --- a/nutree/typed_tree.py +++ b/nutree/typed_tree.py @@ -25,6 +25,7 @@ from nutree.common import ( ROOT_DATA_ID, ROOT_NODE_ID, + CycleDetectedError, DataIdType, DeserializeMapperType, IterMethod, @@ -83,11 +84,12 @@ def __init__( node_id: int | None = None, meta: dict | None = None, ): - self._kind: str = kind # tree._register() checks for this attribute + # tree._register() checks for this attribute in __init__(): + self._kind: str = kind super().__init__( data, parent=parent, data_id=data_id, node_id=node_id, meta=meta ) - assert isinstance(kind, str) and kind != ANY_KIND, f"Unsupported `kind`: {kind}" + assert isinstance(kind, str), f"Unsupported `kind`: {kind}" # del self._children # self._child_map: Dict[Node] = None @@ -604,6 +606,27 @@ def deserialize_mapper(cls, parent: Node, data: dict) -> str | object | None: f"Override this method or pass a mapper callback to evaluate {data}." ) + def _check_insert(self, node: Node): + """Raise error if inserting a node would violate DAG restrictions.""" + # We can assume that node.parent is set and that node already has at + # least one clone registered in self._nodes_by_data_id, when this is + # called from _register() + assert node._kind, node + ref_key = node._data_id + kind = node._kind + if node._parent._children: + for sibling in node._parent._children: + if sibling._data_id == ref_key and sibling._kind == kind: + raise UniqueConstraintError( + f"Node with data_id {ref_key} and kind {kind} " + f"already exists in parent {node._parent}" + ) + for n in self._nodes_by_data_id[ref_key]: + if node.is_descendant_of(n) and n._kind == kind: + raise CycleDetectedError( + f"Inserting {node} would create a cycle with {n}" + ) + def add_child( self, child: TypedNode[TData] | Self | TData, diff --git a/tests/test_clones.py b/tests/test_clones.py index e23a062..1acf7dc 100644 --- a/tests/test_clones.py +++ b/tests/test_clones.py @@ -5,7 +5,11 @@ import pytest from nutree import AmbiguousMatchError, Node, Tree -from nutree.common import DictWrapper, UniqueConstraintError +from nutree.common import ( + CycleDetectedError, + DictWrapper, + UniqueConstraintError, +) from . import fixture @@ -39,11 +43,13 @@ def test_clones(self): with pytest.raises(AmbiguousMatchError): tree["a1"] - # # Not allowed to add two clones to same parent + # Not allowed to add two clones to same parent with pytest.raises(UniqueConstraintError): tree.add("A") with pytest.raises(UniqueConstraintError): tree.add(tree["A"]) + with pytest.raises(CycleDetectedError): + tree["a2"].add(tree["A"]) res = tree.find("a1") assert res @@ -93,12 +99,19 @@ def test_clones_typed(self): assert tree.count_unique == 8 fail1 = tree["fail1"] - # Not allowed to add two clones to same parent + # Not allowed to add two clones with the same kind to same parent with pytest.raises(UniqueConstraintError): fail1.add("cause1", kind="cause") fail1.add("cause1", kind="other") + + # Not allowed to add two clones with the same kind to ancestor chain + eff2 = tree["eff2"] + with pytest.raises(CycleDetectedError): + eff2.add("func1", kind="function") + eff2.add("func1", kind="other") + tree.print() - assert tree.count == 9 + assert tree.count == 10 assert tree.count_unique == 8 def test_dict(self): diff --git a/tests/test_core.py b/tests/test_core.py index f77682d..6e91abe 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -565,6 +565,37 @@ def test_iter(self): s = [n.data for n in tree.iterator(IterMethod.RANDOM_ORDER)] assert len(s) == 8 + def test_iter_parents(self): + """ + Tree<'fixture'> + ├── A + │ ├── a1 + │ │ ├── a11 + │ │ ╰── a12 + │ ╰── a2 + ╰── B + ╰── b1 + ╰── b11 + """ + tree = fixture.create_tree_simple() + + # print(tree.format(repr="{node.data}")) + a11 = tree["a12"] + + s = ",".join(n.data for n in a11.parent_iterator()) + assert s == "a1,A" + + s = ",".join(n.data for n in a11.parent_iterator(add_self=True)) + assert s == "a12,a1,A" + + s = ",".join(n.data for n in a11.parent_iterator(bottom_up=False)) + assert s == "A,a1" + + s = ",".join( + n.data for n in a11.parent_iterator(bottom_up=False, add_self=True) + ) + assert s == "A,a1,a12" + def test_visit(self): """ Tree<'fixture'> diff --git a/tests/test_mermaid.py b/tests/test_mermaid.py index fb93500..1db85c2 100644 --- a/tests/test_mermaid.py +++ b/tests/test_mermaid.py @@ -3,6 +3,7 @@ """ """ # ruff: noqa: T201, T203 `print` found +import io import shutil from pathlib import Path @@ -76,6 +77,16 @@ def test_serialize_mermaid_mappers(self): assert '7-. "1" .->8' in buffer assert "classDef default fill" in buffer + def test_serialize_mermaid_errors(self): + """Save/load as object tree with clones.""" + tree = fixture.create_typed_tree_simple(clones=True, name="Root") + + with pytest.raises(ValueError, match="target must be a Path, str, or"): + tree.to_mermaid_flowchart(15) # type: ignore + + with pytest.raises(RuntimeError, match="Need a filepath to"): + tree.to_mermaid_flowchart(io.StringIO("x"), format="png") + def test_serialize_mermaid_typed(self): """Save/load as object tree with clones.""" KEEP_FILES = not fixture.is_running_on_ci() and False