Skip to content

Commit 5d9cb72

Browse files
committed
feat: enhance indexing by adding root parameter to _DocIndex.index method
Signed-off-by: Olivier Antonelli <[email protected]>
1 parent 8feb09f commit 5d9cb72

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

docling_core/types/doc/document.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6221,19 +6221,31 @@ def get_item_list(self, key: str) -> list[NodeItem]:
62216221
return getattr(self, key)
62226222

62236223
def index(
6224-
self, doc: "DoclingDocument", page_nrs: Optional[set[int]] = None
6224+
self,
6225+
doc: "DoclingDocument",
6226+
page_nrs: Optional[set[int]] = None,
6227+
root: Optional[NodeItem] = None,
62256228
) -> None:
62266229

62276230
orig_ref_to_new_ref: dict[str, str] = {}
6228-
page_delta = self._max_page - min(doc.pages.keys()) + 1 if doc.pages else 0
6231+
6232+
if root:
6233+
if root.parent:
6234+
orig_ref_to_new_ref[root.parent.cref] = "#/body"
6235+
self._names.append(doc.name + root.self_ref)
6236+
page_delta = 0
6237+
else:
6238+
self._names.append(doc.name)
6239+
page_delta = (
6240+
self._max_page - min(doc.pages.keys()) + 1 if doc.pages else 0
6241+
)
62296242

62306243
if self._body is None:
62316244
self._body = GroupItem(**doc.body.model_dump(exclude={"children"}))
62326245

6233-
self._names.append(doc.name)
6234-
62356246
# collect items in traversal order
62366247
for item, _ in doc._iterate_items_with_stack(
6248+
root=root,
62376249
with_groups=True,
62386250
traverse_pictures=True,
62396251
included_content_layers={c for c in ContentLayer},

0 commit comments

Comments
 (0)