From aed52590f0e426224766b700b8d9448e82df517e Mon Sep 17 00:00:00 2001 From: Thomas Mustier Date: Thu, 12 Feb 2026 16:26:43 +0000 Subject: [PATCH 1/2] refactor(inspect): route text-frame xml access through helper --- clean_slides/inspect_pptx.py | 46 +++++++++++++++++++++--------------- clean_slides/pptx_access.py | 5 ++++ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/clean_slides/inspect_pptx.py b/clean_slides/inspect_pptx.py index 6baaa0e..092479a 100644 --- a/clean_slides/inspect_pptx.py +++ b/clean_slides/inspect_pptx.py @@ -23,7 +23,7 @@ from pptx.text.text import TextFrame from typing_extensions import TypeGuard -from .pptx_access import chart_xml_space, paragraph_xml_element +from .pptx_access import chart_xml_space, paragraph_xml_element, text_frame_xml_element # ── Helpers ──────────────────────────────────────────────────────────── @@ -232,7 +232,7 @@ def _paragraph_xml(paragraph: _ParagraphLike) -> XmlElement: paragraph_el = paragraph_xml_element(paragraph) if paragraph_el is None: raise ValueError("Paragraph object missing _element") - return paragraph_el + return cast(XmlElement, paragraph_el) # ── Data classes ─────────────────────────────────────────────────────── @@ -304,21 +304,23 @@ class RunInfo: subscript: bool | None = None def to_dict(self): - d = {"text": self.text} - # Output in consistent order matching edit command - for k in ( - "font", - "size", - "bold", - "italic", - "underline", - "color", - "superscript", - "subscript", - ): - v = getattr(self, k) - if v is not None: - d[k] = v + d: dict[str, object] = {"text": self.text} + if self.font is not None: + d["font"] = self.font + if self.size is not None: + d["size"] = self.size + if self.bold is not None: + d["bold"] = self.bold + if self.italic is not None: + d["italic"] = self.italic + if self.underline is not None: + d["underline"] = self.underline + if self.color is not None: + d["color"] = self.color + if self.superscript is not None: + d["superscript"] = self.superscript + if self.subscript is not None: + d["subscript"] = self.subscript return d def __str__(self): @@ -1046,7 +1048,11 @@ def inspect_layout(layout: SlideLayout) -> list[PlaceholderInfo]: default_alignment = align_map.get(int(p.alignment), str(p.alignment)) # Check defRPr on the paragraph - pPr = p._element.find(qn("a:pPr")) + paragraph_el_obj = paragraph_xml_element(p) + paragraph_el = ( + cast(XmlElement, paragraph_el_obj) if paragraph_el_obj is not None else None + ) + pPr = paragraph_el.find(qn("a:pPr")) if paragraph_el is not None else None if pPr is not None: defRPr = pPr.find(qn("a:defRPr")) if defRPr is not None: @@ -1168,7 +1174,9 @@ def identify_color(prs: Presentation, rgb_hex: str) -> str | None: def _parse_text_frame(tf: TextFrame) -> TextFrameInfo: """Parse a text frame into TextFrameInfo.""" - bodyPr = tf._element.find(qn("a:bodyPr")) + tf_el_obj = text_frame_xml_element(tf) + tf_el = cast(XmlElement, tf_el_obj) if tf_el_obj is not None else None + bodyPr = tf_el.find(qn("a:bodyPr")) if tf_el is not None else None anchor = bodyPr.get("anchor") if bodyPr is not None else None margins = None diff --git a/clean_slides/pptx_access.py b/clean_slides/pptx_access.py index 772f63a..4bf96a5 100644 --- a/clean_slides/pptx_access.py +++ b/clean_slides/pptx_access.py @@ -153,6 +153,11 @@ def paragraph_xml_element(paragraph: object) -> object | None: return getattr(paragraph, "_element", None) +def text_frame_xml_element(text_frame: object) -> object | None: + """Return underlying OOXML text-frame element when available.""" + return getattr(text_frame, "_element", None) + + def chart_xml_space(chart: object) -> object | None: """Return underlying OOXML chart-space element when available.""" return getattr(chart, "_chartSpace", None) From 0142ed024bd8332e95d992389c15136891b22606 Mon Sep 17 00:00:00 2001 From: Thomas Mustier Date: Thu, 12 Feb 2026 16:26:50 +0000 Subject: [PATCH 2/2] test(pptx): cover text-frame xml element helper --- tests/test_pptx_access.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_pptx_access.py b/tests/test_pptx_access.py index a6a32fc..fb41b37 100644 --- a/tests/test_pptx_access.py +++ b/tests/test_pptx_access.py @@ -38,6 +38,7 @@ slide_size_emu, text_frame_paragraphs, text_frame_text, + text_frame_xml_element, ) @@ -68,6 +69,7 @@ class _Chart: class _TextFrame: text: str paragraphs: list[object] + _element: object | None = None @dataclass @@ -224,7 +226,7 @@ def test_presentation_chart_types_collects_chart_values() -> None: def test_shape_and_text_frame_helpers_cover_text_placeholder_connector() -> None: - text_frame = _TextFrame(text=" Hello ", paragraphs=[object()]) + text_frame = _TextFrame(text=" Hello ", paragraphs=[object()], _element={"tag": "txBody"}) shape = _Shape( has_chart=True, chart=_Chart(chart_type=57, series=[_Series(name="Revenue")], _chartSpace={"tag": "cs"}), @@ -248,6 +250,7 @@ def test_shape_and_text_frame_helpers_cover_text_placeholder_connector() -> None assert frame is not None assert text_frame_text(frame) == " Hello " assert shape_text_frame_text(shape) == " Hello " + assert text_frame_xml_element(frame) == {"tag": "txBody"} assert len(text_frame_paragraphs(frame)) == 1 assert shape_text(shape) == "inline"