Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions clean_slides/inspect_pptx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from pptx.text.text import TextFrame
from typing_extensions import TypeGuard

from .pptx_access import chart_xml_space, paragraph_xml_element
from .pptx_access import chart_xml_space, paragraph_xml_element, text_frame_xml_element

# ── Helpers ────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -232,7 +232,7 @@ def _paragraph_xml(paragraph: _ParagraphLike) -> XmlElement:
paragraph_el = paragraph_xml_element(paragraph)
if paragraph_el is None:
raise ValueError("Paragraph object missing _element")
return paragraph_el
return cast(XmlElement, paragraph_el)


# ── Data classes ───────────────────────────────────────────────────────
Expand Down Expand Up @@ -304,21 +304,23 @@ class RunInfo:
subscript: bool | None = None

def to_dict(self):
d = {"text": self.text}
# Output in consistent order matching edit command
for k in (
"font",
"size",
"bold",
"italic",
"underline",
"color",
"superscript",
"subscript",
):
v = getattr(self, k)
if v is not None:
d[k] = v
d: dict[str, object] = {"text": self.text}
if self.font is not None:
d["font"] = self.font
if self.size is not None:
d["size"] = self.size
if self.bold is not None:
d["bold"] = self.bold
if self.italic is not None:
d["italic"] = self.italic
if self.underline is not None:
d["underline"] = self.underline
if self.color is not None:
d["color"] = self.color
if self.superscript is not None:
d["superscript"] = self.superscript
if self.subscript is not None:
d["subscript"] = self.subscript
return d

def __str__(self):
Expand Down Expand Up @@ -1046,7 +1048,11 @@ def inspect_layout(layout: SlideLayout) -> list[PlaceholderInfo]:
default_alignment = align_map.get(int(p.alignment), str(p.alignment))

# Check defRPr on the paragraph
pPr = p._element.find(qn("a:pPr"))
paragraph_el_obj = paragraph_xml_element(p)
paragraph_el = (
cast(XmlElement, paragraph_el_obj) if paragraph_el_obj is not None else None
)
pPr = paragraph_el.find(qn("a:pPr")) if paragraph_el is not None else None
if pPr is not None:
defRPr = pPr.find(qn("a:defRPr"))
if defRPr is not None:
Expand Down Expand Up @@ -1168,7 +1174,9 @@ def identify_color(prs: Presentation, rgb_hex: str) -> str | None:

def _parse_text_frame(tf: TextFrame) -> TextFrameInfo:
"""Parse a text frame into TextFrameInfo."""
bodyPr = tf._element.find(qn("a:bodyPr"))
tf_el_obj = text_frame_xml_element(tf)
tf_el = cast(XmlElement, tf_el_obj) if tf_el_obj is not None else None
bodyPr = tf_el.find(qn("a:bodyPr")) if tf_el is not None else None
anchor = bodyPr.get("anchor") if bodyPr is not None else None

margins = None
Expand Down
5 changes: 5 additions & 0 deletions clean_slides/pptx_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ def paragraph_xml_element(paragraph: object) -> object | None:
return getattr(paragraph, "_element", None)


def text_frame_xml_element(text_frame: object) -> object | None:
    """Look up the OOXML element stored on a text-frame object.

    Reads the ``_element`` attribute of *text_frame* and hands it back
    unchanged; yields ``None`` when the object has no such attribute.
    """
    # getattr with a default keeps this safe for arbitrary objects.
    element: object | None = getattr(text_frame, "_element", None)
    return element


def chart_xml_space(chart: object) -> object | None:
    """Look up the OOXML chart-space element stored on a chart object.

    Reads the ``_chartSpace`` attribute of *chart* and hands it back
    unchanged; yields ``None`` when the object has no such attribute.
    """
    # getattr with a default keeps this safe for arbitrary objects.
    chart_space: object | None = getattr(chart, "_chartSpace", None)
    return chart_space
Expand Down
5 changes: 4 additions & 1 deletion tests/test_pptx_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
slide_size_emu,
text_frame_paragraphs,
text_frame_text,
text_frame_xml_element,
)


Expand Down Expand Up @@ -68,6 +69,7 @@ class _Chart:
class _TextFrame:
text: str
paragraphs: list[object]
_element: object | None = None


@dataclass
Expand Down Expand Up @@ -224,7 +226,7 @@ def test_presentation_chart_types_collects_chart_values() -> None:


def test_shape_and_text_frame_helpers_cover_text_placeholder_connector() -> None:
text_frame = _TextFrame(text=" Hello ", paragraphs=[object()])
text_frame = _TextFrame(text=" Hello ", paragraphs=[object()], _element={"tag": "txBody"})
shape = _Shape(
has_chart=True,
chart=_Chart(chart_type=57, series=[_Series(name="Revenue")], _chartSpace={"tag": "cs"}),
Expand All @@ -248,6 +250,7 @@ def test_shape_and_text_frame_helpers_cover_text_placeholder_connector() -> None
assert frame is not None
assert text_frame_text(frame) == " Hello "
assert shape_text_frame_text(shape) == " Hello "
assert text_frame_xml_element(frame) == {"tag": "txBody"}
assert len(text_frame_paragraphs(frame)) == 1

assert shape_text(shape) == "inline"
Expand Down