Skip to content

Commit 887b120

Browse files
committed
Unify path separators in OPF files to slashes.
1 parent 5d0ad16 commit 887b120

File tree

2 files changed

+155
-2
lines changed

2 files changed

+155
-2
lines changed

sphinx/builders/_epub_base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,11 @@ def build_content(self) -> None:
613613
continue
614614
if refnode['refuri'] in self.ignored_files:
615615
continue
616-
spine = Spine(html.escape(self.make_id(refnode['refuri'])), True)
616+
spine = Spine(
617+
html.escape(self.make_id(refnode['refuri'].replace(os.sep, '/'))), True
618+
)
617619
metadata['spines'].append(spine)
618-
spinefiles.add(refnode['refuri'])
620+
spinefiles.add(refnode['refuri'].replace(os.sep, '/'))
619621
for info in self.domain_indices:
620622
spine = Spine(html.escape(self.make_id(info[0] + self.out_suffix)), True)
621623
metadata['spines'].append(spine)

tests/test_builders/test_build_epub.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,154 @@ def test_copy_images(app: SphinxTestApp) -> None:
542542
'svgimg.svg',
543543
'testimäge.png',
544544
}
545+
546+
547+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_path_separator_normalization(app: SphinxTestApp) -> None:
    """Check that ``content.opf`` uses only forward slashes in manifest hrefs.

    Additionally checks that every spine ``idref`` matches an ``id`` found in
    the manifest, and that no single href is listed under more than one id.
    """
    from xml.etree import ElementTree as ET

    app.build()

    package_file = app.outdir / 'content.opf'
    assert package_file.exists(), 'content.opf was not generated'

    package = ET.parse(str(package_file)).getroot()  # noqa: S314
    opf_ns = {'opf': 'http://www.idpf.org/2007/opf'}
    manifest_items = package.findall('.//opf:manifest/opf:item', opf_ns)

    # Every id seen in the manifest (None when an item lacks the attribute).
    known_ids: set[str | None] = {entry.get('id') for entry in manifest_items}

    # id -> href, restricted to the items that carry both attributes.
    href_by_id: dict[str, str] = {}
    for entry in manifest_items:
        entry_id, entry_href = entry.get('id'), entry.get('href')
        if entry_id is None or entry_href is None:
            continue
        href_by_id[entry_id] = entry_href

    # Each spine reference has to resolve to some manifest id.
    for reference in package.findall('.//opf:spine/opf:itemref', opf_ns):
        idref = reference.get('idref')
        assert idref in known_ids, (
            f"spine idref '{idref}' does not exist in manifest"
        )

    # Backslashes must never reach the manifest, even when built on Windows.
    for entry_id, entry_href in href_by_id.items():
        assert '\\' not in entry_href, (
            f"manifest item '{entry_id}' href '{entry_href}' contains backslashes"
        )

    # Group ids by separator-normalized href; a group larger than one means
    # the same file was registered under several ids.
    ids_by_href: dict[str, list[str | None]] = {}
    for entry_id, entry_href in href_by_id.items():
        ids_by_href.setdefault(entry_href.replace('\\', '/'), []).append(entry_id)

    duplicates: dict[str, list[str | None]] = {
        target: ids for target, ids in ids_by_href.items() if len(ids) > 1
    }
    assert not duplicates, f'Multiple IDs assigned to the same file: {duplicates}'
611+
612+
613+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_subdirectory_paths(app: SphinxTestApp) -> None:
    """Check manifest hrefs that point into subdirectories.

    No href may contain a backslash, and an href containing ``/`` must not
    have an empty segment between (or around) its slashes.
    """
    from xml.etree import ElementTree as ET

    app.build()

    package_file = app.outdir / 'content.opf'
    assert package_file.exists()

    package = ET.parse(str(package_file)).getroot()  # noqa: S314
    opf_ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    for entry in package.findall('.//opf:manifest/opf:item', opf_ns):
        href: str | None = entry.get('href')
        if href is None:
            # Items without an href have nothing to validate here.
            continue

        assert '\\' not in href, (
            f"href '{href}' contains backslashes (should be forward slashes)"
        )

        if '/' not in href:
            continue

        # A leading, trailing, or doubled slash yields an empty segment.
        segments: list[str] = href.split('/')
        assert all(segments), f"href '{href}' contains empty path segments"
646+
647+
648+
@pytest.mark.sphinx('epub', testroot='basic')
def test_epub_spine_idref_consistency(app: SphinxTestApp) -> None:
    """Test that spine idrefs and manifest ids are consistent.

    Builds the project, parses ``content.opf`` and verifies that:

    * the file was generated (explicit failure message instead of a bare
      ``FileNotFoundError`` from the XML parser),
    * manifest ``id`` values are unique, so the id -> href mapping used
      below cannot silently drop an item,
    * every spine ``idref`` refers to an ``id`` present in the manifest.

    An href referenced more than once from the spine is only reported via
    ``print``; it is deliberately not treated as a failure.
    """
    from collections import Counter
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    manifest_items = root.findall('.//opf:manifest/opf:item', ns)

    # A plain dict would keep only the last item for a repeated id and hide
    # the duplication, so check uniqueness before building the mapping.
    id_counts = Counter(item.get('id') for item in manifest_items)
    duplicated_ids: list[str | None] = [
        item_id for item_id, count in id_counts.items() if count > 1
    ]
    assert not duplicated_ids, f'duplicate manifest ids: {duplicated_ids}'

    # Create id -> href mapping from manifest
    id_to_href: dict[str | None, str | None] = {
        item.get('id'): item.get('href') for item in manifest_items
    }

    # For each idref in spine, verify the corresponding manifest item exists
    spine_hrefs: list[str | None] = []
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref: str | None = itemref.get('idref')
        assert idref in id_to_href, f"manifest item not found for spine idref '{idref}'"
        spine_hrefs.append(id_to_href[idref])

    # Note: Some EPUBs may intentionally reference the same file multiple times,
    # so this is logged as informational rather than a strict error
    duplicated_hrefs: list[str | None] = [
        href for href, count in Counter(spine_hrefs).items() if count > 1
    ]
    if duplicated_hrefs:
        print(
            'Info: The following hrefs are referenced multiple times '
            f'in spine: {duplicated_hrefs}'
        )

0 commit comments

Comments
 (0)