@@ -542,3 +542,154 @@ def test_copy_images(app: SphinxTestApp) -> None:
542542 'svgimg.svg' ,
543543 'testimäge.png' ,
544544 }
545+
546+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_path_separator_normalization(app: SphinxTestApp) -> None:
    """Check that manifest hrefs in ``content.opf`` use forward slashes.

    Builds the project with the ``epub`` builder, parses the generated
    ``content.opf`` and verifies that:

    * every spine ``idref`` refers to an ``id`` declared in the manifest,
    * no manifest ``href`` contains a backslash (even on Windows),
    * no single ``href`` is registered under more than one manifest ``id``.
    """
    from collections import defaultdict
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns = {'opf': 'http://www.idpf.org/2007/opf'}

    # Keep (id, href) pairs in a list rather than a dict so that a repeated id
    # cannot silently overwrite an earlier entry and hide a duplicate href.
    manifest_ids: set[str] = set()
    manifest_items: list[tuple[str, str]] = []
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id: str | None = item.get('id')
        item_href: str | None = item.get('href')
        if item_id is None:
            # An item without an id can never be the target of a spine idref;
            # recording ``None`` would let an itemref lacking idref pass below.
            continue
        manifest_ids.add(item_id)
        if item_href is not None:
            manifest_items.append((item_id, item_href))

    # Every spine idref must resolve to a manifest id.
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref: str | None = itemref.get('idref')
        assert idref in manifest_ids, (
            f"spine idref '{idref}' does not exist in manifest"
        )

    # hrefs must be normalized to forward slashes, even on Windows.
    for item_id, href in manifest_items:
        assert '\\' not in href, (
            f"manifest item '{item_id}' href '{href}' contains backslashes"
        )

    # The same file must not be listed under several ids.  No separator
    # normalization is needed here: the loop above already guarantees that
    # no href contains a backslash.
    href_to_ids: defaultdict[str, list[str]] = defaultdict(list)
    for item_id, href in manifest_items:
        href_to_ids[href].append(item_id)

    duplicates: dict[str, list[str]] = {
        href: ids for href, ids in href_to_ids.items() if len(ids) > 1
    }
    assert not duplicates, f'Multiple IDs assigned to the same file: {duplicates}'
611+
612+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_subdirectory_paths(app: SphinxTestApp) -> None:
    """Check manifest hrefs that point into subdirectories.

    Builds the project with the ``epub`` builder and verifies, for every
    manifest item in ``content.opf`` that has an ``href``, that the href
    contains no backslash and that splitting it on ``/`` yields no empty
    path segment.
    """
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    for item in root.findall('.//opf:manifest/opf:item', ns):
        href: str | None = item.get('href')
        if href is None:
            continue

        assert '\\' not in href, (
            f"href '{href}' contains backslashes (should be forward slashes)"
        )

        # An empty segment means a leading, trailing or doubled slash.
        # For an href without any '/', this reduces to "href is non-empty"
        # being skipped, matching the original subdirectory-only check.
        if '/' in href:
            assert all(href.split('/')), (
                f"href '{href}' contains empty path segments"
            )
646+
647+
@pytest.mark.sphinx('epub', testroot='basic')
def test_epub_spine_idref_consistency(app: SphinxTestApp) -> None:
    """Check that spine idrefs and manifest ids in ``content.opf`` agree.

    Builds the project with the ``epub`` builder and verifies that every
    spine ``idref`` maps to a manifest item that has an ``href``.  An href
    that is referenced more than once from the spine is only reported via
    ``print`` and does not fail the test.
    """
    from collections import Counter
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    # Map manifest id -> href.  Items without an id are left out so that a
    # spine itemref lacking an idref cannot match them through a ``None`` key.
    id_to_href: dict[str, str | None] = {}
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id: str | None = item.get('id')
        if item_id is not None:
            id_to_href[item_id] = item.get('href')

    # Resolve every spine idref to the href of its manifest item.
    spine_hrefs: list[str] = []
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref: str | None = itemref.get('idref')
        assert idref in id_to_href, (
            f"manifest item not found for spine idref '{idref}'"
        )

        href = id_to_href[idref]
        assert href is not None, (
            f"manifest item for spine idref '{idref}' has no href"
        )
        spine_hrefs.append(href)

    # Normally each file appears only once in the spine, but some EPUBs may
    # intentionally reference the same file several times, so this is
    # reported as information rather than treated as a failure.
    duplicated_hrefs: list[str] = [
        href for href, count in Counter(spine_hrefs).items() if count > 1
    ]
    if duplicated_hrefs:
        print(
            'Info: The following hrefs are referenced multiple times in spine: '
            f'{duplicated_hrefs}'
        )
0 commit comments