|
34 | 34 | from sphinx.util.nodes import get_node_line |
35 | 35 |
|
36 | 36 | if TYPE_CHECKING: |
37 | | - from collections.abc import Callable, Iterator |
| 37 | + from collections.abc import Callable, Iterator, Sequence |
38 | 38 | from typing import Any, Literal, TypeAlias |
39 | 39 |
|
40 | 40 | from requests import Response |
@@ -385,6 +385,9 @@ def __init__( |
385 | 385 | self.documents_exclude: list[re.Pattern[str]] = list( |
386 | 386 | map(re.compile, config.linkcheck_exclude_documents) |
387 | 387 | ) |
| 388 | + self.ignore_case: Sequence[re.Pattern[str]] = tuple( |
| 389 | + map(re.compile, config.linkcheck_case_insensitive_urls) |
| 390 | + ) |
388 | 391 | self.auth = [ |
389 | 392 | (re.compile(pattern), auth_info) |
390 | 393 | for pattern, auth_info in config.linkcheck_auth |
@@ -629,8 +632,15 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: |
629 | 632 | netloc = urlsplit(req_url).netloc |
630 | 633 | self.rate_limits.pop(netloc, None) |
631 | 634 |
|
| 635 | + # Check if URL should be normalised case-insensitively |
| 636 | + ignore_case = any(pat.match(req_url) for pat in self.ignore_case) |
| 637 | + normalised_req_url = self._normalise_url(req_url, ignore_case=ignore_case) |
| 638 | + normalised_response_url = self._normalise_url( |
| 639 | + response_url, ignore_case=ignore_case |
| 640 | + ) |
| 641 | + |
632 | 642 | if ( |
633 | | - (response_url.rstrip('/') == req_url.rstrip('/')) |
| 643 | + normalised_response_url == normalised_req_url |
634 | 644 | or _allowed_redirect(req_url, response_url, self.allowed_redirects) |
635 | 645 | ): # fmt: skip |
636 | 646 | return _Status.WORKING, '', 0 |
@@ -676,6 +686,17 @@ def limit_rate(self, response_url: str, retry_after: str | None) -> float | None |
676 | 686 | self.rate_limits[netloc] = RateLimit(delay, next_check) |
677 | 687 | return next_check |
678 | 688 |
|
| 689 | + @staticmethod |
| 690 | + def _normalise_url(url: str, *, ignore_case: bool) -> str: |
| 691 | + normalised_url = url.rstrip('/') |
| 692 | + if not ignore_case: |
| 693 | + return normalised_url |
| 694 | + # URI fragments are case-sensitive, so case-fold only the part before the first '#' |
| 695 | + url_part, sep, fragment = normalised_url.partition('#') |
| 696 | + if sep: |
| 697 | + return f'{url_part.casefold()}#{fragment}' |
| 698 | + return url_part.casefold() |
| 699 | + |
679 | 700 |
|
680 | 701 | def _get_request_headers( |
681 | 702 | uri: str, |
@@ -816,6 +837,12 @@ def setup(app: Sphinx) -> ExtensionMetadata: |
816 | 837 | app.add_config_value( |
817 | 838 | 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) |
818 | 839 | ) |
| 840 | + app.add_config_value( |
| 841 | + 'linkcheck_case_insensitive_urls', |
| 842 | + (), |
| 843 | + '', |
| 844 | + types=frozenset({frozenset, list, set, tuple}), |
| 845 | + ) |
819 | 846 |
|
820 | 847 | app.add_event('linkcheck-process-uri') |
821 | 848 |
|
|
0 commit comments