diff --git a/CHANGES.rst b/CHANGES.rst index abc7bb18..fcb22475 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,11 +11,18 @@ Breaking changes: * Drop support for Python 3.3. (#358) * Drop support for Python 3.4. (#421) +Deprecations: + +* Deprecate the ``html5lib`` sanitizer (``html5lib.serialize(sanitize=True)`` and + ``html5lib.filters.sanitizer``). We recommend users migrate to `Bleach + <https://github.com/mozilla/bleach>`_. Please let us know if Bleach doesn't suffice for your + use. (#443) + Other changes: -* Try to import from `collections.abc` to remove DeprecationWarning and ensure - `html5lib` keeps working in future Python versions. (#403) -* Drop optional `datrie` dependency. (#442) +* Try to import from ``collections.abc`` to remove DeprecationWarning and ensure + ``html5lib`` keeps working in future Python versions. (#403) +* Drop optional ``datrie`` dependency. (#442) 1.0.1 diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index e852f53b..70ef9066 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -1,6 +1,15 @@ +"""Deprecated from html5lib 1.1. + +See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for +information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_ +is recommended as a replacement. Please let us know in the aforementioned issue +if Bleach is unsuitable for your needs. 
+ +""" from __future__ import absolute_import, division, unicode_literals import re +import warnings from xml.sax.saxutils import escape, unescape from six.moves import urllib_parse as urlparse @@ -11,6 +20,14 @@ __all__ = ["Filter"] +_deprecation_msg = ( + "html5lib's sanitizer is deprecated; see " + + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + + "us know if Bleach is unsuitable for your needs" +) + +warnings.warn(_deprecation_msg, DeprecationWarning) + allowed_elements = frozenset(( (namespaces['html'], 'a'), (namespaces['html'], 'abbr'), @@ -750,6 +767,9 @@ def __init__(self, """ super(Filter, self).__init__(source) + + warnings.warn(_deprecation_msg, DeprecationWarning) + self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes self.allowed_css_properties = allowed_css_properties diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 93882ec4..bb483421 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -27,14 +27,15 @@ def runtest(self): expected = self.test["output"] parsed = parseFragment(input) - serialized = serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char="'", - alphabetical_attributes=True) + with pytest.deprecated_call(): + serialized = serialize(parsed, + sanitize=True, + omit_optional_tags=False, + use_trailing_solidus=True, + space_before_trailing_solidus=False, + quote_attr_values="always", + quote_char="'", + alphabetical_attributes=True) errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, "\nReceived:", serialized]) diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 9a8e7f2d..f3faeb80 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -8,14 +8,15 @@ def sanitize_html(stream): parsed = parseFragment(stream) - serialized = 
serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char='"', - alphabetical_attributes=True) + with pytest.deprecated_call(): + serialized = serialize(parsed, + sanitize=True, + omit_optional_tags=False, + use_trailing_solidus=True, + space_before_trailing_solidus=False, + quote_attr_values="always", + quote_char='"', + alphabetical_attributes=True) return serialized