From ba3fd738f65d0e7b89a1552f7658b4e0d1544108 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Wed, 10 Jun 2020 03:04:42 +0100 Subject: [PATCH 1/3] Deprecate the sanitizer and recommend Bleach --- CHANGES.rst | 13 ++++++++++--- html5lib/filters/sanitizer.py | 16 ++++++++++++++++ html5lib/tests/sanitizer.py | 17 +++++++++-------- html5lib/tests/test_sanitizer.py | 17 +++++++++-------- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index abc7bb18..fcb22475 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,11 +11,18 @@ Breaking changes: * Drop support for Python 3.3. (#358) * Drop support for Python 3.4. (#421) +Deprecations: + +* Deprecate the ``html5lib`` sanitizer (``html5lib.serialize(sanitize=True)`` and + ``html5lib.filters.sanitizer``). We recommend users migrate to `Bleach + <https://github.com/mozilla/bleach>`_. Please let us know if Bleach doesn't suffice for your + use. (#443) + Other changes: -* Try to import from `collections.abc` to remove DeprecationWarning and ensure - `html5lib` keeps working in future Python versions. (#403) -* Drop optional `datrie` dependency. (#442) +* Try to import from ``collections.abc`` to remove DeprecationWarning and ensure + ``html5lib`` keeps working in future Python versions. (#403) +* Drop optional ``datrie`` dependency. 
(#442) 1.0.1 diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index e852f53b..6569c155 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, unicode_literals import re +import warnings from xml.sax.saxutils import escape, unescape from six.moves import urllib_parse as urlparse @@ -11,6 +12,13 @@ __all__ = ["Filter"] +warnings.warn( + "html5lib's sanitizer is deprecated; see " + + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + + "us know if Bleach in unsuitable for your needs", + DeprecationWarning +) + allowed_elements = frozenset(( (namespaces['html'], 'a'), (namespaces['html'], 'abbr'), @@ -750,6 +758,14 @@ def __init__(self, """ super(Filter, self).__init__(source) + + warnings.warn( + "html5lib's sanitizer is deprecated; see " + + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + + "us know if Bleach in unsuitable for your needs", + DeprecationWarning + ) + self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes self.allowed_css_properties = allowed_css_properties diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 93882ec4..bb483421 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -27,14 +27,15 @@ def runtest(self): expected = self.test["output"] parsed = parseFragment(input) - serialized = serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char="'", - alphabetical_attributes=True) + with pytest.deprecated_call(): + serialized = serialize(parsed, + sanitize=True, + omit_optional_tags=False, + use_trailing_solidus=True, + space_before_trailing_solidus=False, + quote_attr_values="always", + quote_char="'", + alphabetical_attributes=True) errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", 
expected, "\nReceived:", serialized]) diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 9a8e7f2d..f3faeb80 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -8,14 +8,15 @@ def sanitize_html(stream): parsed = parseFragment(stream) - serialized = serialize(parsed, - sanitize=True, - omit_optional_tags=False, - use_trailing_solidus=True, - space_before_trailing_solidus=False, - quote_attr_values="always", - quote_char='"', - alphabetical_attributes=True) + with pytest.deprecated_call(): + serialized = serialize(parsed, + sanitize=True, + omit_optional_tags=False, + use_trailing_solidus=True, + space_before_trailing_solidus=False, + quote_attr_values="always", + quote_char='"', + alphabetical_attributes=True) return serialized From 254b9102d869e12dfc1cd267c406f61d03668993 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Sun, 21 Jun 2020 19:54:23 +0100 Subject: [PATCH 2/3] fixup! Deprecate the sanitizer and recommend Bleach --- html5lib/filters/sanitizer.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 6569c155..ae18eccd 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -1,3 +1,10 @@ +"""Deprecated from html5lib 1.1. + +See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for information about its +deprecation; `Bleach <https://github.com/mozilla/bleach>`_ is recommended as a replacement. Please +let us know in the aforementioned issue if Bleach is unsuitable for your needs. 
+ +""" from __future__ import absolute_import, division, unicode_literals import re @@ -12,13 +19,14 @@ __all__ = ["Filter"] -warnings.warn( +__deprecation_msg = ( "html5lib's sanitizer is deprecated; see " + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + - "us know if Bleach in unsuitable for your needs", - DeprecationWarning + "us know if Bleach is unsuitable for your needs" ) +warnings.warn(__deprecation_msg, DeprecationWarning) + allowed_elements = frozenset(( (namespaces['html'], 'a'), (namespaces['html'], 'abbr'), @@ -759,12 +767,7 @@ def __init__(self, """ super(Filter, self).__init__(source) - warnings.warn( - "html5lib's sanitizer is deprecated; see " + - "https://github.com/html5lib/html5lib-python/issues/443 and please let " + - "us know if Bleach in unsuitable for your needs", - DeprecationWarning - ) + warnings.warn(__deprecation_msg, DeprecationWarning) self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes From bbebb8d7b07d07cfa9f50521f8dd61785854f5a2 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Sun, 21 Jun 2020 20:08:25 +0100 Subject: [PATCH 3/3] fixup! fixup! Deprecate the sanitizer and recommend Bleach --- html5lib/filters/sanitizer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index ae18eccd..70ef9066 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -1,8 +1,9 @@ """Deprecated from html5lib 1.1. -See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for information about its -deprecation; `Bleach <https://github.com/mozilla/bleach>`_ is recommended as a replacement. Please -let us know in the aforementioned issue if Bleach is unsuitable for your needs. +See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for +information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_ +is recommended as a replacement. Please let us know in the aforementioned issue +if Bleach is unsuitable for your needs. 
""" from __future__ import absolute_import, division, unicode_literals @@ -19,13 +20,13 @@ __all__ = ["Filter"] -__deprecation_msg = ( +_deprecation_msg = ( "html5lib's sanitizer is deprecated; see " + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + "us know if Bleach is unsuitable for your needs" ) -warnings.warn(__deprecation_msg, DeprecationWarning) +warnings.warn(_deprecation_msg, DeprecationWarning) allowed_elements = frozenset(( (namespaces['html'], 'a'), @@ -767,7 +768,7 @@ def __init__(self, """ super(Filter, self).__init__(source) - warnings.warn(__deprecation_msg, DeprecationWarning) + warnings.warn(_deprecation_msg, DeprecationWarning) self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes