Skip to content

Commit

Permalink
replace html5lib with nh3
Browse files Browse the repository at this point in the history
We're only using the sanitizer part of html5lib, and it's being
deprecated. It seems nh3 is the recommended replacement at this time.
This change eliminates a series of Deprecation Warnings.
  • Loading branch information
ikirudennis committed Aug 9, 2024
1 parent 8a5053e commit c758c63
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 18 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]
dynamic = ["version",]
dependencies = [
'html5lib>=1.0.1',
'nh3',
'regex>1.0; implementation_name != "pypy"',
]
requires-python = '>=3.8'
Expand Down
2 changes: 1 addition & 1 deletion tests/test_textile.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_sanitize():
assert result == expect

test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
result = '<p style="">a paragraph of evil text</p>'
result = '<p>a paragraph of evil text</p>'
expect = textile.Textile().parse(test, sanitize=True)
assert result == expect

Expand Down
11 changes: 6 additions & 5 deletions textile/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
import uuid
from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote
from collections import OrderedDict
from nh3 import clean

from textile.tools import sanitizer, imagesize
from textile.tools import imagesize
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
regex_snippets, syms_re_s, table_span_re_s)
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
Expand Down Expand Up @@ -236,12 +237,12 @@ def parse(self, text, rel=None, sanitize=False):

if self.block_tags:
if self.lite:
self.blocktag_whitelist = ['bq', 'p']
self.blocktag_allowlist = set(['bq', 'p', 'br'])
text = self.block(text)
else:
self.blocktag_whitelist = ['bq', 'p', 'bc', 'notextile',
self.blocktag_allowlist = set(['bq', 'p', 'br', 'bc', 'notextile',
'pre', 'h[1-6]', 'fn{0}+'.format(
regex_snippets['digit']), '###']
regex_snippets['digit']), '###'])
text = self.block(text)
text = self.placeNoteLists(text)
else:
Expand All @@ -263,7 +264,7 @@ def parse(self, text, rel=None, sanitize=False):
text = text.replace('{0}:glyph:'.format(self.uid), '')

if sanitize:
text = sanitizer.sanitize(text)
text = clean(text, tags=self.blocktag_allowlist)

text = self.retrieveTags(text)
text = self.retrieveURLs(text)
Expand Down
11 changes: 0 additions & 11 deletions textile/tools/sanitizer.py

This file was deleted.

0 comments on commit c758c63

Please sign in to comment.