diff --git a/misago/__init__.py b/misago/__init__.py index 4d6b17c95c..58f1d46665 100644 --- a/misago/__init__.py +++ b/misago/__init__.py @@ -1,5 +1,5 @@ from .plugins.pluginlist import load_plugin_list_if_exists -__version__ = "0.28.2" +__version__ = "0.29.0" __released__ = True diff --git a/misago/conf/defaults.py b/misago/conf/defaults.py index 63db329de4..03d0d5719d 100644 --- a/misago/conf/defaults.py +++ b/misago/conf/defaults.py @@ -36,11 +36,6 @@ MISAGO_MARKUP_EXTENSIONS = [] -# Bleach callbacks for linkifying paragraphs - -MISAGO_BLEACH_CALLBACKS = [] - - # Custom post validators MISAGO_POST_VALIDATORS = [] diff --git a/misago/markup/htmlparser.py b/misago/markup/htmlparser.py new file mode 100644 index 0000000000..dd68ae8e3b --- /dev/null +++ b/misago/markup/htmlparser.py @@ -0,0 +1,110 @@ +import html +from dataclasses import dataclass + +import html5lib + +SINGLETON_TAGS = ( + "area", + "base", + "br", + "col", + "command", + "embed", + "hr", + "img", + "input", + "keygen", + "link", + "meta", + "param", + "source", + "track", + "wbr", +) + + +class Node: + def __str__(self): + raise NotImplementedError("Subclasses of 'Node' need to implement __str__") + + +@dataclass +class RootNode(Node): + tag = None + children: list + + def __str__(self): + return "".join(str(child) for child in self.children) + + +@dataclass +class ElementNode(Node): + tag: str + attrs: dict + children: list + + def __str__(self): + attrs_padding = " " if self.attrs else "" + attrs = " ".join(self.attrs_str()) + + if self.tag in SINGLETON_TAGS: + return f"<{self.tag}{attrs_padding}{attrs} />" + + children = "".join(str(child) for child in self.children) + return f"<{self.tag}{attrs_padding}{attrs}>{children}{self.tag}>" + + def attrs_str(self): + for name, value in self.attrs.items(): + if value is True or not value: + yield html.escape(str(name)) + else: + yield (f'{html.escape(str(name))}="{html.escape(str(value))}"') + + +@dataclass +class TextNode(Node): + text: str + + def __str__(self): + return html.escape(self.text) + + +def parse_html_string(string: str) -> RootNode: + element = html5lib.parse( + string, + namespaceHTMLElements=False, + ) + + body = element.find("body") + root_node = RootNode(children=[]) + + if body.text: + root_node.children.append(TextNode(text=body.text)) + + for child in body: + add_child_node(root_node, child) + + return root_node + + +def add_child_node(parent, element): + node = ElementNode( + tag=element.tag, + attrs=element.attrib, + children=[], + ) + + if element.text: + node.children.append(TextNode(text=element.text)) + + parent.children.append(node) + + if element.tail: + parent.children.append(TextNode(text=element.tail)) + + for child in element: + add_child_node(node, child) + + +def print_html_string(root_node: RootNode) -> str: + return str(root_node) diff --git a/misago/markup/links.py b/misago/markup/links.py new file mode 100644 index 0000000000..344f8b84ac --- /dev/null +++ b/misago/markup/links.py @@ -0,0 +1,199 @@ +import re +from typing import Union + +from django.http import Http404 +from django.urls import resolve + +from .htmlparser import ElementNode, RootNode, TextNode + +MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail") +URL_RE = re.compile( + r"(https?://)?" + r"(www\.)?" + r"(\w+((-|_)\w+)?\.)?" + r"\w+((_|-|\w)+)?(\.[a-z][a-z]+)" + r"(:[1-9][0-9]+)?" + r"([^\s<>\[\]\(\);:]+)?" +) + + +def linkify_texts(node: Union[RootNode, ElementNode]): + # Skip link replacement in some nodes + if node.tag in ("pre", "code", "a"): + return + + new_children = [] + for child in node.children: + if isinstance(child, TextNode): + if URL_RE.search(child.text): + new_children += replace_links_in_text(child.text) + else: + new_children.append(child) + else: + new_children.append(child) + linkify_texts(child) + + node.children = new_children + + +def replace_links_in_text(text: str) -> list: + nodes = [] + + while True: + match = URL_RE.search(text) + if not match: + if text: + nodes.append(TextNode(text=text)) + return nodes + + start, end = match.span() + url = text[start:end] + + # Append text between 0 and start to nodes + if start > 0: + nodes.append(TextNode(text=text[:start])) + + nodes.append( + ElementNode( + tag="a", + attrs={"href": url}, + children=[ + TextNode(text=strip_link_protocol(url)), + ], + ) + ) + + text = text[end:] + + +def clean_links( + request, + result, + node: Union[RootNode, ElementNode, TextNode], + force_shva=False, +): + if isinstance(node, TextNode): + return + + for child in node.children: + if not isinstance(child, ElementNode): + continue + + if child.tag == "a": + clean_link_node(request, result, child, force_shva) + clean_links(request, result, child, force_shva) + elif child.tag == "img": + clean_image_node(request, result, child, force_shva) + else: + clean_links(request, result, child, force_shva) + + +def clean_link_node( + request, + result: dict, + node: ElementNode, + force_shva: bool, +): + host = request.get_host() + href = node.attrs.get("href") or "/" + + if is_internal_link(href, host): + href = clean_internal_link(href, host) + result["internal_links"].append(href) + href = clean_attachment_link(href, force_shva) + else: + result["outgoing_links"].append(strip_link_protocol(href)) + href = assert_link_prefix(href) + node.attrs["rel"] = "external nofollow noopener" + + node.attrs["target"] = "_blank" + node.attrs["href"] = href + + if len(node.children) == 0: + node.children.append(strip_link_protocol(href)) + elif len(node.children) == 1 and isinstance(node.children[0], TextNode): + text = node.children[0].text + if URL_RE.match(text): + node.children[0].text = strip_link_protocol(text) + + +def clean_image_node( + request, + result: dict, + node: ElementNode, + force_shva: bool, +): + host = request.get_host() + src = node.attrs.get("src") or "/" + + node.attrs["alt"] = strip_link_protocol(node.attrs["alt"]) + + if is_internal_link(src, host): + src = clean_internal_link(src, host) + result["images"].append(src) + src = clean_attachment_link(src, force_shva) + else: + result["images"].append(strip_link_protocol(src)) + src = assert_link_prefix(src) + + node.attrs["src"] = src + + +def is_internal_link(link, host): + if link.startswith("/") and not link.startswith("//"): + return True + + link = strip_link_protocol(link).lstrip("www.").lower() + return link.lower().startswith(host.lstrip("www.")) + + +def strip_link_protocol(link): + if link.lower().startswith("https:"): + link = link[6:] + if link.lower().startswith("http:"): + link = link[5:] + if link.startswith("//"): + link = link[2:] + return link + + +def assert_link_prefix(link): + if link.lower().startswith("https:"): + return link + if link.lower().startswith("http:"): + return link + if link.startswith("//"): + return "http:%s" % link + + return "http://%s" % link + + +def clean_internal_link(link, host): + link = strip_link_protocol(link) + + if link.lower().startswith("www."): + link = link[4:] + if host.lower().startswith("www."): + host = host[4:] + + if link.lower().startswith(host): + link = link[len(host) :] + + return link or "/" + + +def clean_attachment_link(link, force_shva=False): + try: + resolution = resolve(link) + if not resolution.namespaces: + return link + url_name = ":".join(resolution.namespaces + [resolution.url_name]) + except (Http404, ValueError): + return link + + if url_name in MISAGO_ATTACHMENT_VIEWS: + if force_shva: + link = "%s?shva=1" % link + elif link.endswith("?shva=1"): + link = link[:-7] + return link diff --git a/misago/markup/mentions.py b/misago/markup/mentions.py index 56fdb5f2f4..8414772399 100644 --- a/misago/markup/mentions.py +++ b/misago/markup/mentions.py @@ -1,65 +1,128 @@ import re +from typing import Union -from bs4 import BeautifulSoup from django.contrib.auth import get_user_model -SUPPORTED_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6", "div", "p") +from .htmlparser import ( + ElementNode, + RootNode, + TextNode, +) + +EXCLUDE_ELEMENTS = ("pre", "code", "a") USERNAME_RE = re.compile(r"@[0-9a-z]+", re.IGNORECASE) -MENTIONS_LIMIT = 24 +MENTIONS_LIMIT = 32 -def add_mentions(request, result): +def add_mentions(result, root_node): if "@" not in result["parsed_text"]: return - mentions_dict = {} + mentions = set() + nodes = [] + + find_mentions(root_node, mentions, nodes) + + if not mentions or len(mentions) > MENTIONS_LIMIT: + return # No need to run mentions logic - soup = BeautifulSoup(result["parsed_text"], "html5lib") + users_data = get_users_data(mentions) + if not users_data: + return # Mentioned users don't exist - elements = [] - for tagname in SUPPORTED_TAGS: - if tagname in result["parsed_text"]: - elements += soup.find_all(tagname) - for element in elements: - add_mentions_to_element(request, element, mentions_dict) + for node in nodes: + add_mentions_to_node(node, users_data) - result["parsed_text"] = str(soup.body)[6:-7].strip() - result["mentions"] = list(filter(bool, mentions_dict.values())) + result["mentions"] = [user[0] for user in users_data.values()] -def add_mentions_to_element(request, element, mentions_dict): - for item in element.contents: - if item.name: - if item.name != "a": - add_mentions_to_element(request, item, mentions_dict) - elif "@" in item.string: - parse_string(request, item, mentions_dict) +def find_mentions( + node: Union[ElementNode, RootNode], + mentions: set, + nodes: set, +): + if isinstance(node, ElementNode) and node.tag in EXCLUDE_ELEMENTS: + return + + tracked_node = False + for child in node.children: + if isinstance(child, TextNode): + results = find_mentions_in_str(child.text) + if results: + mentions.update(results) + if not tracked_node: + tracked_node = True + nodes.append(node) + else: + find_mentions(child, mentions, nodes) -def parse_string(request, element, mentions_dict): +def find_mentions_in_str(text: str): + matches = USERNAME_RE.findall(text) + if not matches: + return None + + return set([match.lower()[1:] for match in matches]) + + +def get_users_data(mentions): User = get_user_model() + users_data = {} + + queryset = User.objects.filter(slug__in=mentions).values_list( + "id", "username", "slug" + ) + + for user_id, username, slug in queryset: + users_data[slug] = (user_id, username) + + return users_data + + +def add_mentions_to_node(node, users_data): + new_children = [] + + for child in node.children: + if isinstance(child, TextNode): + new_children += add_mentions_to_text(child.text, users_data) + else: + new_children.append(child) + + node.children = new_children + + +def add_mentions_to_text(text: str, users_data): + nodes = [] - def replace_mentions(matchobj): - if len(mentions_dict) >= MENTIONS_LIMIT: - return matchobj.group(0) + while True: + match = USERNAME_RE.search(text) + if not match: + if text: + nodes.append(TextNode(text=text)) + return nodes - username = matchobj.group(0)[1:].strip().lower() + start, end = match.span() + user_slug = text[start + 1 : end].lower() - if username not in mentions_dict: - if username == request.user.slug: - mentions_dict[username] = request.user - else: - try: - mentions_dict[username] = User.objects.get(slug=username) - except User.DoesNotExist: - mentions_dict[username] = None + # Append text between 0 and start to nodes + if start > 0: + nodes.append(TextNode(text=text[:start])) - if mentions_dict[username]: - user = mentions_dict[username] - return '@%s' % (user.get_absolute_url(), user.username) + # Append match string to nodes and keep scanning + if user_slug not in users_data: + nodes.append(TextNode(text=text[:end])) + text = text[end:] + continue - # we've failed to resolve user for username - return matchobj.group(0) + user_id, username = users_data[user_slug] + nodes.append( + ElementNode( + tag="a", + attrs={ + "href": f"/u/{user_slug}/{user_id}/", + }, + children=[TextNode(text=f"@{username}")], + ) + ) - replaced_string = USERNAME_RE.sub(replace_mentions, element.string) - element.replace_with(BeautifulSoup(replaced_string, "html.parser")) + text = text[end:] diff --git a/misago/markup/parser.py b/misago/markup/parser.py index 1ecc2ad530..47dbab2bdf 100644 --- a/misago/markup/parser.py +++ b/misago/markup/parser.py @@ -1,24 +1,18 @@ -import bleach import markdown -from bs4 import BeautifulSoup -from django.http import Http404 -from django.urls import resolve -from htmlmin.minify import html_minify from markdown.extensions.fenced_code import FencedCodeExtension -from ..conf import settings from .bbcode.code import CodeBlockExtension from .bbcode.hr import BBCodeHRProcessor from .bbcode.inline import bold, image, italics, underline, url from .bbcode.quote import QuoteExtension from .bbcode.spoiler import SpoilerExtension +from .htmlparser import parse_html_string, print_html_string +from .links import clean_links, linkify_texts from .md.shortimgs import ShortImagesExtension from .md.strikethrough import StrikethroughExtension from .mentions import add_mentions from .pipeline import pipeline -MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail") - def parse( text, @@ -29,7 +23,6 @@ def parse( allow_images=True, allow_blocks=True, force_shva=False, - minify=True, ): """ Message parser @@ -61,19 +54,24 @@ def parse( # Clean and store parsed text parsing_result["parsed_text"] = parsed_text.strip() - if allow_links: - linkify_paragraphs(parsing_result) + # Run additional operations + if allow_mentions or allow_links or allow_images: + root_node = parse_html_string(parsing_result["parsed_text"]) - parsing_result = pipeline.process_result(parsing_result) + if allow_links: + linkify_texts(root_node) - if allow_mentions: - add_mentions(request, parsing_result) + if allow_mentions: + add_mentions(parsing_result, root_node) - if allow_links or allow_images: - clean_links(request, parsing_result, force_shva) + if allow_links or allow_images: + clean_links(request, parsing_result, root_node, force_shva) + + parsing_result["parsed_text"] = print_html_string(root_node) + + # Let plugins do their magic + parsing_result = pipeline.process_result(parsing_result) - if minify: - minify_result(parsing_result) return parsing_result @@ -144,115 +142,3 @@ def md_factory(allow_links=True, allow_images=True, allow_blocks=True): md.parser.blockprocessors.deregister("ulist") return pipeline.extend_markdown(md) - - -def linkify_paragraphs(result): - result["parsed_text"] = bleach.linkify( - result["parsed_text"], - callbacks=settings.MISAGO_BLEACH_CALLBACKS, - skip_tags=["a", "code", "pre"], - parse_email=True, - ) - - -def clean_links(request, result, force_shva=False): - host = request.get_host() - - soup = BeautifulSoup(result["parsed_text"], "html5lib") - for link in soup.find_all("a"): - if is_internal_link(link["href"], host): - link["href"] = clean_internal_link(link["href"], host) - result["internal_links"].append(link["href"]) - link["href"] = clean_attachment_link(link["href"], force_shva) - else: - result["outgoing_links"].append(clean_link_prefix(link["href"])) - link["href"] = assert_link_prefix(link["href"]) - link["rel"] = "external nofollow noopener" - - link["target"] = "_blank" - - if link.string: - link.string = clean_link_prefix(link.string) - - for img in soup.find_all("img"): - img["alt"] = clean_link_prefix(img["alt"]) - if is_internal_link(img["src"], host): - img["src"] = clean_internal_link(img["src"], host) - result["images"].append(img["src"]) - img["src"] = clean_attachment_link(img["src"], force_shva) - else: - result["images"].append(clean_link_prefix(img["src"])) - img["src"] = assert_link_prefix(img["src"]) - - # [6:-7] trims
wrap - result["parsed_text"] = str(soup.body)[6:-7] - - -def is_internal_link(link, host): - if link.startswith("/") and not link.startswith("//"): - return True - - link = clean_link_prefix(link).lstrip("www.").lower() - return link.lower().startswith(host.lstrip("www.")) - - -def clean_link_prefix(link): - if link.lower().startswith("https:"): - link = link[6:] - if link.lower().startswith("http:"): - link = link[5:] - if link.startswith("//"): - link = link[2:] - return link - - -def assert_link_prefix(link): - if link.lower().startswith("https:"): - return link - if link.lower().startswith("http:"): - return link - if link.startswith("//"): - return "http:%s" % link - - return "http://%s" % link - - -def clean_internal_link(link, host): - link = clean_link_prefix(link) - - if link.lower().startswith("www."): - link = link[4:] - if host.lower().startswith("www."): - host = host[4:] - - if link.lower().startswith(host): - link = link[len(host) :] - - return link or "/" - - -def clean_attachment_link(link, force_shva=False): - try: - resolution = resolve(link) - if not resolution.namespaces: - return link - url_name = ":".join(resolution.namespaces + [resolution.url_name]) - except (Http404, ValueError): - return link - - if url_name in MISAGO_ATTACHMENT_VIEWS: - if force_shva: - link = "%s?shva=1" % link - elif link.endswith("?shva=1"): - link = link[:-7] - return link - - -def minify_result(result): - result["parsed_text"] = html_minify(result["parsed_text"]) - result["parsed_text"] = strip_html_head_body(result["parsed_text"]) - - -def strip_html_head_body(parsed_text): - # [25:-14] trims and - return parsed_text[25:-14] diff --git a/misago/markup/pipeline.py b/misago/markup/pipeline.py index 7cf29deb6d..d938171547 100644 --- a/misago/markup/pipeline.py +++ b/misago/markup/pipeline.py @@ -1,9 +1,8 @@ from importlib import import_module -from bs4 import BeautifulSoup - from .. import hooks from ..conf import settings +from .htmlparser import parse_html_string, print_html_string class MarkupPipeline: @@ -22,18 +21,23 @@ def extend_markdown(self, md): return md def process_result(self, result): - soup = BeautifulSoup(result["parsed_text"], "html5lib") + if ( + not settings.MISAGO_MARKUP_EXTENSIONS + and not hooks.parsing_result_processors + ): + return result + + html_tree = parse_html_string(result["parsed_text"]) for extension in settings.MISAGO_MARKUP_EXTENSIONS: module = import_module(extension) if hasattr(module, "clean_parsed"): hook = getattr(module, "clean_parsed") - hook.process_result(result, soup) + hook.process_result(result, html_tree) for extension in hooks.parsing_result_processors: - extension(result, soup) + extension(result, html_tree) - souped_text = str(soup.body).strip()[6:-7] - result["parsed_text"] = souped_text.strip() + result["parsed_text"] = print_html_string(html_tree) return result diff --git a/misago/markup/tests/snapshots/snap_test_code_bbcode.py b/misago/markup/tests/snapshots/snap_test_code_bbcode.py index d12efb0f42..938bc010f3 100644 --- a/misago/markup/tests/snapshots/snap_test_code_bbcode.py +++ b/misago/markup/tests/snapshots/snap_test_code_bbcode.py @@ -13,16 +13,18 @@ snapshots[ "test_code_with_language_parameter 1" -] = 'echo("Hello!");
'
+] = 'echo("Hello!");
'
snapshots[
"test_code_with_quoted_language_parameter 1"
-] = 'echo("Hello!");
'
+] = 'echo("Hello!");
'
snapshots[
"test_multi_line_code 1"
] = """<script>
-alert("!")
+alert("!")
</script>
"""
-snapshots["test_single_line_code 1"] = 'echo("Hello!");
'
+snapshots[
+ "test_single_line_code 1"
+] = "echo("Hello!");
"
diff --git a/misago/markup/tests/snapshots/snap_test_code_md.py b/misago/markup/tests/snapshots/snap_test_code_md.py
index 8099e7d404..63a61a1919 100644
--- a/misago/markup/tests/snapshots/snap_test_code_md.py
+++ b/misago/markup/tests/snapshots/snap_test_code_md.py
@@ -10,17 +10,17 @@
snapshots[
"test_multi_line_code_markdown 1"
] = """<script>
-alert("!")
+alert("!")
</script>
"""
snapshots[
"test_multi_line_code_markdown_with_language 1"
] = """<script>
-alert("!")
+alert("!")
</script>
"""
snapshots[
"test_single_line_code_markdown 1"
-] = '<script>alert("!")</script>
<script>alert("!")</script>
-""" snapshots[ "test_inline_code_is_escaped 1" -] = '<script>alert("!")</script>
+<script>alert("!")</script>
<script>alert("!")</script>
<script>alert("!")</script>
<script>alert("!")</script>
' +snapshots[ + "test_text_is_escaped 1" +] = "<script>alert("!")</script>
" diff --git a/misago/markup/tests/snapshots/snap_test_hr_bbcode.py b/misago/markup/tests/snapshots/snap_test_hr_bbcode.py index e90364d82b..73a745bd25 100644 --- a/misago/markup/tests/snapshots/snap_test_hr_bbcode.py +++ b/misago/markup/tests/snapshots/snap_test_hr_bbcode.py @@ -10,5 +10,5 @@ snapshots[ "test_hr_bbcode_is_replaced_if_its_alone_in_paragraph 1" ] = """Lorem ipsum dolor met.
-Sit amet elit.
""" diff --git a/misago/markup/tests/snapshots/snap_test_inline_bbcode.py b/misago/markup/tests/snapshots/snap_test_inline_bbcode.py index d2e397edaa..eec65f9c93 100644 --- a/misago/markup/tests/snapshots/snap_test_inline_bbcode.py +++ b/misago/markup/tests/snapshots/snap_test_inline_bbcode.py @@ -11,15 +11,15 @@ snapshots[ "test_image_bbcode 1" -] = 'Lorem ipsum
Lorem ipsum
Lorem ipsum
Lorem ipsum
Lorem ipsum
Lorem ipsum
Lorem ipsum!
" @@ -31,7 +31,7 @@ snapshots[ "test_simple_inline_bbcode_is_escaped 1" -] = 'Lorem ips <script language="application/javascript"> um!
' +] = "Lorem ips <script language="application/javascript"> um!
" snapshots["test_underline_bbcode 1"] = "Lorem ipsum!
" @@ -41,11 +41,11 @@ snapshots[ "test_url_bbcode_is_escaped 1" -] = 'Lorem <script language="application/javascript"> ipsum
' +] = 'Lorem <script language="application/javascript"> ipsum
' snapshots[ "test_url_bbcode_link_text_is_escaped 1" -] = 'Lorem <script language="application/javascript"> ipsum
' +] = 'Lorem <script language="application/javascript"> ipsum
' snapshots[ "test_url_bbcode_with_link_text 1" diff --git a/misago/markup/tests/snapshots/snap_test_link_handling.py b/misago/markup/tests/snapshots/snap_test_link_handling.py index 1c686af11d..bc45f93841 100644 --- a/misago/markup/tests/snapshots/snap_test_link_handling.py +++ b/misago/markup/tests/snapshots/snap_test_link_handling.py @@ -21,7 +21,7 @@ snapshots[ "test_local_image_is_changed_to_relative_link 1" -] = 'clean_links step cleans
clean_links step cleans
Lorem ipsum
+] = """
Lorem ipsum
http://test.com
"""
snapshots[
diff --git a/misago/markup/tests/snapshots/snap_test_quote_bbcode.py b/misago/markup/tests/snapshots/snap_test_quote_bbcode.py
index 89fd225eb6..093169a488 100644
--- a/misago/markup/tests/snapshots/snap_test_quote_bbcode.py
+++ b/misago/markup/tests/snapshots/snap_test_quote_bbcode.py
@@ -8,73 +8,73 @@
snapshots = Snapshot()
snapshots[
- "test_single_line_quote 1"
+ "test_multi_line_quote 1"
] = """"""
snapshots[
- "test_single_line_authored_quote 1"
+ "test_quote_can_contain_bbcode_or_markdown 1"
] = """"""
snapshots[
- "test_single_line_authored_quote_without_quotations 1"
+ "test_quotes_can_be_nested 1"
] = """"""
snapshots[
- "test_quote_can_contain_bbcode_or_markdown 1"
+ "test_quotes_can_contain_hr_markdown 1"
] = """-Sit amet elit.
+Sit amet elit.
+
+Another line.
Sit amet elit.
-Another line.
Sit amet elit.
-- - --Nested quote
-
Sit amet elit.
-
-Another line.
! (space)
" +] = '! (space with other words)
" + +snapshots["test_short_image_markdown[space-one-word] 1"] = "! (space)
" + snapshots[ "test_short_image_markdown[text-before-mark] 1" -] = 'Text before exclamation mark
Text before exclamation mark
Text before with space in between! (sometext)
" diff --git a/misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py b/misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py index 16d488bdb8..617b3ae59e 100644 --- a/misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py +++ b/misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py @@ -8,29 +8,29 @@ snapshots = Snapshot() snapshots[ - "test_single_line_spoiler 1" + "test_multi_line_spoiler 1" ] = """-Daenerys and Jon live happily ever after!
+Sit amet elit.
+Another line.
-Sit amet elit.
+Daenerys and Jon live happily ever after!
-Sit amet elit.
-Another line.
+Sit amet elit.
diff --git a/misago/markup/tests/test_code_bbcode.py b/misago/markup/tests/test_code_bbcode.py index 181fc85e11..034a68b38d 100644 --- a/misago/markup/tests/test_code_bbcode.py +++ b/misago/markup/tests/test_code_bbcode.py @@ -3,7 +3,7 @@ def test_single_line_code(request_mock, user, snapshot): text = '[code]echo("Hello!");[/code]' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) @@ -15,23 +15,23 @@ def test_multi_line_code(request_mock, user, snapshot): [/code] """ - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_code_with_language_parameter(request_mock, user, snapshot): text = '[code=php]echo("Hello!");[/code]' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_code_with_quoted_language_parameter(request_mock, user, snapshot): text = '[code="php"]echo("Hello!");[/code]' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_code_block_disables_parsing(request_mock, user, snapshot): text = "[code]Dolor [b]met.[/b][/code]" - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) diff --git a/misago/markup/tests/test_code_md.py b/misago/markup/tests/test_code_md.py index 02b8b2aba7..9c267737f9 100644 --- a/misago/markup/tests/test_code_md.py +++ b/misago/markup/tests/test_code_md.py @@ -3,7 +3,7 @@ def test_single_line_code_markdown(request_mock, user, snapshot): text = '``````' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) @@ -15,7 +15,7 @@ def test_multi_line_code_markdown(request_mock, user, snapshot): ``` """ - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) @@ -27,5 +27,5 @@ def test_multi_line_code_markdown_with_language(request_mock, user, snapshot): ``` """ - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) diff --git a/misago/markup/tests/test_escaping.py b/misago/markup/tests/test_escaping.py index c0095ecec5..592a818fc0 100644 --- a/misago/markup/tests/test_escaping.py +++ b/misago/markup/tests/test_escaping.py @@ -4,23 +4,29 @@ def test_text_is_escaped(request_mock, user, snapshot): text = '' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_inline_code_is_escaped(request_mock, user, snapshot): text = '``' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_code_in_quote_markdown_is_escaped(request_mock, user, snapshot): text = '> ' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_code_in_quote_bbcode_is_escaped(request_mock, user, snapshot): text = '[quote][/quote]' - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) + snapshot.assert_match(result["parsed_text"]) + + +def test_code_in_quote_bbcode_header_is_escaped(request_mock, user, snapshot): + text = '[quote="@Us">er"]Test[/quote]' + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) diff --git a/misago/markup/tests/test_hr_bbcode.py b/misago/markup/tests/test_hr_bbcode.py index 7574200927..1e469bc11e 100644 --- a/misago/markup/tests/test_hr_bbcode.py +++ b/misago/markup/tests/test_hr_bbcode.py @@ -7,11 +7,11 @@ def test_hr_bbcode_is_replaced_if_its_alone_in_paragraph(request_mock, user, sna [hr] Sit amet elit. """ - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) snapshot.assert_match(result["parsed_text"]) def test_hr_bbcode_is_skipped_if_its_part_of_paragraph(request_mock, user, snapshot): text = "Lorem ipsum[hr]dolor met." - result = parse(text, request_mock, user, minify=False) + result = parse(text, request_mock, user) assert result["parsed_text"] == "Sit amet elit.
-
+
Another line.
Lorem ipsum[hr]dolor met.
" diff --git a/misago/markup/tests/test_htmlparser.py b/misago/markup/tests/test_htmlparser.py new file mode 100644 index 0000000000..367751d89b --- /dev/null +++ b/misago/markup/tests/test_htmlparser.py @@ -0,0 +1,69 @@ +from ..htmlparser import parse_html_string, print_html_string + + +def test_parser_handles_simple_html(): + root_node = parse_html_string("Hello World!
") + assert print_html_string(root_node) == "Hello World!
" + + +def test_parser_handles_html_with_brs(): + root_node = parse_html_string("Hello
World!
Hello
World!
Hello
World!
") + assert print_html_string(root_node) == "Hello
World!
" + + +def test_parser_escapes_html_in_text_nodes(): + root_node = parse_html_string("Hello <br> World!") + assert print_html_string(root_node) == "Hello <br> World!" + + +def test_parser_escapes_quotes_in_text_nodes(): + root_node = parse_html_string('Hello "World"!') + assert print_html_string(root_node) == "Hello "World"!" + + +def test_parser_handles_attributes(): + root_node = parse_html_string('Hello World!') + assert print_html_string(root_node) == 'Hello World!' + + +def test_parser_escapes_html_in_attributes_names(): + root_node = parse_html_string('r="