From 1ec54149ae7ce3ed1f182d8b37f1fee8e02d0292 Mon Sep 17 00:00:00 2001
From: David Ross <daboross@daboross.net>
Date: Sat, 5 Jun 2021 02:13:23 -0700
Subject: [PATCH 1/2] Add incomplete ast-based Rust implementation

This lacks support for quote authors, lists, and referencing previously
used URLs. It's also failing a few tests that I have not investigated.

It's otherwise complete.
---
 rust-ast-based/.gitignore                   |   1 +
 rust-ast-based/Cargo.lock                   |   5 +
 rust-ast-based/Cargo.toml                   |   9 +
 rust-ast-based/src/lib.rs                   | 113 +++++
 rust-ast-based/src/main.rs                  |   7 +
 rust-ast-based/src/parse.rs                 | 523 ++++++++++++++++++++
 rust-ast-based/src/print.rs                 | 253 ++++++++++
 rust-ast-based/tests/test_unwrapped_html.rs |  33 ++
 8 files changed, 944 insertions(+)
 create mode 100644 rust-ast-based/.gitignore
 create mode 100644 rust-ast-based/Cargo.lock
 create mode 100644 rust-ast-based/Cargo.toml
 create mode 100644 rust-ast-based/src/lib.rs
 create mode 100644 rust-ast-based/src/main.rs
 create mode 100644 rust-ast-based/src/parse.rs
 create mode 100644 rust-ast-based/src/print.rs
 create mode 100644 rust-ast-based/tests/test_unwrapped_html.rs

diff --git a/rust-ast-based/.gitignore b/rust-ast-based/.gitignore
new file mode 100644
index 0000000..b83d222
--- /dev/null
+++ b/rust-ast-based/.gitignore
@@ -0,0 +1 @@
+/target/
diff --git a/rust-ast-based/Cargo.lock b/rust-ast-based/Cargo.lock
new file mode 100644
index 0000000..f013e69
--- /dev/null
+++ b/rust-ast-based/Cargo.lock
@@ -0,0 +1,5 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "ast_pqlite"
+version = "0.1.0"
diff --git a/rust-ast-based/Cargo.toml b/rust-ast-based/Cargo.toml
new file mode 100644
index 0000000..fde93e7
--- /dev/null
+++ b/rust-ast-based/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "ast_pqlite"
+version = "0.1.0"
+authors = ["David Ross <daboross@daboross.net>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/rust-ast-based/src/lib.rs b/rust-ast-based/src/lib.rs
new file mode 100644
index 0000000..927823c
--- /dev/null
+++ b/rust-ast-based/src/lib.rs
@@ -0,0 +1,113 @@
+use std::{borrow::Cow, io};
+
+pub mod parse;
+pub mod print;
+
+const OPEN_QUOTE: char = '‘';
+const OPEN_QUOTE_STR: &str = "‘";
+const CLOSE_QUOTE: char = '’';
+const CLOSE_QUOTE_STR: &str = "’";
+
+#[derive(Debug)]
+pub enum PqLiteError {
+    UnmatchedOpen {
+        opening_at_index: usize,
+        opening: &'static str,
+        expected_close: &'static str,
+    },
+    Io(io::Error),
+    Utf8(std::string::FromUtf8Error),
+}
+impl PqLiteError {
+    fn unmatched(
+        opening_at_index: usize,
+        opening: &'static str,
+        expected_close: &'static str,
+    ) -> Self {
+        PqLiteError::UnmatchedOpen {
+            opening,
+            opening_at_index,
+            expected_close,
+        }
+    }
+}
+impl From<io::Error> for PqLiteError {
+    fn from(e: io::Error) -> Self {
+        PqLiteError::Io(e)
+    }
+}
+impl From<std::string::FromUtf8Error> for PqLiteError {
+    fn from(e: std::string::FromUtf8Error) -> Self {
+        PqLiteError::Utf8(e)
+    }
+}
+
+/// Utilize Rust's string slices to their fullest extent with an Ast.
+///
+/// We allocate Ast structs in memory, but critically, we never copy the
+/// text! Every `&'a str` is a reference to the original string read in from
+/// the input file. Rust's borrow checker ensures none of them outlive it.
+///
+/// Besides that, my main rationale for making an AST is to simplify the
+/// parsing. With this, we can have a fairly simple initial parse, followed by
+/// some postprocessing on the AST to handle each different kind of formatting.
+///
+/// This makes it easy to handle things like
+/// ```text
+/// b‘hello world’[https://example.com]
+/// ```
+#[derive(Debug)]
+pub enum Ast<'a> {
+    Text(&'a str),
+    CowText(Cow<'a, str>),
+    NoBrText(&'a str),
+    Root(Vec<Ast<'a>>),
+    Quoted {
+        original_text: &'a str,
+        inner: Vec<Ast<'a>>,
+    },
+    Bracketed(Vec<Ast<'a>>),
+    CurlyBraced(Vec<Ast<'a>>),
+    BlockQuoted(Vec<Ast<'a>>),
+    CodeQuoted(Vec<Ast<'a>>),
+    TooltipText(Cow<'a, str>),
+    ProcessedPrefixSuffix(&'static str, Vec<Ast<'a>>, &'static str),
+    Header(
+        /// A number, 1-6, to output.
+        i32,
+        /// The inner text
+        Vec<Ast<'a>>,
+    ),
+    Tooltip {
+        tooltip_text: Cow<'a, str>,
+        inner: Vec<Ast<'a>>,
+    },
+    Link {
+        link_location: &'a str,
+        tooltip_text: Option<Cow<'a, str>>,
+        inner: Vec<Ast<'a>>,
+    },
+}
+
+pub fn write_wrapped_html_from_pqlite(
+    input: &str,
+    output: impl io::Write,
+) -> Result<(), PqLiteError> {
+    let ast = parse::parse_to_processed_ast(input)?;
+
+    print::ast_to_wrapped_html(&ast, output)?;
+
+    Ok(())
+}
+
+pub fn pqlite_to_unwrapped_html_string(input: &str) -> Result<String, PqLiteError> {
+    let ast = parse::parse_to_processed_ast(input)?;
+
+    let mut out = Vec::new();
+
+    print::ast_to_unwrapped_html(&ast, &mut out)?;
+
+    let out = String::from_utf8(out)?;
+
+    Ok(out)
+}
diff --git a/rust-ast-based/src/main.rs b/rust-ast-based/src/main.rs
new file mode 100644
index 0000000..dceeae2
--- /dev/null
+++ b/rust-ast-based/src/main.rs
@@ -0,0 +1,7 @@
+fn main() {
+    let input = r#"‘[[[Scoping rules/]]]Code blocks’[./code-blocks]"#;
+    let mut out = Vec::new();
+    ast_pqlite::write_wrapped_html_from_pqlite(input, &mut out).unwrap();
+    let out = std::str::from_utf8(&out).unwrap();
+    println!("{}", out);
+}
diff --git a/rust-ast-based/src/parse.rs b/rust-ast-based/src/parse.rs
new file mode 100644
index 0000000..7458037
--- /dev/null
+++ b/rust-ast-based/src/parse.rs
@@ -0,0 +1,523 @@
+use std::borrow::Cow;
+
+use crate::{Ast, PqLiteError, CLOSE_QUOTE, CLOSE_QUOTE_STR, OPEN_QUOTE, OPEN_QUOTE_STR};
+
+impl<'a> Ast<'a> {
+    fn visit_direct_subnodes_mut<E>(
+        &mut self,
+        mut f: impl FnMut(&mut Ast<'a>) -> Result<(), E>,
+    ) -> Result<(), E> {
+        match self.children_mut() {
+            Some(children) => {
+                for node in children {
+                    f(node)?;
+                }
+            }
+            _ => (),
+        }
+        Ok(())
+    }
+
+    /// If this Ast node contains children nodes, return a list of them.
+    fn children_mut(&mut self) -> Option<&mut Vec<Ast<'a>>> {
+        match self {
+            Ast::Root(nodes)
+            | Ast::Quoted { inner: nodes, .. }
+            | Ast::Bracketed(nodes)
+            | Ast::CurlyBraced(nodes)
+            | Ast::BlockQuoted(nodes)
+            | Ast::CodeQuoted(nodes)
+            | Ast::ProcessedPrefixSuffix(_, nodes, _)
+            | Ast::Header(_, nodes)
+            | Ast::Tooltip { inner: nodes, .. }
+            | Ast::Link { inner: nodes, .. } => Some(nodes),
+            Ast::Text(_) | Ast::CowText(_) | Ast::NoBrText(_) | Ast::TooltipText(_) => None,
+        }
+    }
+}
+
+/// Retrieves the first character in a string. Panics if the string is empty.
+fn first_char(s: &str) -> char {
+    s.chars().next().unwrap()
+}
+
+fn parse_to_unprocessed_ast<'a>(input: &'a str) -> Result<Ast<'a>, PqLiteError> {
+    Ok(Ast::Root(parse_to_ast_inner(input, &mut 0, None)?))
+}
+
+fn parse_comment<'a>(
+    input: &'a str,
+    index: &mut usize,
+    opening_index: usize,
+) -> Result<(), PqLiteError> {
+    // find 3 closing brackets
+    let mut remaining = 3;
+    while remaining > 0 {
+        let next_i = *index
+            + input[*index..]
+                .find(&['[', ']'][..])
+                .ok_or(PqLiteError::unmatched(opening_index, "[[[", "]"))?;
+        let next_c = first_char(&input[next_i..]);
+        match next_c {
+            '[' => remaining += 1,
+            ']' => remaining -= 1,
+            _ => unreachable!(),
+        }
+        *index = next_i + next_c.len_utf8();
+    }
+    Ok(())
+}
+
+fn parse_code_block<'a>(
+    input: &'a str,
+    index: &mut usize,
+    closing: &str,
+) -> Result<Option<Ast<'a>>, PqLiteError> {
+    let next_i = *index
+        + match input[*index..].find(closing) {
+            Some(i) => i,
+            None => return Ok(None),
+        };
+    let inner = &input[*index..next_i];
+    *index = next_i + closing.len();
+    Ok(Some(Ast::CodeQuoted(vec![Ast::NoBrText(inner)])))
+}
+
+fn parse_to_ast_inner<'a>(
+    input: &'a str,
+    index: &mut usize,
+    ending: Option<&'static str>,
+) -> Result<Vec<Ast<'a>>, PqLiteError> {
+    let mut parsed = Vec::new();
+    while *index < input.len() {
+        // first, let's find the next noteworthy character
+        let next_i = {
+            let mut searched_through = *index;
+            loop {
+                let arr_with_ending_first_char;
+                let search_chars = match ending {
+                    Some(ending) => {
+                        arr_with_ending_first_char =
+                            [OPEN_QUOTE, '[', '{', '>', '`', first_char(ending)];
+                        &arr_with_ending_first_char[..]
+                    }
+                    None => &[OPEN_QUOTE, '[', '{', '>', '`'][..],
+                };
+                let candidate = match input[searched_through..].find(search_chars) {
+                    Some(i) => searched_through + i,
+                    None => break None,
+                };
+                // we _actually_ want to search for "> ", not just ">", so let's
+                // filter out any ">" that aren't followed by " ". We'll get
+                // ">‘" later on, when post-processing the AST.
+                if input[candidate..].starts_with('>') && !input[candidate..].starts_with("> ") {
+                    searched_through = candidate + first_char(&input[candidate..]).len_utf8();
+                    continue;
+                }
+                // same for ending
+                if let Some(ending) = ending {
+                    if input[candidate..].starts_with(first_char(ending))
+                        && !input[candidate..].starts_with(ending)
+                    {
+                        searched_through = candidate + first_char(&input[candidate..]).len_utf8();
+                        continue;
+                    }
+                }
+                break Some(candidate);
+            }
+        };
+        let next_i = match next_i {
+            Some(i) => i,
+            None => {
+                // we have searched to the end of the string, and found nothing.
+                // just push the rest of the string as plaintext.
+                parsed.push(Ast::Text(&input[*index..]));
+                *index = input.len();
+                break;
+            }
+        };
+
+        // push all the text up to the next interesting char as plaintext.
+        parsed.push(Ast::Text(&input[*index..next_i]));
+        *index = next_i;
+
+        let next_s = {
+            let next_c = first_char(&input[next_i..]);
+            match next_c {
+                OPEN_QUOTE => OPEN_QUOTE_STR,
+                '[' => {
+                    if input[next_i..].starts_with("[[[") {
+                        "[[["
+                    } else {
+                        "["
+                    }
+                }
+                '{' => "{",
+                '`' => {
+                    if input[next_i..].starts_with("```") {
+                        "```"
+                    } else if input[next_i..].starts_with("``") {
+                        "``"
+                    } else {
+                        "`"
+                    }
+                }
+                '>' => "> ",
+                _ => {
+                    let ending = ending.unwrap();
+                    assert!(input[next_i..].starts_with(ending));
+                    ending
+                }
+            }
+        };
+
+        if Some(next_s) == ending {
+            // if the next interesting char is the closing char for our parent,
+            // just exit now and let them deal with the rest.
+            break;
+        }
+
+        *index += next_s.len();
+
+        // otherwise, find our closing string.
+        let closing_s = match next_s {
+            OPEN_QUOTE_STR => CLOSE_QUOTE_STR,
+            "[" => "]",
+            "[[[" => "]]]",
+            "{" => "}",
+            "`" => "`",
+            "``" => "``",
+            "```" => "```",
+            "> " => "\n",
+            _ => unreachable!(),
+        };
+
+        if next_s == "[[[" {
+            // we don't care about inner structures inside a comment.
+            // we also don't care about adding the comment to the AST.
+            parse_comment(input, index, next_i)?;
+        } else if next_s.starts_with('`') {
+            // we similarly don't care about inner structures of a code block.
+            // but we _do_ care about the contents.
+            match parse_code_block(input, index, closing_s)? {
+                Some(c) => parsed.push(c),
+                None => parsed.push(Ast::Text(next_s)),
+            }
+        } else {
+            // now, we'll delegate finding all inner structures (and our closing
+            // string) to a new invocation.
+            let inner_ast = parse_to_ast_inner(input, index, Some(closing_s))?;
+
+            if *index == input.len() {
+                // we're missing a closing piece. We're a forgiving parser, so
+                // what we'll do is put the starting string back in, then
+                // just go on.
+                parsed.push(Ast::Text(next_s));
+                parsed.extend(inner_ast);
+                break;
+            } else {
+                // if the inner invocation exited with input left, it should have
+                // found our closing string. Verify this is the case (and panic
+                // out otherwise).
+                assert!(input[*index..].starts_with(closing_s));
+                // we've processed the closing string.
+                *index += closing_s.len();
+
+                let ast = match next_s {
+                    OPEN_QUOTE_STR => Ast::Quoted {
+                        original_text: &input[next_i..*index],
+                        inner: inner_ast,
+                    },
+                    "[" => Ast::Bracketed(inner_ast),
+                    "{" => Ast::CurlyBraced(inner_ast),
+                    "`" | "``" | "```" => Ast::CodeQuoted(inner_ast),
+                    "> " => Ast::BlockQuoted(inner_ast),
+                    _ => unreachable!(),
+                };
+                parsed.push(ast);
+            }
+        }
+    }
+    Ok(parsed)
+}
+
+// ---
+// AST Processing Functions
+// ---
+
+/// Minimizes the AST by removing empty text nodes.
+/// Necessary for some subsequent processing to work.
+fn remove_empty_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    // process inner nodes first
+    ast.visit_direct_subnodes_mut(remove_empty_text)?;
+    if let Some(children) = ast.children_mut() {
+        children.retain(|child| match child {
+            Ast::Text(s) => !s.is_empty(),
+            _ => true,
+        });
+    }
+    Ok(())
+}
+
+fn process_ast_quotes(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    // process inner nodes first
+    ast.visit_direct_subnodes_mut(process_ast_quotes)?;
+    if let Some(children) = ast.children_mut() {
+        'children_loop: for i in 1..children.len() {
+            let split_to_access_children = children.split_at_mut(i);
+            let child1 = &mut split_to_access_children.0[i - 1];
+            let child2 = &mut split_to_access_children.1[0];
+            if let (Ast::Text(pre), Ast::Quoted { inner, .. }) = (&mut *child1, &mut *child2) {
+                const SIMPLE_RULES: &[(&str, &str, &str)] = &[
+                    ("*", "<b>", "</b>"),
+                    ("_", "<u>", "</u>"),
+                    ("-", "<s>", "</s>"),
+                    ("~", "<i>", "</i>"),
+                    (">", "<blockquote>", "</blockquote>"),
+                    ("H", "<h3>", "</h3>"),
+                    ("/\\", "<sup>", "</sup>"),
+                    ("\\/", "<sub>", "</sub>"),
+                ];
+                for (start, prefix, postfix) in SIMPLE_RULES {
+                    if pre.ends_with(start) {
+                        // remove formatting character from text before
+                        *child1 = Ast::Text(&pre[0..pre.len() - start.len()]);
+                        // replace quoted block with prefix+suffix'd block
+                        *child2 =
+                            Ast::ProcessedPrefixSuffix(prefix, std::mem::take(inner), postfix);
+                        // skip all remaining transforms for this child1, child2 pairing
+                        continue 'children_loop;
+                    }
+                }
+                // test for Header
+                if pre.ends_with(')') {
+                    if let Some(h_idx) = pre.rfind("H(") {
+                        let header_number_as_str =
+                            &pre[h_idx + "H(".len()..pre.len() - ')'.len_utf8()];
+                        if let Ok(header_number) = header_number_as_str.parse::<i32>() {
+                            // H0 or H in the source is <h3> in output.
+                            // additionally, negative input results in a
+                            // smaller header, which means a larger number
+                            // in the output
+                            let h = 3 - header_number;
+                            if h >= 1 && h <= 6 {
+                                *child1 = Ast::Text(&pre[0..h_idx]);
+                                *child2 = Ast::Header(h, std::mem::take(inner))
+                            }
+                            // even if the number wasn't valid, we've found
+                            // the prefix _was_ supposed to be a header.
+                            // Let's thus stop.
+                            continue 'children_loop;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+fn process_spoilers(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    // process inner nodes first
+    ast.visit_direct_subnodes_mut(process_spoilers)?;
+    if let Some(children) = ast.children_mut() {
+        for child in children {
+            if let Ast::CurlyBraced(inner) = child {
+                let prefix = r#"<span class="cu_brackets" onclick="return spoiler(this, event)"><span class="cu_brackets_b">{</span><span>…</span><span class="cu" style="display: none">"#;
+                let postfix = r#"</span><span class="cu_brackets_b">}</span></span>"#;
+                *child = Ast::ProcessedPrefixSuffix(prefix, std::mem::take(inner), postfix);
+            }
+        }
+    }
+    Ok(())
+}
+
+fn is_url_tooltip(child2: &Ast<'_>) -> bool {
+    let bracketed_ast = match child2 {
+        Ast::Bracketed(v) => v,
+        _ => return false,
+    };
+    let tooltip = match bracketed_ast.last() {
+        Some(Ast::Quoted { .. }) => true,
+        Some(Ast::TooltipText(_)) => true,
+        _ => false,
+    };
+    let url = match bracketed_ast.first() {
+        Some(Ast::Text(_)) => true,
+        _ => false,
+    };
+    match (tooltip, url, bracketed_ast.len()) {
+        (true, false, 1) | (false, true, 1) | (true, true, 2) => true,
+        _ => false,
+    }
+}
+
+fn bracket_child_to_url_and_tooltip<'a>(
+    child2: Ast<'a>,
+) -> (Option<&'a str>, Option<Cow<'a, str>>) {
+    let bracketed_ast = match child2 {
+        Ast::Bracketed(v) => v,
+        _ => return (None, None),
+    };
+    let mut iter = bracketed_ast.into_iter();
+    let first = iter.next();
+    let second = iter.next();
+    if iter.next().is_some() {
+        return (None, None);
+    }
+    match (first, second) {
+        (Some(Ast::TooltipText(tooltip)), None) => (None, Some(tooltip)),
+        (Some(Ast::Text(url)), Some(Ast::TooltipText(tooltip))) => {
+            (Some(url.trim_end()), Some(tooltip))
+        }
+        (Some(Ast::Text(url)), None) => (Some(url.trim_end()), None),
+        _ => (None, None),
+    }
+}
+
+/// This is a necessary repetition of other comment removal, as regular comment
+/// removal happens alongside parsing the entire source into an AST tree, and
+/// that makes other adjustments such as equating syntactically-equivalent
+/// source bits.
+fn remove_comments_for_tooltip(original_text: &str) -> Result<Cow<'_, str>, PqLiteError> {
+    let mut result = String::new();
+    let mut index = 0;
+    while index < original_text.len() {
+        let next_i = match original_text[index..].find("[[[") {
+            Some(i) => index + i,
+            None => {
+                if index == 0 {
+                    return Ok(original_text.into());
+                } else {
+                    result.push_str(&original_text[index..]);
+                    break;
+                }
+            }
+        };
+        result.push_str(&original_text[index..next_i]);
+        index = next_i + "[[[".len();
+        parse_comment(original_text, &mut index, next_i)?;
+    }
+    Ok(result.into())
+}
+
+/// Replace tooltip text with TooltipText element to prevent further processing
+/// of text inside.
+fn pull_out_tooltip_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    if let Some(children) = ast.children_mut() {
+        for child in children {
+            if is_url_tooltip(&child) {
+                let bracketed_ast = match child {
+                    Ast::Bracketed(v) => v,
+                    _ => unreachable!(),
+                };
+                let inner_last = match bracketed_ast.last_mut() {
+                    Some(v) => v,
+                    None => continue,
+                };
+                match inner_last {
+                    Ast::Quoted { original_text, .. } => {
+                        *inner_last = Ast::TooltipText(remove_comments_for_tooltip(
+                            original_text
+                                .trim_start_matches(OPEN_QUOTE)
+                                .trim_end_matches(CLOSE_QUOTE),
+                        )?)
+                    }
+                    _ => continue,
+                }
+            }
+        }
+    }
+    // process inner nodes last
+    ast.visit_direct_subnodes_mut(pull_out_tooltip_text)?;
+    Ok(())
+}
+
+// match child1 {
+//     Ast::Text(s) if s.split_ascii_whitespace().next().is_none() => return false,
+//     _ => (),
+// }
+fn process_brackets(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    // process inner nodes first
+    ast.visit_direct_subnodes_mut(process_brackets)?;
+    if let Some(children) = ast.children_mut() {
+        let mut i = 0;
+        while i < children.len() {
+            let split_to_access_children = children.split_at_mut(i);
+            let mut child1 = match i {
+                0 => None,
+                _ => Some(&mut split_to_access_children.0[i - 1]),
+            };
+            let child2 = &mut split_to_access_children.1[0];
+            if is_url_tooltip(&child2) {
+                let child2_owned = std::mem::replace(child2, Ast::Text(""));
+                let (url, tooltip) = bracket_child_to_url_and_tooltip(child2_owned);
+                let (extra_text, inner) = if let Some(child1) = &mut child1 {
+                    let child1_owned = std::mem::replace(*child1, Ast::Text(""));
+                    match child1_owned {
+                        Ast::Text(s) => match s.rsplit_once(|c: char| c.is_ascii_whitespace()) {
+                            None => (None, vec![Ast::Text(s)]),
+                            Some((_, after)) if after.is_empty() => (
+                                Some(Ast::Text(s)),
+                                vec![Ast::CowText(
+                                    url.map(Cow::from).or_else(|| tooltip.clone()).unwrap(),
+                                )],
+                            ),
+                            Some((before, after)) => {
+                                (Some(Ast::Text(before)), vec![Ast::Text(after)])
+                            }
+                        },
+                        Ast::Quoted { inner, .. } => (None, inner),
+                        other => (None, vec![other]),
+                    }
+                } else {
+                    (
+                        None,
+                        vec![Ast::CowText(
+                            url.map(Cow::from).or_else(|| tooltip.clone()).unwrap(),
+                        )],
+                    )
+                };
+                let applied = match (tooltip, url) {
+                    (Some(tooltip_text), None) => Ast::Tooltip {
+                        tooltip_text,
+                        inner,
+                    },
+                    (tooltip_text, Some(url)) => Ast::Link {
+                        link_location: url,
+                        tooltip_text,
+                        inner,
+                    },
+                    _ => unreachable!(),
+                };
+                *child2 = applied;
+                if let Some(child1) = child1 {
+                    match extra_text {
+                        Some(extra_text) => *child1 = extra_text,
+                        None => {
+                            children.remove(i - 1);
+                            i -= 1;
+                        }
+                    }
+                }
+            }
+            i += 1;
+        }
+    }
+    Ok(())
+}
+
+fn process_ast(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
+    remove_empty_text(ast)?;
+    pull_out_tooltip_text(ast)?;
+    process_ast_quotes(ast)?;
+    process_spoilers(ast)?;
+    process_brackets(ast)?;
+    Ok(())
+}
+
+pub fn parse_to_processed_ast<'a>(input: &'a str) -> Result<Ast<'a>, PqLiteError> {
+    let mut ast = parse_to_unprocessed_ast(input)?;
+    process_ast(&mut ast)?;
+    Ok(ast)
+}
diff --git a/rust-ast-based/src/print.rs b/rust-ast-based/src/print.rs
new file mode 100644
index 0000000..5462a07
--- /dev/null
+++ b/rust-ast-based/src/print.rs
@@ -0,0 +1,253 @@
+use std::{fmt, io};
+
+use crate::{Ast, PqLiteError, CLOSE_QUOTE_STR, OPEN_QUOTE_STR};
+
+fn write_html_escaped(text: &str, f: &mut fmt::Formatter) -> fmt::Result {
+    // & becomes &amp;
+    // < becomes &lt;
+    // > becomes &gt;
+    // note: efficiency could be greatly improved here by using `find` manually, rather
+    // than allocating 3 strings for this operation.
+    f.write_str(
+        &text
+            .replace('&', "&amp;")
+            .replace('<', "&lt;")
+            .replace('>', "&gt;"),
+    )
+}
+fn write_html_escaped_with_linebreaks(text: &str, f: &mut fmt::Formatter) -> fmt::Result {
+    // & becomes &amp;
+    // < becomes &lt;
+    // > becomes &gt;
+    // note: efficiency could be greatly improved here by using `find` manually, rather
+    // than allocating 4 strings for this operation.
+    let mut text = &*text
+        .replace('&', "&amp;")
+        .replace('<', "&lt;")
+        .replace('>', "&gt;");
+    // don't write a br for the first \n.
+    if text.starts_with('\n') {
+        f.write_str("\n")?;
+        text = &text[1..];
+    }
+    f.write_str(&text.replace('\n', "<br />\n"))
+}
+fn write_attr_value_escaped(text: &str, f: &mut fmt::Formatter) -> fmt::Result {
+    // & becomes &amp;
+    // < becomes &lt;
+    // > becomes &gt;
+    // " becomes &quot;
+    // ' is left as-is (attribute values are always emitted inside double quotes)
+    // note: efficiency could be greatly improved here by using `find` manually, rather
+    // than allocating 4 strings for this operation.
+    f.write_str(
+        &text
+            .replace('&', "&amp;")
+            .replace('<', "&lt;")
+            .replace('>', "&gt;")
+            .replace('\"', "&quot;"),
+    )
+}
+
+impl<'a> fmt::Display for Ast<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Ast::Text(text) => write_html_escaped_with_linebreaks(text, f)?,
+            Ast::CowText(text) => write_html_escaped_with_linebreaks(text, f)?,
+            Ast::NoBrText(text) => write_html_escaped(text, f)?,
+            Ast::Root(nodes) => {
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+            }
+            Ast::Quoted { inner, .. } => {
+                f.write_str(OPEN_QUOTE_STR)?;
+                for node in inner {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str(CLOSE_QUOTE_STR)?;
+            }
+            Ast::Bracketed(nodes) => {
+                f.write_str("[")?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("]")?;
+            }
+            Ast::CurlyBraced(nodes) => {
+                f.write_str("{")?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("}")?;
+            }
+            Ast::BlockQuoted(nodes) => {
+                f.write_str("<blockquote>")?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("</blockquote>\n")?;
+            }
+            Ast::CodeQuoted(nodes) => {
+                f.write_str("<pre>")?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("</pre>")?;
+            }
+            Ast::TooltipText(_) => {
+                unreachable!("TooltipText should be processed out by now.");
+            }
+            Ast::ProcessedPrefixSuffix(prefix, nodes, suffix) => {
+                f.write_str(prefix)?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str(suffix)?;
+            }
+            Ast::Header(header_number, nodes) => {
+                write!(f, "<h{}>", header_number)?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                write!(f, "</h{}>", header_number)?;
+            }
+            Ast::Tooltip {
+                tooltip_text,
+                inner,
+            } => {
+                f.write_str("<abbr title=\"")?;
+                write_attr_value_escaped(tooltip_text, f)?;
+                f.write_str("\">")?;
+                for node in inner {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("</abbr>")?;
+            }
+            Ast::Link {
+                link_location,
+                tooltip_text,
+                inner,
+            } => {
+                f.write_str("<a href=\"")?;
+                write_attr_value_escaped(link_location, f)?;
+                // *shrug* reference impl does this so we will too, despite it
+                // not being in spec.
+                if link_location.starts_with("./") {
+                    f.write_str("\" target=\"_self")?;
+                }
+                if let Some(tooltip_text) = tooltip_text {
+                    f.write_str("\" title=\"")?;
+                    write_attr_value_escaped(tooltip_text, f)?;
+                }
+                f.write_str("\">")?;
+                for node in inner {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("</a>")?;
+            }
+        }
+        Ok(())
+    }
+}
+
+pub fn ast_to_unwrapped_html(ast: &Ast<'_>, mut output: impl io::Write) -> Result<(), PqLiteError> {
+    write!(output, "{}", ast)?;
+    Ok(())
+}
+
+pub fn ast_to_wrapped_html(ast: &Ast<'_>, mut output: impl io::Write) -> Result<(), PqLiteError> {
+    write!(
+        output,
+        "{}",
+        r#"<html>
+<head>
+<meta charset="utf-8" />
+<base target="_blank">
+<script type="text/javascript">
+function spoiler(element, event)
+{
+    if (event.target.nodeName == 'A' || event.target.parentNode.nodeName == 'A' || event.target.onclick)//for links in spoilers and spoilers2 in spoilers to work
+        return;
+    var e = element.firstChild.nextSibling.nextSibling;//element.getElementsByTagName('span')[0]
+    e.previousSibling.style.display = e.style.display;//<span>…</span> must have inverted display style
+    e.style.display = (e.style.display == "none" ? "" : "none");
+    element.firstChild.style.fontWeight =
+    element. lastChild.style.fontWeight = (e.style.display == "" ? "normal" : "bold");
+    event.stopPropagation();
+}
+</script>
+<style type="text/css">
+div#main, td {
+    font-size: 14px;
+    font-family: Verdana, sans-serif;
+    line-height: 160%;
+    text-align: justify;
+}
+span.cu_brackets_b {
+    font-size: initial;
+    font-family: initial;
+    font-weight: bold;
+}
+a {
+    text-decoration: none;
+    color: #6da3bd;
+}
+a:hover {
+    text-decoration: underline;
+    color: #4d7285;
+}
+h1, h2, h3, h4, h5, h6 {
+    margin: 0;
+    font-weight: 400;
+}
+h1 {font-size: 200%; line-height: 130%;}
+h2 {font-size: 180%; line-height: 135%;}
+h3 {font-size: 160%; line-height: 140%;}
+h4 {font-size: 145%; line-height: 145%;}
+h5 {font-size: 130%; line-height: 140%;}
+h6 {font-size: 120%; line-height: 140%;}
+span.sq {color: gray; font-size: 0.8rem; font-weight: normal; /*pointer-events: none;*/}
+span.sq_brackets {color: #BFBFBF;}
+span.cu_brackets {cursor: pointer;}
+span.cu {background-color: #F7F7FF;}
+abbr {text-decoration: none; border-bottom: 1px dotted;}
+pre {margin: 0; font-family: 'Courier New'; line-height: normal;}
+blockquote {
+    margin: 0 0 7px 0;
+    padding: 7px 12px;
+}
+blockquote:not(.re) {border-left:  0.2em solid #C7EED4; background-color: #FCFFFC;}
+blockquote.re       {border-right: 0.2em solid #C7EED4; background-color: #F9FFFB;}
+div.note {
+    padding: 18px 20px;
+    background: #ffffd7;
+}
+pre.inline_code {
+    display: inline;
+    padding: 0px 3px;
+    border: 1px solid #E5E5E5;
+    background-color: #FAFAFA;
+    border-radius: 3px;
+}
+div#main {width: 100%;}
+@media screen and (min-width: 750px) {
+    div#main {width: 724px;}
+}
+</style>
+</head>
+<body>
+<div id="main" style="margin: 0 auto">"#
+    )?;
+    ast_to_unwrapped_html(ast, &mut output)?;
+    write!(
+        output,
+        "{}",
+        r#"</div>
+</body>
+</html>
+"#
+    )?;
+
+    Ok(())
+}
diff --git a/rust-ast-based/tests/test_unwrapped_html.rs b/rust-ast-based/tests/test_unwrapped_html.rs
new file mode 100644
index 0000000..997af53
--- /dev/null
+++ b/rust-ast-based/tests/test_unwrapped_html.rs
@@ -0,0 +1,33 @@
+use std::path::PathBuf;
+
+use ast_pqlite::pqlite_to_unwrapped_html_string;
+
+/// Runs every case in `../tests.txt` and reports all failures before panicking.
+#[test]
+fn test_from_tests_txt() {
+    let mut path = PathBuf::from(std::env::var_os("CARGO_MANIFEST_DIR").unwrap());
+    path.push("../tests.txt");
+    let data = std::fs::read_to_string(path).unwrap();
+
+    let mut failure = false;
+    // Cases are separated by "|\n\n|"; each is "<input> (()) <expected output>".
+    for case in data.split("|\n\n|") {
+        let (input, expected) = case
+            .split_once(" (()) ")
+            .unwrap_or_else(|| panic!("badly formatted test case: {:?}", case));
+
+        println!("Running test case {:?}", input);
+        // A conversion error counts as one failed case instead of aborting the
+        // whole run, so the remaining cases are still exercised and reported.
+        match pqlite_to_unwrapped_html_string(input) {
+            Ok(actual) if actual == expected => continue,
+            Ok(actual) => eprintln!(
+                "test failure:\n expected: {:?}\n   actual: {:?}\nfor input: {:?}",
+                expected, actual, input
+            ),
+            Err(err) => eprintln!("test failure:\n error: {:?}\nfor input: {:?}", err, input),
+        }
+        failure = true;
+    }
+    assert!(!failure, "one or more test cases from tests.txt failed.");
+}

From cbf0e66d47402e95a985570d3511d4e4d0e5dde3 Mon Sep 17 00:00:00 2001
From: David Ross <daboross@daboross.net>
Date: Sat, 5 Jun 2021 02:57:46 -0700
Subject: [PATCH 2/2] Fix producing recursing <a>

---
 rust-ast-based/src/parse.rs | 52 ++++++++++++++++++++++++++++++++++---
 rust-ast-based/src/print.rs |  2 --
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/rust-ast-based/src/parse.rs b/rust-ast-based/src/parse.rs
index 7458037..251a9f6 100644
--- a/rust-ast-based/src/parse.rs
+++ b/rust-ast-based/src/parse.rs
@@ -18,6 +18,23 @@ impl<'a> Ast<'a> {
         Ok(())
     }
 
+    /// Borrows this node's list of child nodes, or returns `None` for the `*Text` variants.
+    fn children(&self) -> Option<&Vec<Ast<'a>>> {
+        match self {
+            Ast::Root(nodes)
+            | Ast::Quoted { inner: nodes, .. }
+            | Ast::Bracketed(nodes)
+            | Ast::CurlyBraced(nodes)
+            | Ast::BlockQuoted(nodes)
+            | Ast::CodeQuoted(nodes)
+            | Ast::ProcessedPrefixSuffix(_, nodes, _)
+            | Ast::Header(_, nodes)
+            | Ast::Tooltip { inner: nodes, .. }
+            | Ast::Link { inner: nodes, .. } => Some(nodes),
+            Ast::Text(_) | Ast::CowText(_) | Ast::NoBrText(_) | Ast::TooltipText(_) => None,
+        }
+    }
+
     /// If this Ast node contains children nodes, return a list of them.
     fn children_mut(&mut self) -> Option<&mut Vec<Ast<'a>>> {
         match self {
@@ -332,7 +349,21 @@ fn process_spoilers(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
     Ok(())
 }
 
-fn is_url_tooltip(child2: &Ast<'_>) -> bool {
+/// Reports whether `node` is an `Ast::Link` or has one anywhere beneath it.
+///
+/// Walks the tree depth-first through `Ast::children` and stops at the first
+/// link found; nodes for which `children` returns `None` end the descent.
+fn contains_link(node: &Ast<'_>) -> bool {
+    match node {
+        Ast::Link { .. } => true,
+        other => match other.children() {
+            Some(kids) => kids.iter().any(contains_link),
+            None => false,
+        },
+    }
+}
+
+fn is_pair_url_tooltip(child1: Option<&Ast<'_>>, child2: &Ast<'_>) -> bool {
     let bracketed_ast = match child2 {
         Ast::Bracketed(v) => v,
         _ => return false,
@@ -346,6 +377,13 @@ fn is_url_tooltip(child2: &Ast<'_>) -> bool {
         Some(Ast::Text(_)) => true,
         _ => false,
     };
+    if url {
+        if let Some(child1) = child1 {
+            if contains_link(child1) {
+                return false;
+            }
+        }
+    }
     match (tooltip, url, bracketed_ast.len()) {
         (true, false, 1) | (false, true, 1) | (true, true, 2) => true,
         _ => false,
@@ -405,8 +443,14 @@ fn remove_comments_for_tooltip(original_text: &str) -> Result<Cow<'_, str>, PqLi
 /// of text inside.
 fn pull_out_tooltip_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
     if let Some(children) = ast.children_mut() {
-        for child in children {
-            if is_url_tooltip(&child) {
+        for i in 0..children.len() {
+            let child1 = match i {
+                0 => None,
+                i => Some(&children[i - 1]),
+            };
+            let child2 = &children[i];
+            if is_pair_url_tooltip(child1, child2) {
+                let child = &mut children[i];
                 let bracketed_ast = match child {
                     Ast::Bracketed(v) => v,
                     _ => unreachable!(),
@@ -449,7 +493,7 @@ fn process_brackets(ast: &mut Ast<'_>) -> Result<(), PqLiteError> {
                 _ => Some(&mut split_to_access_children.0[i - 1]),
             };
             let child2 = &mut split_to_access_children.1[0];
-            if is_url_tooltip(&child2) {
+            if is_pair_url_tooltip(child1.as_ref().map(|c| &**c), &child2) {
                 let child2_owned = std::mem::replace(child2, Ast::Text(""));
                 let (url, tooltip) = bracket_child_to_url_and_tooltip(child2_owned);
                 let (extra_text, inner) = if let Some(child1) = &mut child1 {
diff --git a/rust-ast-based/src/print.rs b/rust-ast-based/src/print.rs
index 5462a07..0bff480 100644
--- a/rust-ast-based/src/print.rs
+++ b/rust-ast-based/src/print.rs
@@ -131,8 +131,6 @@ impl<'a> fmt::Display for Ast<'a> {
             } => {
                 f.write_str("<a href=\"")?;
                 write_attr_value_escaped(link_location, f)?;
-                // *shrug* reference impl does this so we will too, despite it
-                // not being in spec.
                 if link_location.starts_with("./") {
                     f.write_str("\" target=\"_self")?;
                 }