From 1ec54149ae7ce3ed1f182d8b37f1fee8e02d0292 Mon Sep 17 00:00:00 2001 From: David Ross Date: Sat, 5 Jun 2021 02:13:23 -0700 Subject: [PATCH 1/2] Add incomplete ast-based Rust implementation This lacks support for quote authors, lists, and referencing past used URLs. It's also failing a few tests that I have not investigated. It's otherwise complete. --- rust-ast-based/.gitignore | 1 + rust-ast-based/Cargo.lock | 5 + rust-ast-based/Cargo.toml | 9 + rust-ast-based/src/lib.rs | 113 +++++ rust-ast-based/src/main.rs | 7 + rust-ast-based/src/parse.rs | 523 ++++++++++++++++++++ rust-ast-based/src/print.rs | 253 ++++++++++ rust-ast-based/tests/test_unwrapped_html.rs | 33 ++ 8 files changed, 944 insertions(+) create mode 100644 rust-ast-based/.gitignore create mode 100644 rust-ast-based/Cargo.lock create mode 100644 rust-ast-based/Cargo.toml create mode 100644 rust-ast-based/src/lib.rs create mode 100644 rust-ast-based/src/main.rs create mode 100644 rust-ast-based/src/parse.rs create mode 100644 rust-ast-based/src/print.rs create mode 100644 rust-ast-based/tests/test_unwrapped_html.rs diff --git a/rust-ast-based/.gitignore b/rust-ast-based/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/rust-ast-based/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/rust-ast-based/Cargo.lock b/rust-ast-based/Cargo.lock new file mode 100644 index 0000000..f013e69 --- /dev/null +++ b/rust-ast-based/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "ast_pqlite" +version = "0.1.0" diff --git a/rust-ast-based/Cargo.toml b/rust-ast-based/Cargo.toml new file mode 100644 index 0000000..fde93e7 --- /dev/null +++ b/rust-ast-based/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "ast_pqlite" +version = "0.1.0" +authors = ["David Ross "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/rust-ast-based/src/lib.rs b/rust-ast-based/src/lib.rs new file mode 100644 index 0000000..927823c --- /dev/null +++ b/rust-ast-based/src/lib.rs @@ -0,0 +1,113 @@ +use std::{borrow::Cow, io}; + +pub mod parse; +pub mod print; + +const OPEN_QUOTE: char = '‘'; +const OPEN_QUOTE_STR: &str = "‘"; +const CLOSE_QUOTE: char = '’'; +const CLOSE_QUOTE_STR: &str = "’"; + +#[derive(Debug)] +pub enum PqLiteError { + UnmatchedOpen { + opening_at_index: usize, + opening: &'static str, + expected_close: &'static str, + }, + Io(io::Error), + Utf8(std::string::FromUtf8Error), +} +impl PqLiteError { + fn unmatched( + opening_at_index: usize, + opening: &'static str, + expected_close: &'static str, + ) -> Self { + PqLiteError::UnmatchedOpen { + opening, + opening_at_index, + expected_close, + } + } +} +impl From for PqLiteError { + fn from(e: io::Error) -> Self { + PqLiteError::Io(e) + } +} +impl From for PqLiteError { + fn from(e: std::string::FromUtf8Error) -> Self { + PqLiteError::Utf8(e) + } +} + +/// Utilize Rust's string slices to their fullest extent with an Ast. +/// +/// We allocate Ast structs in memory, but critically, we never copy the +/// text! Every `&'a str` is a reference to the original string read in from +/// the input file. Rust's borrow checker ensures no slice outlives it. +/// +/// Besides that, my main rationale for making an AST is to simplify the +/// parsing. With this, we can have a fairly simple initial parse, followed by +/// some postprocessing on the AST to handle each different kind of formatting. 
+/// +/// This makes it easy to handle things like +/// ``` +/// b‘hello world’[https://example.com] +/// ``` +#[derive(Debug)] +pub enum Ast<'a> { + Text(&'a str), + CowText(Cow<'a, str>), + NoBrText(&'a str), + Root(Vec>), + Quoted { + original_text: &'a str, + inner: Vec>, + }, + Bracketed(Vec>), + CurlyBraced(Vec>), + BlockQuoted(Vec>), + CodeQuoted(Vec>), + TooltipText(Cow<'a, str>), + ProcessedPrefixSuffix(&'static str, Vec>, &'static str), + Header( + /// A number, 1-6, to output. + i32, + /// The inner text + Vec>, + ), + Tooltip { + tooltip_text: Cow<'a, str>, + inner: Vec>, + }, + Link { + link_location: &'a str, + tooltip_text: Option>, + inner: Vec>, + }, +} + +pub fn write_wrapped_html_from_pqlite( + input: &str, + output: impl io::Write, +) -> Result<(), PqLiteError> { + let ast = parse::parse_to_processed_ast(input)?; + + print::ast_to_wrapped_html(&ast, output)?; + + Ok(()) +} + +pub fn pqlite_to_unwrapped_html_string(input: &str) -> Result { + let ast = parse::parse_to_processed_ast(input)?; + + let mut out = Vec::new(); + + print::ast_to_unwrapped_html(&ast, &mut out)?; + + let out = String::from_utf8(out)?; + + Ok(out) +} diff --git a/rust-ast-based/src/main.rs b/rust-ast-based/src/main.rs new file mode 100644 index 0000000..dceeae2 --- /dev/null +++ b/rust-ast-based/src/main.rs @@ -0,0 +1,7 @@ +fn main() { + let input = r#"‘[[[Scoping rules/]]]Code blocks’[./code-blocks]"#; + let mut out = Vec::new(); + ast_pqlite::write_wrapped_html_from_pqlite(input, &mut out).unwrap(); + let out = std::str::from_utf8(&out).unwrap(); + println!("{}", out); +} diff --git a/rust-ast-based/src/parse.rs b/rust-ast-based/src/parse.rs new file mode 100644 index 0000000..7458037 --- /dev/null +++ b/rust-ast-based/src/parse.rs @@ -0,0 +1,523 @@ +use std::borrow::Cow; + +use crate::{Ast, PqLiteError, CLOSE_QUOTE, CLOSE_QUOTE_STR, OPEN_QUOTE, OPEN_QUOTE_STR}; + +impl<'a> Ast<'a> { + fn visit_direct_subnodes_mut( + &mut self, + mut f: impl FnMut(&mut Ast<'a>) -> 
Result<(), E>, + ) -> Result<(), E> { + match self.children_mut() { + Some(children) => { + for node in children { + f(node)?; + } + } + _ => (), + } + Ok(()) + } + + /// If this Ast node contains children nodes, return a list of them. + fn children_mut(&mut self) -> Option<&mut Vec>> { + match self { + Ast::Root(nodes) + | Ast::Quoted { inner: nodes, .. } + | Ast::Bracketed(nodes) + | Ast::CurlyBraced(nodes) + | Ast::BlockQuoted(nodes) + | Ast::CodeQuoted(nodes) + | Ast::ProcessedPrefixSuffix(_, nodes, _) + | Ast::Header(_, nodes) + | Ast::Tooltip { inner: nodes, .. } + | Ast::Link { inner: nodes, .. } => Some(nodes), + Ast::Text(_) | Ast::CowText(_) | Ast::NoBrText(_) | Ast::TooltipText(_) => None, + } + } +} + +/// Retrieves the first character in a string. Panics if the string is empty. +fn first_char(s: &str) -> char { + s.chars().next().unwrap() +} + +fn parse_to_unprocessed_ast<'a>(input: &'a str) -> Result, PqLiteError> { + Ok(Ast::Root(parse_to_ast_inner(input, &mut 0, None)?)) +} + +fn parse_comment<'a>( + input: &'a str, + index: &mut usize, + opening_index: usize, +) -> Result<(), PqLiteError> { + // find 3 closing brackets + let mut remaining = 3; + while remaining > 0 { + let next_i = *index + + input[*index..] 
+ .find(&['[', ']'][..]) + .ok_or(PqLiteError::unmatched(opening_index, "[[[", "]"))?; + let next_c = first_char(&input[next_i..]); + match next_c { + '[' => remaining += 1, + ']' => remaining -= 1, + _ => unreachable!(), + } + *index = next_i + next_c.len_utf8(); + } + Ok(()) +} + +fn parse_code_block<'a>( + input: &'a str, + index: &mut usize, + closing: &str, +) -> Result>, PqLiteError> { + let next_i = *index + + match input[*index..].find(closing) { + Some(i) => i, + None => return Ok(None), + }; + let inner = &input[*index..next_i]; + *index = next_i + closing.len(); + Ok(Some(Ast::CodeQuoted(vec![Ast::NoBrText(inner)]))) +} + +fn parse_to_ast_inner<'a>( + input: &'a str, + index: &mut usize, + ending: Option<&'static str>, +) -> Result>, PqLiteError> { + let mut parsed = Vec::new(); + while *index < input.len() { + // first, let's find the next noteworthy character + let next_i = { + let mut searched_through = *index; + loop { + let arr_with_ending_first_char; + let search_chars = match ending { + Some(ending) => { + arr_with_ending_first_char = + [OPEN_QUOTE, '[', '{', '>', '`', first_char(ending)]; + &arr_with_ending_first_char[..] + } + None => &[OPEN_QUOTE, '[', '{', '>', '`'][..], + }; + let candidate = match input[searched_through..].find(search_chars) { + Some(i) => searched_through + i, + None => break None, + }; + // we _actually_ want to search for "> ", not just ">", so let's + // filter out any ">" that aren't followed by " ". We'll get + // ">‘" later on, when post-processing the AST. 
+ if input[candidate..].starts_with('>') && !input[candidate..].starts_with("> ") { + searched_through = candidate + first_char(&input[candidate..]).len_utf8(); + continue; + } + // same for ending + if let Some(ending) = ending { + if input[candidate..].starts_with(first_char(ending)) + && !input[candidate..].starts_with(ending) + { + searched_through = candidate + first_char(&input[candidate..]).len_utf8(); + continue; + } + } + break Some(candidate); + } + }; + let next_i = match next_i { + Some(i) => i, + None => { + // we have searched to the end of the string, and found nothing. + // just push the rest of the string as plaintext. + parsed.push(Ast::Text(&input[*index..])); + *index = input.len(); + break; + } + }; + + // push all the text up to the next interesting char as plaintext. + parsed.push(Ast::Text(&input[*index..next_i])); + *index = next_i; + + let next_s = { + let next_c = first_char(&input[next_i..]); + match next_c { + OPEN_QUOTE => OPEN_QUOTE_STR, + '[' => { + if input[next_i..].starts_with("[[[") { + "[[[" + } else { + "[" + } + } + '{' => "{", + '`' => { + if input[next_i..].starts_with("```") { + "```" + } else if input[next_i..].starts_with("``") { + "``" + } else { + "`" + } + } + '>' => "> ", + _ => { + let ending = ending.unwrap(); + assert!(input[next_i..].starts_with(ending)); + ending + } + } + }; + + if Some(next_s) == ending { + // if the next interesting char is the closing char for our parent, + // just exit now and let them deal with the rest. + break; + } + + *index += next_s.len(); + + // otherwise, find our closing string. + let closing_s = match next_s { + OPEN_QUOTE_STR => CLOSE_QUOTE_STR, + "[" => "]", + "[[[" => "]]]", + "{" => "}", + "`" => "`", + "``" => "``", + "```" => "```", + "> " => "\n", + _ => unreachable!(), + }; + + if next_s == "[[[" { + // we don't care about inner structures inside a comment. + // we also don't care about adding the comment to the AST. 
+ parse_comment(input, index, next_i)?; + } else if next_s.starts_with('`') { + // we similarly don't care about inner structures of a code block. + // but we _do_ care about the contents. + match parse_code_block(input, index, closing_s)? { + Some(c) => parsed.push(c), + None => parsed.push(Ast::Text(next_s)), + } + } else { + // now, we'll delegate finding all inner structures (and our closing + // string) to a new invocation. + let inner_ast = parse_to_ast_inner(input, index, Some(closing_s))?; + + if *index == input.len() { + // we're missing a closing piece. We're a forgiving parser, so + // what we'll do is put the starting string back in, then + // just go on. + parsed.push(Ast::Text(next_s)); + parsed.extend(inner_ast); + break; + } else { + // if the inner invocation exited with input left, it should have + // found our closing string. Verify this is the case (and panic + // out otherwise). + assert!(input[*index..].starts_with(closing_s)); + // we've processed the closing string. + *index += closing_s.len(); + + let ast = match next_s { + OPEN_QUOTE_STR => Ast::Quoted { + original_text: &input[next_i..*index], + inner: inner_ast, + }, + "[" => Ast::Bracketed(inner_ast), + "{" => Ast::CurlyBraced(inner_ast), + "`" | "``" | "```" => Ast::CodeQuoted(inner_ast), + "> " => Ast::BlockQuoted(inner_ast), + _ => unreachable!(), + }; + parsed.push(ast); + } + } + } + Ok(parsed) +} + +// --- +// AST Processing Functions +// --- + +/// Minimizes the AST by removing empty text nodes. +/// Necessary for some subsequent processing to work. 
+fn remove_empty_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + // process inner nodes first + ast.visit_direct_subnodes_mut(remove_empty_text)?; + if let Some(children) = ast.children_mut() { + children.retain(|child| match child { + Ast::Text(s) => !s.is_empty(), + _ => true, + }); + } + Ok(()) +} + +fn process_ast_quotes(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + // process inner nodes first + ast.visit_direct_subnodes_mut(process_ast_quotes)?; + if let Some(children) = ast.children_mut() { + 'children_loop: for i in 1..children.len() { + let split_to_access_children = children.split_at_mut(i); + let child1 = &mut split_to_access_children.0[i - 1]; + let child2 = &mut split_to_access_children.1[0]; + if let (Ast::Text(pre), Ast::Quoted { inner, .. }) = (&mut *child1, &mut *child2) { + const SIMPLE_RULES: &[(&str, &str, &str)] = &[ + ("*", "", ""), + ("_", "", ""), + ("-", "", ""), + ("~", "", ""), + (">", "
", "
"), + ("H", "

", "

"), + ("/\\", "", ""), + ("\\/", "", ""), + ]; + for (start, prefix, postfix) in SIMPLE_RULES { + if pre.ends_with(start) { + // remove formatting character from text before + *child1 = Ast::Text(&pre[0..pre.len() - start.len()]); + // replace quoted block with prefix+suffix'd block + *child2 = + Ast::ProcessedPrefixSuffix(prefix, std::mem::take(inner), postfix); + // skip all remaining transforms for this child1, child2 pairing + continue 'children_loop; + } + } + // test for Header + if pre.ends_with(')') { + if let Some(h_idx) = pre.rfind("H(") { + let header_number_as_str = + &pre[h_idx + "H(".len()..pre.len() - ')'.len_utf8()]; + if let Ok(header_number) = header_number_as_str.parse::() { + // H0 or H in the source is

in output. + // additionally, negative input results in a + // smaller header, which means a larger number + // in the output + let h = 3 - header_number; + if h >= 1 && h <= 6 { + *child1 = Ast::Text(&pre[0..h_idx]); + *child2 = Ast::Header(h, std::mem::take(inner)) + } + // even if the number wasn't valid, we've found + // the prefix _was_ supposed to be a header. + // Let's thus stop. + continue 'children_loop; + } + } + } + } + } + } + Ok(()) +} + +fn process_spoilers(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + // process inner nodes first + ast.visit_direct_subnodes_mut(process_spoilers)?; + if let Some(children) = ast.children_mut() { + for child in children { + if let Ast::CurlyBraced(inner) = child { + let prefix = r#"{}"#; + *child = Ast::ProcessedPrefixSuffix(prefix, std::mem::take(inner), postfix); + } + } + } + Ok(()) +} + +fn is_url_tooltip(child2: &Ast<'_>) -> bool { + let bracketed_ast = match child2 { + Ast::Bracketed(v) => v, + _ => return false, + }; + let tooltip = match bracketed_ast.last() { + Some(Ast::Quoted { .. 
}) => true, + Some(Ast::TooltipText(_)) => true, + _ => false, + }; + let url = match bracketed_ast.first() { + Some(Ast::Text(_)) => true, + _ => false, + }; + match (tooltip, url, bracketed_ast.len()) { + (true, false, 1) | (false, true, 1) | (true, true, 2) => true, + _ => false, + } +} + +fn bracket_child_to_url_and_tooltip<'a>( + child2: Ast<'a>, +) -> (Option<&'a str>, Option>) { + let bracketed_ast = match child2 { + Ast::Bracketed(v) => v, + _ => return (None, None), + }; + let mut iter = bracketed_ast.into_iter(); + let first = iter.next(); + let second = iter.next(); + if iter.next().is_some() { + return (None, None); + } + match (first, second) { + (Some(Ast::TooltipText(tooltip)), None) => (None, Some(tooltip)), + (Some(Ast::Text(url)), Some(Ast::TooltipText(tooltip))) => { + (Some(url.trim_end()), Some(tooltip)) + } + (Some(Ast::Text(url)), None) => (Some(url.trim_end()), None), + _ => (None, None), + } +} + +/// This is a necessary repetition of other comment removal, as regular comment +/// removal happens alongside parsing the entire source into an AST tree, and +/// that makes other adjustments such as equating syntactically-equivalent +/// source bits. +fn remove_comments_for_tooltip(original_text: &str) -> Result, PqLiteError> { + let mut result = String::new(); + let mut index = 0; + while index < original_text.len() { + let next_i = match original_text[index..].find("[[[") { + Some(i) => index + i, + None => { + if index == 0 { + return Ok(original_text.into()); + } else { + result.push_str(&original_text[index..]); + break; + } + } + }; + result.push_str(&original_text[index..next_i]); + index = next_i + "[[[".len(); + parse_comment(original_text, &mut index, next_i)?; + } + Ok(result.into()) +} + +/// Replace tooltip text with TooltipText element to prevent further processing +/// of text inside. 
+fn pull_out_tooltip_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + if let Some(children) = ast.children_mut() { + for child in children { + if is_url_tooltip(&child) { + let bracketed_ast = match child { + Ast::Bracketed(v) => v, + _ => unreachable!(), + }; + let inner_last = match bracketed_ast.last_mut() { + Some(v) => v, + None => continue, + }; + match inner_last { + Ast::Quoted { original_text, .. } => { + *inner_last = Ast::TooltipText(remove_comments_for_tooltip( + original_text + .trim_start_matches(OPEN_QUOTE) + .trim_end_matches(CLOSE_QUOTE), + )?) + } + _ => continue, + } + } + } + } + // process inner nodes last + ast.visit_direct_subnodes_mut(pull_out_tooltip_text)?; + Ok(()) +} + +// match child1 { +// Ast::Text(s) if s.split_ascii_whitespace().next().is_none() => return false, +// _ => (), +// } +fn process_brackets(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + // process inner nodes first + ast.visit_direct_subnodes_mut(process_brackets)?; + if let Some(children) = ast.children_mut() { + let mut i = 0; + while i < children.len() { + let split_to_access_children = children.split_at_mut(i); + let mut child1 = match i { + 0 => None, + _ => Some(&mut split_to_access_children.0[i - 1]), + }; + let child2 = &mut split_to_access_children.1[0]; + if is_url_tooltip(&child2) { + let child2_owned = std::mem::replace(child2, Ast::Text("")); + let (url, tooltip) = bracket_child_to_url_and_tooltip(child2_owned); + let (extra_text, inner) = if let Some(child1) = &mut child1 { + let child1_owned = std::mem::replace(*child1, Ast::Text("")); + match child1_owned { + Ast::Text(s) => match s.rsplit_once(|c: char| c.is_ascii_whitespace()) { + None => (None, vec![Ast::Text(s)]), + Some((_, after)) if after.is_empty() => ( + Some(Ast::Text(s)), + vec![Ast::CowText( + url.map(Cow::from).or_else(|| tooltip.clone()).unwrap(), + )], + ), + Some((before, after)) => { + (Some(Ast::Text(before)), vec![Ast::Text(after)]) + } + }, + Ast::Quoted { inner, .. 
} => (None, inner), + other => (None, vec![other]), + } + } else { + ( + None, + vec![Ast::CowText( + url.map(Cow::from).or_else(|| tooltip.clone()).unwrap(), + )], + ) + }; + let applied = match (tooltip, url) { + (Some(tooltip_text), None) => Ast::Tooltip { + tooltip_text, + inner, + }, + (tooltip_text, Some(url)) => Ast::Link { + link_location: url, + tooltip_text, + inner, + }, + _ => unreachable!(), + }; + *child2 = applied; + if let Some(child1) = child1 { + match extra_text { + Some(extra_text) => *child1 = extra_text, + None => { + children.remove(i - 1); + i -= 1; + } + } + } + } + i += 1; + } + } + Ok(()) +} + +fn process_ast(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { + remove_empty_text(ast)?; + pull_out_tooltip_text(ast)?; + process_ast_quotes(ast)?; + process_spoilers(ast)?; + process_brackets(ast)?; + Ok(()) +} + +pub fn parse_to_processed_ast<'a>(input: &'a str) -> Result, PqLiteError> { + let mut ast = parse_to_unprocessed_ast(input)?; + process_ast(&mut ast)?; + Ok(ast) +} diff --git a/rust-ast-based/src/print.rs b/rust-ast-based/src/print.rs new file mode 100644 index 0000000..5462a07 --- /dev/null +++ b/rust-ast-based/src/print.rs @@ -0,0 +1,253 @@ +use std::{fmt, io}; + +use crate::{Ast, PqLiteError, CLOSE_QUOTE_STR, OPEN_QUOTE_STR}; + +fn write_html_escaped(text: &str, f: &mut fmt::Formatter) -> fmt::Result { + // & becomes & + // < becomes < + // > becomes > + // note: efficiency could be greatly improved here by using `find` manually, rather + // than allocating 3 strings for this operation. + f.write_str( + &text + .replace('&', "&") + .replace('<', "<") + .replace('>', ">"), + ) +} +fn write_html_escaped_with_linebreaks(text: &str, f: &mut fmt::Formatter) -> fmt::Result { + // & becomes & + // < becomes < + // > becomes > + // note: efficiency could be greatly improved here by using `find` manually, rather + // than allocating 4 strings for this operation. 
+ let mut text = &*text + .replace('&', "&") + .replace('<', "<") + .replace('>', ">"); + // don't write a br for the first \n. + if text.starts_with('\n') { + f.write_str("\n")?; + text = &text[1..]; + } + f.write_str(&text.replace('\n', "
\n")) +} +fn write_attr_value_escaped(text: &str, f: &mut fmt::Formatter) -> fmt::Result { + // & becomes & + // < becomes < + // > becomes > + // " becomes " + // ' becomes ' + // note: efficiency could be greatly improved here by using `find` manually, rather + // than allocating 4 strings for this operation. + f.write_str( + &text + .replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('\"', """), + ) +} + +impl<'a> fmt::Display for Ast<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Ast::Text(text) => write_html_escaped_with_linebreaks(text, f)?, + Ast::CowText(text) => write_html_escaped_with_linebreaks(text, f)?, + Ast::NoBrText(text) => write_html_escaped(text, f)?, + Ast::Root(nodes) => { + for node in nodes { + write!(f, "{}", node)?; + } + } + Ast::Quoted { inner, .. } => { + f.write_str(OPEN_QUOTE_STR)?; + for node in inner { + write!(f, "{}", node)?; + } + f.write_str(CLOSE_QUOTE_STR)?; + } + Ast::Bracketed(nodes) => { + f.write_str("[")?; + for node in nodes { + write!(f, "{}", node)?; + } + f.write_str("]")?; + } + Ast::CurlyBraced(nodes) => { + f.write_str("{")?; + for node in nodes { + write!(f, "{}", node)?; + } + f.write_str("}")?; + } + Ast::BlockQuoted(nodes) => { + f.write_str("
")?; + for node in nodes { + write!(f, "{}", node)?; + } + f.write_str("
\n")?; + } + Ast::CodeQuoted(nodes) => { + f.write_str("
")?;
+                for node in nodes {
+                    write!(f, "{}", node)?;
+                }
+                f.write_str("
")?; + } + Ast::TooltipText(_) => { + unreachable!("TooltipText should be processed out by now."); + } + Ast::ProcessedPrefixSuffix(prefix, nodes, suffix) => { + f.write_str(prefix)?; + for node in nodes { + write!(f, "{}", node)?; + } + f.write_str(suffix)?; + } + Ast::Header(header_number, nodes) => { + write!(f, "", header_number)?; + for node in nodes { + write!(f, "{}", node)?; + } + write!(f, "", header_number)?; + } + Ast::Tooltip { + tooltip_text, + inner, + } => { + f.write_str("")?; + for node in inner { + write!(f, "{}", node)?; + } + f.write_str("")?; + } + Ast::Link { + link_location, + tooltip_text, + inner, + } => { + f.write_str("")?; + for node in inner { + write!(f, "{}", node)?; + } + f.write_str("")?; + } + } + Ok(()) + } +} + +pub fn ast_to_unwrapped_html(ast: &Ast<'_>, mut output: impl io::Write) -> Result<(), PqLiteError> { + write!(output, "{}", ast)?; + Ok(()) +} + +pub fn ast_to_wrapped_html(ast: &Ast<'_>, mut output: impl io::Write) -> Result<(), PqLiteError> { + write!( + output, + "{}", + r#" + + + + + + + +
"# + )?; + ast_to_unwrapped_html(ast, &mut output)?; + write!( + output, + "{}", + r#"
+ + +"# + )?; + + Ok(()) +} diff --git a/rust-ast-based/tests/test_unwrapped_html.rs b/rust-ast-based/tests/test_unwrapped_html.rs new file mode 100644 index 0000000..997af53 --- /dev/null +++ b/rust-ast-based/tests/test_unwrapped_html.rs @@ -0,0 +1,33 @@ +use std::path::PathBuf; + +use ast_pqlite::pqlite_to_unwrapped_html_string; + +#[test] +fn test_from_tests_txt() { + let mut path = PathBuf::from(std::env::var_os("CARGO_MANIFEST_DIR").unwrap()); + path.push("../tests.txt"); + let data = std::fs::read_to_string(path).unwrap(); + let test_cases = data.split("|\n\n|"); + + let mut failure = false; + + for case in test_cases { + let (input, output) = case + .split_once(" (()) ") + .unwrap_or_else(|| panic!("badly formatted test case: {:?}", case)); + + println!("Running test case {:?}", input); + let actual_output = pqlite_to_unwrapped_html_string(input).unwrap(); + + if actual_output != output { + eprintln!( + "test failure:\n expected: {:?}\n actual: {:?}\nfor input: {:?}", + output, actual_output, input + ); + failure = true; + } + } + if failure { + panic!("one or more test cases from tests.txt failed."); + } +} From cbf0e66d47402e95a985570d3511d4e4d0e5dde3 Mon Sep 17 00:00:00 2001 From: David Ross Date: Sat, 5 Jun 2021 02:57:46 -0700 Subject: [PATCH 2/2] Fix producing recursing --- rust-ast-based/src/parse.rs | 52 ++++++++++++++++++++++++++++++++++--- rust-ast-based/src/print.rs | 2 -- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/rust-ast-based/src/parse.rs b/rust-ast-based/src/parse.rs index 7458037..251a9f6 100644 --- a/rust-ast-based/src/parse.rs +++ b/rust-ast-based/src/parse.rs @@ -18,6 +18,23 @@ impl<'a> Ast<'a> { Ok(()) } + /// If this Ast node contains children nodes, return a list of them. + fn children(&self) -> Option<&Vec>> { + match self { + Ast::Root(nodes) + | Ast::Quoted { inner: nodes, .. 
} + | Ast::Bracketed(nodes) + | Ast::CurlyBraced(nodes) + | Ast::BlockQuoted(nodes) + | Ast::CodeQuoted(nodes) + | Ast::ProcessedPrefixSuffix(_, nodes, _) + | Ast::Header(_, nodes) + | Ast::Tooltip { inner: nodes, .. } + | Ast::Link { inner: nodes, .. } => Some(nodes), + Ast::Text(_) | Ast::CowText(_) | Ast::NoBrText(_) | Ast::TooltipText(_) => None, + } + } + /// If this Ast node contains children nodes, return a list of them. fn children_mut(&mut self) -> Option<&mut Vec>> { match self { @@ -332,7 +349,21 @@ fn process_spoilers(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { Ok(()) } -fn is_url_tooltip(child2: &Ast<'_>) -> bool { +fn contains_link(node: &Ast<'_>) -> bool { + if let Ast::Link { .. } = node { + return true; + } + if let Some(children) = node.children() { + for child in children { + if contains_link(child) { + return true; + } + } + } + false +} + +fn is_pair_url_tooltip(child1: Option<&Ast<'_>>, child2: &Ast<'_>) -> bool { let bracketed_ast = match child2 { Ast::Bracketed(v) => v, _ => return false, @@ -346,6 +377,13 @@ fn is_url_tooltip(child2: &Ast<'_>) -> bool { Some(Ast::Text(_)) => true, _ => false, }; + if url { + if let Some(child1) = child1 { + if contains_link(child1) { + return false; + } + } + } match (tooltip, url, bracketed_ast.len()) { (true, false, 1) | (false, true, 1) | (true, true, 2) => true, _ => false, @@ -405,8 +443,14 @@ fn remove_comments_for_tooltip(original_text: &str) -> Result, PqLi /// of text inside. 
fn pull_out_tooltip_text(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { if let Some(children) = ast.children_mut() { - for child in children { - if is_url_tooltip(&child) { + for i in 0..children.len() { + let child1 = match i { + 0 => None, + i => Some(&children[i - 1]), + }; + let child2 = &children[i]; + if is_pair_url_tooltip(child1, child2) { + let child = &mut children[i]; let bracketed_ast = match child { Ast::Bracketed(v) => v, _ => unreachable!(), @@ -449,7 +493,7 @@ fn process_brackets(ast: &mut Ast<'_>) -> Result<(), PqLiteError> { _ => Some(&mut split_to_access_children.0[i - 1]), }; let child2 = &mut split_to_access_children.1[0]; - if is_url_tooltip(&child2) { + if is_pair_url_tooltip(child1.as_ref().map(|c| &**c), &child2) { let child2_owned = std::mem::replace(child2, Ast::Text("")); let (url, tooltip) = bracket_child_to_url_and_tooltip(child2_owned); let (extra_text, inner) = if let Some(child1) = &mut child1 { diff --git a/rust-ast-based/src/print.rs b/rust-ast-based/src/print.rs index 5462a07..0bff480 100644 --- a/rust-ast-based/src/print.rs +++ b/rust-ast-based/src/print.rs @@ -131,8 +131,6 @@ impl<'a> fmt::Display for Ast<'a> { } => { f.write_str("