From 7b99783f009cf73e45cd9d160cbae45739936e9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= <rene.kijewski@fu-berlin.de>
Date: Thu, 25 Jul 2024 21:34:09 +0200
Subject: [PATCH] parser: use `memchr` to speed-up `skip_till()`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`skip_till()` is used in the parser to find the next block `{%`,
comment `{#` or expression `{{`. At every character position, it tests
whether one of these three substrings follows. Using [`memchr3()`], we
can at least skip ahead to the next candidate, a `{`. The syntax for
blocks, comments and expressions can be modified by the user, but that
does not matter much; we can simply supply `memchr3()` with the first
byte of each of these strings.

[`memchr3()`]: <https://docs.rs/memchr/2.7.4/memchr/fn.memchr3.html>

```text
librustdoc/all          time:   [366.54 µs 366.78 µs 367.02 µs]
                        thrpt:  [38.475 MiB/s 38.500 MiB/s 38.525 MiB/s]
                 change:
                        time:   [-17.358% -17.065% -16.820%] (p = 0.00 < 0.05)
                        thrpt:  [+20.221% +20.576% +21.004%]
                        Performance has improved.

librustdoc/item_info    time:   [6.3315 µs 6.3400 µs 6.3495 µs]
                        thrpt:  [24.783 MiB/s 24.820 MiB/s 24.853 MiB/s]
                 change:
                        time:   [-6.5547% -6.4090% -6.2633%] (p = 0.00 < 0.05)
                        thrpt:  [+6.6818% +6.8479% +7.0144%]
                        Performance has improved.

librustdoc/item_union   time:   [39.377 µs 39.551 µs 39.720 µs]
                        thrpt:  [24.850 MiB/s 24.957 MiB/s 25.067 MiB/s]
                 change:
                        time:   [-6.9834% -6.2455% -5.5849%] (p = 0.00 < 0.05)
                        thrpt:  [+5.9153% +6.6616% +7.5077%]
                        Performance has improved.

librustdoc/page         time:   [170.83 µs 170.99 µs 171.23 µs]
                        thrpt:  [36.164 MiB/s 36.213 MiB/s 36.248 MiB/s]
                 change:
                        time:   [-12.413% -12.183% -11.968%] (p = 0.00 < 0.05)
                        thrpt:  [+13.595% +13.873% +14.173%]
                        Performance has improved.

librustdoc/print_item   time:   [21.163 µs 21.234 µs 21.322 µs]
                        thrpt:  [44.280 MiB/s 44.463 MiB/s 44.612 MiB/s]
                 change:
                        time:   [-19.848% -18.613% -17.491%] (p = 0.00 < 0.05)
                        thrpt:  [+21.198% +22.870% +24.763%]
                        Performance has improved.

librustdoc/short_item_info
                        time:   [19.781 µs 19.813 µs 19.846 µs]
                        thrpt:  [45.652 MiB/s 45.727 MiB/s 45.801 MiB/s]
                 change:
                        time:   [-18.027% -17.806% -17.574%] (p = 0.00 < 0.05)
                        thrpt:  [+21.321% +21.663% +21.991%]
                        Performance has improved.

librustdoc/sidebar      time:   [40.694 µs 40.806 µs 40.957 µs]
                        thrpt:  [30.131 MiB/s 30.242 MiB/s 30.325 MiB/s]
                 change:
                        time:   [-14.698% -14.069% -13.456%] (p = 0.00 < 0.05)
                        thrpt:  [+15.548% +16.372% +17.231%]
                        Performance has improved.

librustdoc/source       time:   [15.249 µs 15.264 µs 15.278 µs]
                        thrpt:  [48.251 MiB/s 48.295 MiB/s 48.343 MiB/s]
                 change:
                        time:   [-25.832% -25.678% -25.532%] (p = 0.00 < 0.05)
                        thrpt:  [+34.285% +34.550% +34.829%]
                        Performance has improved.

librustdoc/type_layout_size
                        time:   [9.0168 µs 9.0446 µs 9.0789 µs]
                        thrpt:  [29.832 MiB/s 29.945 MiB/s 30.038 MiB/s]
                 change:
                        time:   [-11.100% -10.437% -9.4426%] (p = 0.00 < 0.05)
                        thrpt:  [+10.427% +11.653% +12.486%]
                        Performance has improved.

librustdoc/type_layout  time:   [34.088 µs 34.114 µs 34.139 µs]
                        thrpt:  [78.860 MiB/s 78.919 MiB/s 78.979 MiB/s]
                 change:
                        time:   [-37.865% -37.723% -37.585%] (p = 0.00 < 0.05)
                        thrpt:  [+60.217% +60.573% +60.941%]
                        Performance has improved.
```
---
 rinja_parser/Cargo.toml             |   1 +
 rinja_parser/src/lib.rs             |  23 ++++---
 rinja_parser/src/memchr_splitter.rs | 101 ++++++++++++++++++++++++++++
 rinja_parser/src/node.rs            |  16 +++--
 rinja_parser/src/tests.rs           |  28 +++++++-
 5 files changed, 153 insertions(+), 16 deletions(-)
 create mode 100644 rinja_parser/src/memchr_splitter.rs

diff --git a/rinja_parser/Cargo.toml b/rinja_parser/Cargo.toml
index 82e75d25c..d4265dae3 100644
--- a/rinja_parser/Cargo.toml
+++ b/rinja_parser/Cargo.toml
@@ -14,6 +14,7 @@ edition = "2021"
 rust-version = "1.71"
 
 [dependencies]
+memchr = "2"
 nom = { version = "7", default-features = false, features = ["alloc"] }
 
 [dev-dependencies]
diff --git a/rinja_parser/src/lib.rs b/rinja_parser/src/lib.rs
index 5f6078f69..ffb34067d 100644
--- a/rinja_parser/src/lib.rs
+++ b/rinja_parser/src/lib.rs
@@ -20,6 +20,7 @@ use nom::{error_position, AsChar, InputTakeAtPosition};
 
 pub mod expr;
 pub use expr::{Expr, Filter};
+mod memchr_splitter;
 pub mod node;
 pub use node::Node;
 
@@ -362,22 +363,22 @@ fn ws<'a, O>(
 
 /// Skips input until `end` was found, but does not consume it.
 /// Returns tuple that would be returned when parsing `end`.
-fn skip_till<'a, O>(
+fn skip_till<'a, 'b, O>(
+    candidate_finder: impl crate::memchr_splitter::Splitter,
     end: impl FnMut(&'a str) -> ParseResult<'a, O>,
 ) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
-    enum Next<O> {
-        IsEnd(O),
-        NotEnd,
-    }
-    let mut next = alt((map(end, Next::IsEnd), map(anychar, |_| Next::NotEnd)));
+    let mut next = alt((map(end, Some), map(anychar, |_| None)));
     move |start: &'a str| {
         let mut i = start;
         loop {
-            let (j, is_end) = next(i)?;
-            match is_end {
-                Next::IsEnd(lookahead) => return Ok((i, (j, lookahead))),
-                Next::NotEnd => i = j,
-            }
+            i = match candidate_finder.split(i) {
+                Some((_, j)) => j,
+                None => return Err(nom::Err::Error(ErrorContext::new("`end` not found`", i))),
+            };
+            i = match next(i)? {
+                (j, Some(lookahead)) => return Ok((i, (j, lookahead))),
+                (j, None) => j,
+            };
         }
     }
 }
diff --git a/rinja_parser/src/memchr_splitter.rs b/rinja_parser/src/memchr_splitter.rs
new file mode 100644
index 000000000..0a149f89a
--- /dev/null
+++ b/rinja_parser/src/memchr_splitter.rs
@@ -0,0 +1,101 @@
+pub(crate) trait Splitter: Copy {
+    /// If any of the needles was found in the haystack, then split the haystack at the first hit.
+    ///
+    /// Since only the first byte of a needle is inspected, be aware that there can be
+    /// false positives. Always check that the latter string of the output actually starts
+    /// with the expected prefix.
+    fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)>;
+}
+
+impl<T: Splitter + ?Sized> Splitter for &T {
+    #[inline]
+    fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
+        T::split(self, haystack)
+    }
+}
+
+// define and implement a string splitter using memchr
+macro_rules! new_memchr_type {
+    ($struct:ident $split_unchecked:ident $memchr:ident $($field:ident)*) => {
+        #[derive(Debug, Clone, Copy)]
+        pub(crate) struct $struct {
+            $($field: u8,)*
+        }
+
+        impl $struct {
+            #[track_caller]
+            pub(crate) fn new($($field: &str),*) -> Self {
+                Self {
+                    $($field: $field.as_bytes()[0],)*
+                }
+            }
+
+            #[inline]
+            pub(crate) fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
+                // SAFETY: During the construction of `self` we used strings as inputs, and a
+                // string always starts with a byte at char boundary.
+                unsafe { $split_unchecked($(self.$field,)* haystack) }
+            }
+        }
+
+        impl Splitter for $struct {
+            #[inline]
+            fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
+                self.split(haystack)
+            }
+        }
+
+        /// SAFETY: caller has to ensure that the needle is at a char boundary
+        pub(crate) unsafe fn $split_unchecked(
+            $($field: u8,)*
+            haystack: &str,
+        ) -> Option<(&str, &str)> {
+            let idx = memchr::$memchr($($field,)* haystack.as_bytes())?;
+            // SAFETY: The caller ensures that the needles are at char boundary.
+            // The found index `< haystack.len()`.
+            Some((haystack.get_unchecked(..idx), haystack.get_unchecked(idx..)))
+        }
+    };
+}
+
+new_memchr_type!(Splitter1 split1_unchecked memchr a);
+new_memchr_type!(Splitter2 split2_unchecked memchr2 a b);
+new_memchr_type!(Splitter3 split3_unchecked memchr3 a b c);
+
+#[test]
+fn candidate_finder() {
+    assert_eq!(
+        Splitter1::new("test").split("abctefg"),
+        Some(("abc", "tefg")),
+    );
+    assert_eq!(Splitter1::new("xyz").split("abctefg"), None);
+
+    assert_eq!(
+        Splitter2::new("xyz", "foo").split("abctefg"),
+        Some(("abcte", "fg")),
+    );
+    assert_eq!(Splitter2::new("oof", "xyz").split("abctefg"), None);
+
+    assert_eq!(
+        Splitter3::new("oof", "apples", "xyz").split("abctefg"),
+        Some(("", "abctefg")),
+    );
+    assert_eq!(
+        Splitter3::new("oof", "peaches", "xyz").split("abctefg"),
+        None
+    );
+
+    assert_eq!(
+        Splitter3::new("test", "test", "test").split("abctefg"),
+        Some(("abc", "tefg")),
+    );
+
+    assert_eq!(
+        Splitter3::new("🧚‍♀️Life", "😀Laugh", "😻Love")
+            .split("sed diam nonumy eirmod tempor 🧚‍♀️Life ut labore et dolore magna aliquyam"),
+        Some((
+            "sed diam nonumy eirmod tempor ",
+            "🧚‍♀️Life ut labore et dolore magna aliquyam"
+        )),
+    );
+}
diff --git a/rinja_parser/src/node.rs b/rinja_parser/src/node.rs
index 4be2ec33a..5a822bb25 100644
--- a/rinja_parser/src/node.rs
+++ b/rinja_parser/src/node.rs
@@ -9,6 +9,7 @@ use nom::error_position;
 use nom::multi::{many0, many1, separated_list0};
 use nom::sequence::{delimited, pair, preceded, tuple};
 
+use crate::memchr_splitter::{Splitter1, Splitter2, Splitter3};
 use crate::{
     filter, identifier, is_ws, keyword, not_ws, skip_till, str_lit, ws, ErrorContext, Expr, Filter,
     ParseResult, State, Target, WithSpan,
@@ -755,14 +756,20 @@ pub struct Lit<'a> {
 impl<'a> Lit<'a> {
     fn parse(i: &'a str, s: &State<'_>) -> ParseResult<'a, WithSpan<'a, Self>> {
         let start = i;
+        let (i, _) = not(eof)(i)?;
+
+        let candidate_finder = Splitter3::new(
+            s.syntax.block_start,
+            s.syntax.comment_start,
+            s.syntax.expr_start,
+        );
         let p_start = alt((
             tag(s.syntax.block_start),
             tag(s.syntax.comment_start),
             tag(s.syntax.expr_start),
         ));
 
-        let (i, _) = not(eof)(i)?;
-        let (i, content) = opt(recognize(skip_till(p_start)))(i)?;
+        let (i, content) = opt(recognize(skip_till(candidate_finder, p_start)))(i)?;
         let (i, content) = match content {
             Some("") => {
                 // {block,comment,expr}_start follows immediately.
@@ -810,7 +817,7 @@ impl<'a> Raw<'a> {
             cut(tuple((
                 opt(Whitespace::parse),
                 |i| s.tag_block_end(i),
-                consumed(skip_till(endraw)),
+                consumed(skip_till(Splitter1::new(s.syntax.block_start), endraw)),
             ))),
         ));
 
@@ -989,7 +996,8 @@ impl<'a> Comment<'a> {
             let mut depth = 0usize;
             loop {
                 let start = i;
-                let (_, tag) = opt(skip_till(|i| tag(i, s)))(i)?;
+                let splitter = Splitter2::new(s.syntax.comment_start, s.syntax.comment_end);
+                let (_, tag) = opt(skip_till(splitter, |i| tag(i, s)))(i)?;
                 let Some((j, tag)) = tag else {
                     return Err(
                         ErrorContext::unclosed("comment", s.syntax.comment_end, start).into(),
diff --git a/rinja_parser/src/tests.rs b/rinja_parser/src/tests.rs
index 2860ce692..e2e9f6c53 100644
--- a/rinja_parser/src/tests.rs
+++ b/rinja_parser/src/tests.rs
@@ -371,10 +371,36 @@ fn change_delimiters_parse_filter() {
         expr_end: "=}",
         ..Syntax::default()
     };
-
     Ast::from_str("{= strvar|e =}", None, &syntax).unwrap();
 }
 
+#[test]
+fn unicode_delimiters_in_syntax() {
+    let syntax = Syntax {
+        expr_start: "🖎", // U+1F58E == b"\xf0\x9f\x96\x8e"
+        expr_end: "✍",   // U+270D == b"\xe2\x9c\x8d"
+        ..Syntax::default()
+    };
+    assert_eq!(
+        Ast::from_str("Here comes the expression: 🖎 e ✍.", None, &syntax)
+            .unwrap()
+            .nodes(),
+        [
+            Node::Lit(WithSpan::no_span(Lit {
+                lws: "",
+                val: "Here comes the expression:",
+                rws: " ",
+            })),
+            Node::Expr(Ws(None, None), WithSpan::no_span(Expr::Var("e")),),
+            Node::Lit(WithSpan::no_span(Lit {
+                lws: "",
+                val: ".",
+                rws: "",
+            })),
+        ],
+    );
+}
+
 #[test]
 fn test_precedence() {
     let syntax = Syntax::default();