rust-lang · tesuji · Jul 11, 2020 · Jul 11, 2020 · Sep 3, 2020 · Sep 3, 2020
diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
@@ -1,6 +1,7 @@
 use rustc_span::source_map::SourceMap;
 use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
 
+mod block_comment;
 #[cfg(test)]
 mod tests;
 
@@ -26,73 +27,16 @@ pub struct Comment {
 /// Makes a doc string more presentable to users.
 /// Used by rustdoc and perhaps other tools, but not by rustc.
 pub fn beautify_doc_string(data: Symbol) -> String {
-    /// remove whitespace-only lines from the start/end of lines
-    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = 0;
-        let mut j = lines.len();
-        // first line of all-stars should be omitted
-        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
-            i += 1;
-        }
-
-        while i < j && lines[i].trim().is_empty() {
-            i += 1;
-        }
-        // like the first, a last line of all stars should be omitted
-        if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
-            j -= 1;
-        }
-
-        while j > i && lines[j - 1].trim().is_empty() {
-            j -= 1;
-        }
-
-        lines[i..j].to_vec()
-    }
-
-    /// remove a "[ \t]*\*" block from each line, if possible
-    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = usize::MAX;
-        let mut can_trim = true;
-        let mut first = true;
-
-        for line in &lines {
-            for (j, c) in line.chars().enumerate() {
-                if j > i || !"* \t".contains(c) {
-                    can_trim = false;
-                    break;
-                }
-                if c == '*' {
-                    if first {
-                        i = j;
-                        first = false;
-                    } else if i != j {
-                        can_trim = false;
-                    }
-                    break;
-                }
-            }
-            if i >= line.len() {
-                can_trim = false;
-            }
-            if !can_trim {
-                break;
-            }
-        }
-
-        if can_trim {
-            lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
-        } else {
-            lines
-        }
-    }
+    use block_comment::{horizontal_trim, vertical_trim};
 
     let data = data.as_str();
     if data.contains('\n') {
-        let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
-        let lines = vertical_trim(lines);
-        let lines = horizontal_trim(lines);
-        lines.join("\n")
+        let lines = data.lines().collect::<Vec<&str>>();
+        let lines = vertical_trim(&lines);
+        match horizontal_trim(lines) {
+            Some(lines) => lines.join("\n"),
+            None => lines.join("\n"),
+        }
     } else {
         data.to_string()
     }

diff --git a/compiler/rustc_ast/src/util/comments/block_comment.rs b/compiler/rustc_ast/src/util/comments/block_comment.rs
@@ -0,0 +1,107 @@
+/*!
+ * Block comment helpers.
+ */
+
+#[cfg(test)]
+mod tests;
+
+/// Strips decorative and blank lines from both ends of a block comment.
+///
+/// The steps run in this order, each on the result of the previous one:
+///
+/// 1. Drop the first line if it consists only of stars (`*`) or is empty.
+/// 2. Drop every leading line that is empty or whitespace-only.
+/// 3. Drop the last remaining line if it matches the regex `^ ?\**$`.
+/// 4. Drop every trailing line that is empty or whitespace-only.
+///
+/// The result is a sub-slice of `lines`; no line is copied or modified.
+pub fn vertical_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> &'arr [&'row str] {
+    let only_stars = |line: &str| line.bytes().all(|b| b == b'*');
+    let blank = |line: &str| line.trim().is_empty();
+
+    // Step 1: an opening line made up solely of stars.
+    let mut rest = match lines.split_first() {
+        Some((&head, tail)) if only_stars(head) => tail,
+        _ => lines,
+    };
+
+    // Step 2: blank lines at the top.
+    while let Some((&head, tail)) = rest.split_first() {
+        if !blank(head) {
+            break;
+        }
+        rest = tail;
+    }
+
+    // Step 3: a closing line of stars, optionally preceded by one space.
+    if let Some((&last, init)) = rest.split_last() {
+        let stars = last.strip_prefix(' ').unwrap_or(last);
+        if only_stars(stars) {
+            rest = init;
+        }
+    }
+
+    // Step 4: blank lines at the bottom.
+    while let Some((&last, init)) = rest.split_last() {
+        if !blank(last) {
+            break;
+        }
+        rest = init;
+    }
+
+    rest
+}
+
+/// Removes the shared `[ \t]*\*` prefix from every line: all or nothing.
+///
+/// The prefix is read from the first line: its leading spaces and tabs up to
+/// and including the first `*`. Returns `None`, trimming nothing, when `lines`
+/// is empty, when the first line has no such prefix, or when any line does not
+/// start with exactly that prefix.
+///
+/// For example,
+/// ```text
+///   * one two three four five ...
+///   * one two three four five ...
+///   * one two three four five ...
+/// ```
+/// will be trimmed to
+/// ```text
+///  one two three four five ...
+///  one two three four five ...
+///  one two three four five ...
+/// ```
+pub fn horizontal_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> Option<Vec<&'row str>> {
+    let first = lines.first()?;
+    let prefix = get_prefix(first)?;
+
+    // `strip_prefix` checks for the prefix and removes it in one safe step, so
+    // no unchecked slicing is needed. Collecting into `Option` gives `None` as
+    // soon as a single line lacks the prefix.
+    lines
+        .iter()
+        .map(|line| line.strip_prefix(prefix))
+        .collect::<Option<Vec<_>>>()
+}
+
+/// Get the prefix with pattern `[ \t]*\*` of input `s`.
+///
+/// The prefix is the run of leading spaces and tabs followed by the first `*`,
+/// e.g. `" \t **"` yields `Some(" \t *")`.
+/// The returned slice borrows from `s`.
+///
+/// Returns `None` if `s` is empty, contains only spaces/tabs, or if its first
+/// character other than a space or tab is not `*`.
+fn get_prefix(s: &str) -> Option<&str> {
+    // Byte index of the first character that is neither a space nor a tab.
+    let star = s.find(|c: char| c != ' ' && c != '\t')?;
+
+    // `find` returns a char boundary. `*` is a single ASCII byte and no
+    // multi-byte UTF-8 sequence starts with that byte, so comparing the byte at
+    // `star` is exact, and `..=star` ends on a char boundary when it matches.
+    if s.as_bytes()[star] == b'*' {
+        Some(&s[..=star])
+    } else {
+        None
+    }
+}
diff --git a/compiler/rustc_ast/src/util/comments/block_comment/tests.rs b/compiler/rustc_ast/src/util/comments/block_comment/tests.rs
@@ -0,0 +1,146 @@
+use super::*;
+
+// Checks when `vertical_trim` drops the first and/or the last line.
+#[test]
+fn trim_vertically_first_or_line() {
+    // Cases where a star-only first or last line is dropped.
+
+    let input = &["*********************************", "* This is a module to do foo job."];
+    let want = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(input), want);
+
+    let input = &["* This is a module to do foo job.", "*********************************"];
+    let want = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(input), want);
+
+    let input = &[
+        "*********************************",
+        "* This is a module to do foo job.",
+        "*********************************",
+    ];
+    let want = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(input), want);
+
+    let input = &[
+        "***********************",
+        "* This is a module to do foo job.",
+        "*********************************",
+    ];
+    let want = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(input), want);
+
+    let input = &[
+        "**************************",
+        " * one two three four five six seven",
+        " ****************",
+    ];
+    let want = &[" * one two three four five six seven"];
+    assert_eq!(vertical_trim(input), want);
+
+    let input = &["", " * one two three four five", " "];
+    let want = &[" * one two three four five"];
+    assert_eq!(vertical_trim(input), want);
+
+    // Cases where the first or last line must be kept.
+
+    let input = &["\t  *********************** \t", "* This is a module to do foo job."];
+    let want = &["\t  *********************** \t", "* This is a module to do foo job."];
+    assert_eq!(vertical_trim(input), want);
+
+    // The last line is indented by more than one space, so it is kept.
+    let input = &[
+        "******************************",
+        "  * This is a module to do foo job.",
+        "  **************",
+    ];
+    let want = &["  * This is a module to do foo job.", "  **************"];
+    assert_eq!(vertical_trim(input), want);
+}
+
+// Leading blank lines are dropped, but only up to the first non-blank line.
+#[test]
+fn trim_vertically_empty_lines_forward() {
+    // Fixtures shared by both cases.
+    let text = " * One two three four five six seven eight nine ten.";
+    let spaces = "    ";
+    let spaces_and_tabs = "    \t    \t  ";
+
+    // Every whitespace-only line in front of the text is removed, whether it
+    // holds spaces only or a mix of spaces and tabs.
+    let input = &[spaces, spaces_and_tabs, text];
+    let want = &[text];
+    assert_eq!(vertical_trim(input), want);
+
+    // Trimming stops at the first non-blank line, so the whitespace-only
+    // line between the two text lines is kept.
+    let input = &[spaces, text, spaces_and_tabs, text];
+    let want = &[text, spaces_and_tabs, text];
+    assert_eq!(vertical_trim(input), want);
+}
+
+// Trailing blank lines are dropped, but only back to the last non-blank line.
+#[test]
+fn trim_vertically_empty_lines_backward() {
+    // Fixtures shared by both cases.
+    let text = " * One two three four five six seven eight nine ten.";
+    let spaces = "    ";
+    let spaces_and_tabs = "    \t    \t  ";
+
+    // Every whitespace-only line after the text is removed, whether it holds
+    // spaces only or a mix of spaces and tabs.
+    let input = &[text, spaces, spaces_and_tabs];
+    let want = &[text];
+    assert_eq!(vertical_trim(input), want);
+
+    // Trimming stops at the last non-blank line, so the whitespace-only line
+    // between the two text lines is kept.
+    let input = &[text, spaces, text, spaces_and_tabs];
+    let want = &[text, spaces, text];
+    assert_eq!(vertical_trim(input), want);
+}
+
+// Degenerate inputs must neither panic nor leave any line behind.
+#[test]
+fn trim_vertically_empty() {
+    let nothing: &[&str] = &[];
+    // A single empty line is trimmed away entirely.
+    let single_empty: &[&str] = &[""];
+    assert_eq!(vertical_trim(single_empty), nothing);
+
+    // An empty slice stays empty.
+    assert_eq!(vertical_trim(nothing), nothing);
+}
+
+#[test]
+fn trim_horizontally() {
+    let input = &[
+        " \t\t * one two three",
+        " \t\t * four fix six seven *",
+        " \t\t * forty two ",
+        " \t\t ** sixty nine",
+    ];
+    let want: &[&str] = &[" one two three", " four fix six seven *", " forty two ", "* sixty nine"];
+    assert_eq!(horizontal_trim(input).as_deref(), Some(want));
+
+    // An empty slice and a single empty line both have no prefix to trim.
+    assert_eq!(horizontal_trim(&[]).as_deref(), None);
+    assert_eq!(horizontal_trim(&[""]).as_deref(), None);
+
+    // Rejected: the prefix must match byte for byte, so a line indented with
+    // spaces only does not match a prefix that contains a tab.
+    let input = &[
+        " \t * one two three",
+        "     * four fix six seven *",
+        " \t * forty two ",
+        " \t ** sixty nine",
+    ];
+    assert_eq!(horizontal_trim(input).as_deref(), None);
+}
+
+#[test]
+fn test_get_prefix() {
+    let cases = [(" \t **", Some(" \t *")), ("*", Some("*")), (" \t ^*", None), ("   ", None)];
+    for &(input, want) in cases.iter() {
+        assert_eq!(get_prefix(input), want);
+    }
+}
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
@@ -66,7 +66,7 @@ impl<'a> Cursor<'a> {
     }
 
     /// Returns a `Chars` iterator over the remaining characters.
-    fn chars(&self) -> Chars<'a> {
+    pub(crate) fn chars(&self) -> Chars<'a> {
         self.chars.clone()
     }
 

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
@@ -422,11 +422,12 @@ impl Cursor<'_> {
         debug_assert!(self.prev() == '/' && self.first() == '/');
         self.bump();
 
-        let doc_style = match self.first() {
+        let doc_style = match self.chars().as_str().as_bytes() {
             // `//!` is an inner line doc comment.
-            '!' => Some(DocStyle::Inner),
+            [b'!', ..] => Some(DocStyle::Inner),
             // `////` (more than 3 slashes) is not considered a doc comment.
-            '/' if self.second() != '/' => Some(DocStyle::Outer),
+            [b'/', b'/', ..] => None,
+            [b'/', ..] => Some(DocStyle::Outer),
             _ => None,
         };
 
@@ -438,12 +439,13 @@ impl Cursor<'_> {
         debug_assert!(self.prev() == '/' && self.first() == '*');
         self.bump();
 
-        let doc_style = match self.first() {
+        let doc_style = match self.chars().as_str().as_bytes() {
             // `/*!` is an inner block doc comment.
-            '!' => Some(DocStyle::Inner),
+            [b'!', ..] => Some(DocStyle::Inner),
             // `/***` (more than 2 stars) is not considered a doc comment.
             // `/**/` is not considered a doc comment.
-            '*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
+            [b'*', b'*' | b'/', ..] => None,
+            [b'*', ..] => Some(DocStyle::Outer),
             _ => None,
         };
 
@@ -464,7 +466,7 @@ impl Cursor<'_> {
                         break;
                     }
                 }
-                _ => (),
+                _ => {}
             }
         }