Skip to content

Refactor doc comment parsing #74209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 8 additions & 64 deletions compiler/rustc_ast/src/util/comments.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};

mod block_comment;
#[cfg(test)]
mod tests;

Expand All @@ -26,73 +27,16 @@ pub struct Comment {
/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol) -> String {
/// Drops decorative and whitespace-only lines from both ends of `lines`.
///
/// Applied in this order:
/// 1. a first line consisting solely of `*` (an empty first line counts too),
/// 2. whitespace-only lines at the top,
/// 3. a last line whose characters after the first one are all `*`,
/// 4. whitespace-only lines at the bottom.
fn vertical_trim(lines: Vec<String>) -> Vec<String> {
    let mut start = 0;
    let mut end = lines.len();

    // Opening decoration line.
    if lines.first().map_or(false, |line| line.chars().all(|c| c == '*')) {
        start = 1;
    }

    // Blank lines at the top.
    start += lines[start..end].iter().take_while(|line| line.trim().is_empty()).count();

    // Closing decoration line: the first character is ignored, whatever it is.
    if end > start && lines[end - 1].chars().skip(1).all(|c| c == '*') {
        end -= 1;
    }

    // Blank lines at the bottom.
    end -= lines[start..end].iter().rev().take_while(|line| line.trim().is_empty()).count();

    lines[start..end].to_vec()
}

/// Removes a leading "[ \t]*\*" block from each line, if possible.
///
/// Trimming is all-or-nothing: as soon as one line fails the checks below, `lines` is
/// returned unchanged. Otherwise every line is cut just past column `i`, the position of
/// the first `*` that was found.
fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
// Char index of the first `*` encountered; stays `usize::MAX` until one is found.
let mut i = usize::MAX;
let mut can_trim = true;
// True until the first `*` has been recorded in `i`.
let mut first = true;

for line in &lines {
// Scan the line's leading characters up to and including its first `*`.
for (j, c) in line.chars().enumerate() {
// Give up when scanning runs past the recorded column, or when a character other
// than `*`, space or tab shows up before any `*`.
if j > i || !"* \t".contains(c) {
can_trim = false;
break;
}
if c == '*' {
if first {
// First `*` seen: every later line must have its `*` in this same column.
i = j;
first = false;
} else if i != j {
can_trim = false;
}
break;
}
}
// Reject a line that is too short (in bytes) to contain column `i`. This also
// rejects a line without any `*` while `i` is still `usize::MAX`.
if i >= line.len() {
can_trim = false;
}
if !can_trim {
break;
}
}

if can_trim {
// `i` was counted in chars but is used as a byte offset here; on a scanned line only
// spaces and tabs (one byte each) can precede the `*`, so the two coincide.
lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
} else {
lines
}
}
use block_comment::{horizontal_trim, vertical_trim};

let data = data.as_str();
if data.contains('\n') {
let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
let lines = vertical_trim(lines);
let lines = horizontal_trim(lines);
lines.join("\n")
let lines = data.lines().collect::<Vec<&str>>();
let lines = vertical_trim(&lines);
match horizontal_trim(lines) {
Some(lines) => lines.join("\n"),
None => lines.join("\n"),
}
} else {
data.to_string()
}
Expand Down
107 changes: 107 additions & 0 deletions compiler/rustc_ast/src/util/comments/block_comment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*!
* Block comment helpers.
*/

#[cfg(test)]
mod tests;

/// Trims decorative and blank lines from both ends of a block comment.
///
/// The returned slice is a sub-slice of `lines`; nothing is copied. The rules are applied
/// in this order:
///
/// 1. Drop the first line if it consists solely of `*` (an empty first line counts too).
/// 2. Drop whitespace-only lines from the top.
/// 3. Drop the last line if it matches the regex `^ ?\**$`, i.e. at most one leading space
///    followed by nothing but `*`.
/// 4. Drop whitespace-only lines from the bottom.
pub fn vertical_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> &'arr [&'row str] {
    fn all_stars(text: &str) -> bool {
        text.bytes().all(|b| b == b'*')
    }

    let mut rest = lines;

    // Rule 1: opening decoration line.
    if let Some((&head, tail)) = rest.split_first() {
        if all_stars(head) {
            rest = tail;
        }
    }

    // Rule 2: blank lines at the top.
    let leading_blank = rest.iter().take_while(|line| line.trim().is_empty()).count();
    rest = &rest[leading_blank..];

    // Rule 3: closing decoration line, optionally indented by exactly one space.
    if let Some((&tail, init)) = rest.split_last() {
        if all_stars(tail.strip_prefix(' ').unwrap_or(tail)) {
            rest = init;
        }
    }

    // Rule 4: blank lines at the bottom.
    let trailing_blank = rest.iter().rev().take_while(|line| line.trim().is_empty()).count();
    &rest[..rest.len() - trailing_blank]
}

/// Strips the common `\s*\*` prefix from every line of a block comment: all or nothing.
///
/// The prefix is taken from the first line (its leading spaces/tabs up to and including the
/// first `*`). Every line must start with exactly that byte sequence; tabs and spaces are
/// not interchangeable. Whatever follows the `*` is kept verbatim, including the space that
/// usually separates it from the text.
///
/// For example,
/// ```text
///  * one two three four five ...
///  * one two three four five ...
/// ```
/// will be trimmed to
/// ```text
///  one two three four five ...
///  one two three four five ...
/// ```
///
/// Returns `None` when `lines` is empty, when the first line has no such prefix, or when
/// at least one line does not start with it.
pub fn horizontal_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> Option<Vec<&'row str>> {
    let prefix = get_prefix(lines.first()?)?;

    // `strip_prefix` checks and slices in one safe step; collecting into `Option` stops at
    // the first line that does not carry the prefix and yields `None`.
    lines.iter().map(|&line| line.strip_prefix(prefix)).collect()
}

/// Returns the leading part of `s` that matches `[ \t]*\*`: any run of spaces and tabs
/// followed by a single `*`.
///
/// The returned slice borrows from `s` and includes the `*`. Returns `None` when the first
/// byte after the leading spaces/tabs is not `*`, or when `s` ends before one is found.
fn get_prefix(s: &str) -> Option<&str> {
    // Spaces and tabs are single-byte, so this count is also the byte offset of the first
    // byte that is neither.
    let indent = s.bytes().take_while(|&b| b == b' ' || b == b'\t').count();

    match s.as_bytes().get(indent) {
        // `*` is ASCII, so `..=indent` ends on a char boundary and `get` returns `Some`.
        Some(&b'*') => s.get(..=indent),
        _ => None,
    }
}
146 changes: 146 additions & 0 deletions compiler/rustc_ast/src/util/comments/block_comment/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
use super::*;

// Checks which first/last lines `vertical_trim` treats as decoration and removes:
// a first line made only of `*`, and a last line made of at most one leading space
// followed only by `*`.
#[test]
fn trim_vertically_first_or_line() {
    // Accepted cases

    let inp = &["*********************************", "* This is a module to do foo job."];
    let out = &["* This is a module to do foo job."];
    assert_eq!(vertical_trim(inp), out);

    let inp = &["* This is a module to do foo job.", "*********************************"];
    let out = &["* This is a module to do foo job."];
    assert_eq!(vertical_trim(inp), out);

    let inp = &[
        "*********************************",
        "* This is a module to do foo job.",
        "*********************************",
    ];
    let out = &["* This is a module to do foo job."];
    assert_eq!(vertical_trim(inp), out);

    // The opening and closing lines do not need to have the same length.
    let inp = &[
        "***********************",
        "* This is a module to do foo job.",
        "*********************************",
    ];
    let out = &["* This is a module to do foo job."];
    assert_eq!(vertical_trim(inp), out);

    // A single leading space on the closing line is tolerated.
    let inp = &[
        "**************************",
        " * one two three four five six seven",
        " ****************",
    ];
    let out = &[" * one two three four five six seven"];
    assert_eq!(vertical_trim(inp), out);

    let inp = &["", " * one two three four five", " "];
    let out = &[" * one two three four five"];
    assert_eq!(vertical_trim(inp), out);

    // Non-accepted cases

    // Tabs or spaces around the stars keep the first line from counting as decoration.
    let inp = &["\t *********************** \t", "* This is a module to do foo job."];
    let out = &["\t *********************** \t", "* This is a module to do foo job."];
    assert_eq!(vertical_trim(inp), out);

    // More than one space indentation: the closing line must be indented by at least two
    // spaces here, otherwise it is a valid decoration line and gets removed.
    let inp = &[
        "******************************",
        "  * This is a module to do foo job.",
        "  **************",
    ];
    let out = &["  * This is a module to do foo job.", "  **************"];
    assert_eq!(vertical_trim(inp), out);
}

// Whitespace-only lines at the top are removed; scanning stops at the first line with
// content, so whitespace-only lines further down are kept.
#[test]
fn trim_vertically_empty_lines_forward() {
    const TEXT: &str = " * One two three four five six seven eight nine ten.";

    fn check(input: &[&str], expected: &[&str]) {
        assert_eq!(vertical_trim(input), expected);
    }

    check(&[" ", " \t \t ", TEXT], &[TEXT]);
    check(&[" ", TEXT, " \t \t ", TEXT], &[TEXT, " \t \t ", TEXT]);
}

// Whitespace-only lines at the bottom are removed; scanning stops at the last line with
// content, so whitespace-only lines further up are kept.
#[test]
fn trim_vertically_empty_lines_backward() {
    const TEXT: &str = " * One two three four five six seven eight nine ten.";

    fn check(input: &[&str], expected: &[&str]) {
        assert_eq!(vertical_trim(input), expected);
    }

    check(&[TEXT, " ", " \t \t "], &[TEXT]);
    check(&[TEXT, " ", TEXT, " \t \t "], &[TEXT, " ", TEXT]);
}

// Degenerate inputs must not panic: a single empty line and no lines at all both trim
// down to an empty slice.
#[test]
fn trim_vertically_empty() {
    let nothing: &[&str] = &[];

    let single_empty_line: &[&str] = &[""];
    assert_eq!(vertical_trim(single_empty_line), nothing);

    assert_eq!(vertical_trim(nothing), nothing);
}

// `horizontal_trim` strips the first line's prefix from every line, or returns `None` if
// any line does not start with it.
#[test]
fn trim_horizontally() {
    // Every line shares the prefix of the first one; an extra `*` stays in the output.
    let shared_prefix: &[&str] = &[
        " \t\t * one two three",
        " \t\t * four fix six seven *",
        " \t\t * forty two ",
        " \t\t ** sixty nine",
    ];
    let expected = vec![" one two three", " four fix six seven *", " forty two ", "* sixty nine"];
    assert_eq!(horizontal_trim(shared_prefix), Some(expected));

    // Test that we handle empty collection and collection with one item.
    assert_eq!(horizontal_trim(&[]), None);
    assert_eq!(horizontal_trim(&[""]), None);

    // Non-accepted: "\t" will not equal to " "
    let mixed_indent: &[&str] = &[
        " \t * one two three",
        " * four fix six seven *",
        " \t * forty two ",
        " \t ** sixty nine",
    ];
    assert_eq!(horizontal_trim(mixed_indent), None);
}

// `get_prefix` returns the leading spaces/tabs up to and including the first `*`, or
// `None` when something else comes first or the input ends.
#[test]
fn test_get_prefix() {
    let cases = [
        (" \t **", Some(" \t *")),
        ("*", Some("*")),
        (" \t ^*", None),
        (" ", None),
    ];

    for &(input, expected) in &cases {
        assert_eq!(get_prefix(input), expected, "input: {:?}", input);
    }
}
2 changes: 1 addition & 1 deletion compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl<'a> Cursor<'a> {
}

/// Returns a `Chars` iterator over the remaining characters.
fn chars(&self) -> Chars<'a> {
pub(crate) fn chars(&self) -> Chars<'a> {
self.chars.clone()
}

Expand Down
16 changes: 9 additions & 7 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,11 +422,12 @@ impl Cursor<'_> {
debug_assert!(self.prev() == '/' && self.first() == '/');
self.bump();

let doc_style = match self.first() {
let doc_style = match self.chars().as_str().as_bytes() {
// `//!` is an inner line doc comment.
'!' => Some(DocStyle::Inner),
[b'!', ..] => Some(DocStyle::Inner),
// `////` (more than 3 slashes) is not considered a doc comment.
'/' if self.second() != '/' => Some(DocStyle::Outer),
[b'/', b'/', ..] => None,
[b'/', ..] => Some(DocStyle::Outer),
_ => None,
};

Expand All @@ -438,12 +439,13 @@ impl Cursor<'_> {
debug_assert!(self.prev() == '/' && self.first() == '*');
self.bump();

let doc_style = match self.first() {
let doc_style = match self.chars().as_str().as_bytes() {
// `/*!` is an inner block doc comment.
'!' => Some(DocStyle::Inner),
[b'!', ..] => Some(DocStyle::Inner),
// `/***` (more than 2 stars) is not considered a doc comment.
// `/**/` is not considered a doc comment.
'*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
[b'*', b'*' | b'/', ..] => None,
[b'*', ..] => Some(DocStyle::Outer),
_ => None,
};

Expand All @@ -464,7 +466,7 @@ impl Cursor<'_> {
break;
}
}
_ => (),
_ => {}
}
}

Expand Down
Loading