Commit 8034261

Rollup merge of #144239 - xizheyin:clean-lexer, r=fee1-dead
Clean `rustc/parse/src/lexer` to improve maintainability

This PR refactors the lexer code to improve maintainability and eliminate code duplication.

In the first commit, I improve the error handling:
- rename `make_unclosed_delims_error` to the more descriptive `make_errors_for_mismatched_closing_delims`
- change the return type from `Option<Diag>` to `Vec<Diag>` to avoid lengthy vec processing in `lex_token_trees`
- use `splice` instead of `extend` to make the logic clearer, since `errs` sounds more generic and is better suited as a return value

In the second commit, I replace the magic number 5 with the `UNCLOSED_DELIMITER_SHOW_LIMIT` constant.

In the third commit, I move the `eof_err` function below the parsing logic for better code flow.

In the fourth commit, I extract a `calculate_spacing` function to eliminate the duplicated spacing logic shared by `bump` and `bump_minimal`.

r? compiler
2 parents: 546929f + 2832517
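As a rough, self-contained illustration of the first commit's batching change, the sketch below shows the same move from a per-item helper returning `Option` to a slice-level function returning `Vec`, so the `filter_map`/`collect` plumbing lives in one place instead of at every call site. The names and types here (`Unmatched`, `Diag`, `make_error`, `make_errors`) are simplified stand-ins, not the real rustc items.

// Stand-in for `UnmatchedDelim`: `None` marks the EOF case, which is skipped.
struct Unmatched {
    found_delim: Option<char>,
    found_span: usize,
}

// Stand-in for a diagnostic.
struct Diag {
    message: String,
}

// Per-item construction, roughly the shape before the PR: the caller has to
// filter out the `None` results and collect them itself.
fn make_error(unmatched: &Unmatched) -> Option<Diag> {
    let delim = unmatched.found_delim?;
    Some(Diag {
        message: format!("mismatched closing `{delim}` at offset {}", unmatched.found_span),
    })
}

// Batch construction, roughly the shape after the PR: the filtering and
// collecting happen in one place and the caller just receives a `Vec`.
fn make_errors(unmatcheds: &[Unmatched]) -> Vec<Diag> {
    unmatcheds.iter().filter_map(make_error).collect()
}

fn main() {
    let found = vec![
        Unmatched { found_delim: Some(')'), found_span: 10 },
        Unmatched { found_delim: None, found_span: 20 }, // EOF entry is dropped
    ];
    let errs = make_errors(&found);
    assert_eq!(errs.len(), 1);
    println!("{}", errs[0].message);
}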

File tree: 3 files changed (+83 / -85 lines)

compiler/rustc_parse/src/lexer/diagnostics.rs

Lines changed: 25 additions & 19 deletions
@@ -126,23 +126,29 @@ pub(super) fn report_suspicious_mismatch_block(
     }
 }

-pub(crate) fn make_unclosed_delims_error(
-    unmatched: UnmatchedDelim,
-    psess: &ParseSess,
-) -> Option<Diag<'_>> {
-    // `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
-    // `unmatched_delims` only for error recovery in the `Parser`.
-    let found_delim = unmatched.found_delim?;
-    let mut spans = vec![unmatched.found_span];
-    if let Some(sp) = unmatched.unclosed_span {
-        spans.push(sp);
-    };
-    let err = psess.dcx().create_err(MismatchedClosingDelimiter {
-        spans,
-        delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind()).to_string(),
-        unmatched: unmatched.found_span,
-        opening_candidate: unmatched.candidate_span,
-        unclosed: unmatched.unclosed_span,
-    });
-    Some(err)
+pub(crate) fn make_errors_for_mismatched_closing_delims<'psess>(
+    unmatcheds: &[UnmatchedDelim],
+    psess: &'psess ParseSess,
+) -> Vec<Diag<'psess>> {
+    unmatcheds
+        .iter()
+        .filter_map(|unmatched| {
+            // `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
+            // `unmatched_delims` only for error recovery in the `Parser`.
+            let found_delim = unmatched.found_delim?;
+            let mut spans = vec![unmatched.found_span];
+            if let Some(sp) = unmatched.unclosed_span {
+                spans.push(sp);
+            };
+            let err = psess.dcx().create_err(MismatchedClosingDelimiter {
+                spans,
+                delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind())
+                    .to_string(),
+                unmatched: unmatched.found_span,
+                opening_candidate: unmatched.candidate_span,
+                unclosed: unmatched.unclosed_span,
+            });
+            Some(err)
+        })
+        .collect()
 }

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 7 additions & 11 deletions
@@ -1,4 +1,4 @@
-use diagnostics::make_unclosed_delims_error;
+use diagnostics::make_errors_for_mismatched_closing_delims;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
@@ -71,27 +71,23 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
     };
     let res = lexer.lex_token_trees(/* is_delimited */ false);

-    let mut unmatched_delims: Vec<_> = lexer
-        .diag_info
-        .unmatched_delims
-        .into_iter()
-        .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
-        .collect();
+    let mut unmatched_closing_delims: Vec<_> =
+        make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);

     match res {
         Ok((_open_spacing, stream)) => {
-            if unmatched_delims.is_empty() {
+            if unmatched_closing_delims.is_empty() {
                 Ok(stream)
             } else {
                 // Return error if there are unmatched delimiters or unclosed delimiters.
-                Err(unmatched_delims)
+                Err(unmatched_closing_delims)
             }
         }
         Err(errs) => {
            // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
            // because the delimiter mismatch is more likely to be the root cause of error
-            unmatched_delims.extend(errs);
-            Err(unmatched_delims)
+            unmatched_closing_delims.extend(errs);
+            Err(unmatched_closing_delims)
         }
     }
 }

compiler/rustc_parse/src/lexer/tokentrees.rs

Lines changed: 51 additions & 55 deletions
@@ -51,45 +51,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }

-    fn eof_err(&mut self) -> Diag<'psess> {
-        let msg = "this file contains an unclosed delimiter";
-        let mut err = self.dcx().struct_span_err(self.token.span, msg);
-
-        let unclosed_delimiter_show_limit = 5;
-        let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_delimiters.len());
-        for &(_, span) in &self.diag_info.open_delimiters[..len] {
-            err.span_label(span, "unclosed delimiter");
-            self.diag_info.unmatched_delims.push(UnmatchedDelim {
-                found_delim: None,
-                found_span: self.token.span,
-                unclosed_span: Some(span),
-                candidate_span: None,
-            });
-        }
-
-        if let Some((_, span)) = self.diag_info.open_delimiters.get(unclosed_delimiter_show_limit)
-            && self.diag_info.open_delimiters.len() >= unclosed_delimiter_show_limit + 2
-        {
-            err.span_label(
-                *span,
-                format!(
-                    "another {} unclosed delimiters begin from here",
-                    self.diag_info.open_delimiters.len() - unclosed_delimiter_show_limit
-                ),
-            );
-        }
-
-        if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
-            report_suspicious_mismatch_block(
-                &mut err,
-                &self.diag_info,
-                self.psess.source_map(),
-                *delim,
-            )
-        }
-        err
-    }
-
     fn lex_token_tree_open_delim(
         &mut self,
         open_delim: Delimiter,
@@ -206,13 +167,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             } else if let Some(glued) = self.token.glue(&next_tok) {
                 self.token = glued;
             } else {
-                let this_spacing = if next_tok.is_punct() {
-                    Spacing::Joint
-                } else if next_tok == token::Eof {
-                    Spacing::Alone
-                } else {
-                    Spacing::JointHidden
-                };
+                let this_spacing = self.calculate_spacing(&next_tok);
                 break (this_spacing, next_tok);
             }
         };
@@ -223,23 +178,64 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
     // Cut-down version of `bump` used when the token kind is known in advance.
     fn bump_minimal(&mut self) -> Spacing {
         let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
-
         let this_spacing = if is_next_tok_preceded_by_whitespace {
             Spacing::Alone
         } else {
-            if next_tok.is_punct() {
-                Spacing::Joint
-            } else if next_tok == token::Eof {
-                Spacing::Alone
-            } else {
-                Spacing::JointHidden
-            }
+            self.calculate_spacing(&next_tok)
         };
-
         self.token = next_tok;
         this_spacing
     }

+    fn calculate_spacing(&self, next_tok: &Token) -> Spacing {
+        if next_tok.is_punct() {
+            Spacing::Joint
+        } else if *next_tok == token::Eof {
+            Spacing::Alone
+        } else {
+            Spacing::JointHidden
+        }
+    }
+
+    fn eof_err(&mut self) -> Diag<'psess> {
+        const UNCLOSED_DELIMITER_SHOW_LIMIT: usize = 5;
+        let msg = "this file contains an unclosed delimiter";
+        let mut err = self.dcx().struct_span_err(self.token.span, msg);
+
+        let len = usize::min(UNCLOSED_DELIMITER_SHOW_LIMIT, self.diag_info.open_delimiters.len());
+        for &(_, span) in &self.diag_info.open_delimiters[..len] {
+            err.span_label(span, "unclosed delimiter");
+            self.diag_info.unmatched_delims.push(UnmatchedDelim {
+                found_delim: None,
+                found_span: self.token.span,
+                unclosed_span: Some(span),
+                candidate_span: None,
+            });
+        }
+
+        if let Some((_, span)) = self.diag_info.open_delimiters.get(UNCLOSED_DELIMITER_SHOW_LIMIT)
+            && self.diag_info.open_delimiters.len() >= UNCLOSED_DELIMITER_SHOW_LIMIT + 2
+        {
+            err.span_label(
+                *span,
+                format!(
+                    "another {} unclosed delimiters begin from here",
+                    self.diag_info.open_delimiters.len() - UNCLOSED_DELIMITER_SHOW_LIMIT
+                ),
+            );
+        }
+
+        if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
+            report_suspicious_mismatch_block(
+                &mut err,
+                &self.diag_info,
+                self.psess.source_map(),
+                *delim,
+            )
+        }
+        err
+    }
+
     fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
         // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
         let token_str = token_to_string(&self.token);
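The `calculate_spacing` extraction shown in the hunks above is the usual move of hoisting a duplicated decision into one helper that both call sites share, leaving only the whitespace check at the call sites. The following sketch reproduces that shape with simplified stand-in types (`Tok`, the local `Spacing`, and `bump_spacing` are illustrative only, not the real lexer definitions):

// Stand-ins for the token and spacing types.
#[derive(Debug, PartialEq)]
enum Spacing {
    Alone,
    Joint,
    JointHidden,
}

#[derive(PartialEq)]
enum Tok {
    Punct,
    Eof,
    Other,
}

// The shared decision, written once instead of inline in both bump-like paths.
fn calculate_spacing(next_tok: &Tok) -> Spacing {
    if *next_tok == Tok::Punct {
        Spacing::Joint
    } else if *next_tok == Tok::Eof {
        Spacing::Alone
    } else {
        Spacing::JointHidden
    }
}

// Both the "full" and "minimal" bump paths delegate to the same helper; only the
// preceding-whitespace check stays at the call site, mirroring the diff's structure.
fn bump_spacing(preceded_by_whitespace: bool, next_tok: &Tok) -> Spacing {
    if preceded_by_whitespace { Spacing::Alone } else { calculate_spacing(next_tok) }
}

fn main() {
    assert_eq!(bump_spacing(false, &Tok::Punct), Spacing::Joint);
    assert_eq!(bump_spacing(false, &Tok::Eof), Spacing::Alone);
    assert_eq!(bump_spacing(false, &Tok::Other), Spacing::JointHidden);
    assert_eq!(bump_spacing(true, &Tok::Other), Spacing::Alone);
}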
