Skip to content

Commit fe64282

Browse files
committed
mbe: Refactor the logic for ${concat}
* Now accept numbers, chars * Suffixes are stripped (needs more testing) * Report specific locations of errors * TODO: handle idents the same for expanded tokens
1 parent 4acc9fb commit fe64282

File tree

16 files changed

+529
-239
lines changed

16 files changed

+529
-239
lines changed

compiler/rustc_builtin_macros/src/concat.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ pub(crate) fn expand_concat(
2424
let mut guar = None;
2525
for e in es {
2626
match e.kind {
27+
// For consistent user experience, please keep this in sync with the handling of
28+
// literals in `rustc_expand::mbe::metavar_expr` `${concat()}`!
2729
ExprKind::Lit(token_lit) => match LitKind::from_token_lit(token_lit) {
2830
Ok(LitKind::Str(s, _) | LitKind::Float(s, _)) => {
2931
accumulator.push_str(s.as_str());

compiler/rustc_expand/messages.ftl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,21 @@ expand_module_multiple_candidates =
133133
expand_must_repeat_once =
134134
this must repeat at least once
135135

136+
expand_mve_concat_invalid =
137+
invalid item within a `{"${concat(...)}"}` expression
138+
.expr_ident = expanding this `concat(...)` expression
139+
.invalid_ident = this literal produced an invalid identifier
140+
.float_lit = float literals cannot be concatenated
141+
.c_str_lit = C string literals cannot be concatenated
142+
.b_str_lit = byte literals cannot be concatenated
143+
.raw_ident = raw identifiers cannot be concatenated
144+
.unsupported = unsupported input for `concat(...)`
145+
.valid_types = `concat` can join {$valid}
146+
.expected_metavar = expected an identifier; got `{$found}`
147+
.expected_metavar_dollar = todo
148+
.unexpected_group = todo
149+
.hi_label = todo
150+
136151
expand_mve_expected_ident =
137152
expected an identifier
138153
.not_ident = not a valid identifier

compiler/rustc_expand/src/errors.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,49 @@ mod metavar_exprs {
540540
pub valid_expr_list: &'static str,
541541
}
542542

543+
#[derive(Diagnostic)]
544+
#[diag(expand_mve_concat_invalid)]
545+
pub(crate) struct MveConcatInvalid {
546+
#[primary_span]
547+
pub span: Span,
548+
#[subdiagnostic]
549+
pub reason: MveConcatInvalidReason,
550+
#[help(expand_expr_ident)]
551+
pub ident_span: Span,
552+
pub valid: &'static str,
553+
}
554+
555+
// TODO: can these be labels rather than notes?
556+
#[derive(Subdiagnostic)]
557+
pub(crate) enum MveConcatInvalidReason {
558+
#[note(expand_invalid_ident)]
559+
InvalidIdent,
560+
#[note(expand_float_lit)]
561+
#[help(expand_valid_types)]
562+
FloatLit,
563+
#[note(expand_c_str_lit)]
564+
#[help(expand_valid_types)]
565+
CStrLit,
566+
#[note(expand_b_str_lit)]
567+
#[help(expand_valid_types)]
568+
ByteStrLit,
569+
#[note(expand_expected_metavar)]
570+
#[label(expand_expected_metavar_dollar)]
571+
ExpectedMetavarIdent {
572+
found: String,
573+
#[primary_span]
574+
dollar: Span,
575+
},
576+
#[note(expand_raw_ident)]
577+
RawIdentifier,
578+
#[note(expand_unsupported)]
579+
#[help(expand_valid_types)]
580+
UnsupportedInput,
581+
#[note(expand_unexpected_group)]
582+
UnexpectedGroup,
583+
InvalidLiteral,
584+
}
585+
543586
#[derive(Diagnostic)]
544587
#[diag(expand_mve_expected_ident)]
545588
pub(crate) struct MveExpectedIdent {

compiler/rustc_expand/src/mbe/metavar_expr.rs

Lines changed: 94 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
1-
use rustc_ast::token::{self, Delimiter, IdentIsRaw, Lit, Token, TokenKind};
1+
use rustc_ast::token::{self, Delimiter, IdentIsRaw, Token, TokenKind};
22
use rustc_ast::tokenstream::{TokenStream, TokenStreamIter, TokenTree};
3-
use rustc_ast::{LitIntType, LitKind};
3+
use rustc_ast::{self as ast, LitIntType, LitKind};
44
use rustc_ast_pretty::pprust;
5-
use rustc_errors::{Applicability, PResult};
5+
use rustc_errors::PResult;
6+
use rustc_lexer::is_id_continue;
67
use rustc_macros::{Decodable, Encodable};
8+
use rustc_session::errors::create_lit_error;
79
use rustc_session::parse::ParseSess;
810
use rustc_span::{Ident, Span, Symbol};
911

10-
use crate::errors::{self, MveExpectedIdentContext};
12+
use crate::errors::{self, MveConcatInvalidReason, MveExpectedIdentContext};
1113

1214
pub(crate) const RAW_IDENT_ERR: &str = "`${concat(..)}` currently does not support raw identifiers";
13-
pub(crate) const UNSUPPORTED_CONCAT_ELEM_ERR: &str = "expected identifier or string literal";
15+
pub(crate) const VALID_EXPR_CONCAT_TYPES: &str =
16+
"metavariables, identifiers, string literals, and integer literals";
1417

1518
/// Argument specification for a metavariable expression
1619
#[derive(Clone, Copy)]
@@ -190,7 +193,7 @@ fn iter_span(iter: &TokenStreamIter<'_>) -> Option<Span> {
190193
pub(crate) enum MetaVarExprConcatElem {
191194
/// Identifier WITHOUT a preceding dollar sign, which means that this identifier should be
192195
/// interpreted as a literal.
193-
Ident(Ident),
196+
Ident(String),
194197
/// For example, a number or a string.
195198
Literal(Symbol),
196199
/// Identifier WITH a preceding dollar sign, which means that this identifier should be
@@ -206,30 +209,92 @@ fn parse_concat<'psess>(
206209
expr_ident_span: Span,
207210
) -> PResult<'psess, MetaVarExpr> {
208211
let mut result = Vec::new();
212+
let dcx = psess.dcx();
209213
loop {
210-
let is_var = try_eat_dollar(iter);
211-
let token = parse_token(iter, psess, outer_span)?;
212-
let element = if is_var {
213-
MetaVarExprConcatElem::Var(parse_ident_from_token(psess, token)?)
214-
} else if let TokenKind::Literal(Lit { kind: token::LitKind::Str, symbol, suffix: None }) =
215-
token.kind
216-
{
217-
MetaVarExprConcatElem::Literal(symbol)
218-
} else {
219-
match parse_ident_from_token(psess, token) {
220-
Err(err) => {
221-
err.cancel();
222-
return Err(psess
223-
.dcx()
224-
.struct_span_err(token.span, UNSUPPORTED_CONCAT_ELEM_ERR));
214+
let dollar = try_eat_dollar(iter);
215+
let Some(tt) = iter.next() else {
216+
// May be hit only with the first iteration (peek is otherwise checked at the end).
217+
break;
218+
};
219+
220+
let make_err = |reason| {
221+
let err = errors::MveConcatInvalid {
222+
span: tt.span(),
223+
ident_span: expr_ident_span,
224+
reason,
225+
valid: VALID_EXPR_CONCAT_TYPES,
226+
};
227+
Err(dcx.create_err(err))
228+
};
229+
230+
let token = match tt {
231+
TokenTree::Token(token, _) => token,
232+
TokenTree::Delimited(..) => {
233+
return make_err(MveConcatInvalidReason::UnexpectedGroup);
234+
}
235+
};
236+
237+
let element = if let Some(dollar) = dollar {
238+
// Expecting a metavar
239+
let Some((ident, _)) = token.ident() else {
240+
return make_err(MveConcatInvalidReason::ExpectedMetavarIdent {
241+
found: pprust::token_to_string(token).into_owned(),
242+
dollar,
243+
});
244+
};
245+
246+
// Variables get passed untouched
247+
MetaVarExprConcatElem::Var(ident)
248+
} else if let TokenKind::Literal(lit) = token.kind {
249+
// Preprocess with `from_token_lit` to handle unescaping, float / int literal suffix
250+
// stripping.
251+
//
252+
// For consistent user experience, please keep this in sync with the handling of
253+
// literals in `rustc_builtin_macros::concat`!
254+
let s = match ast::LitKind::from_token_lit(lit.clone()) {
255+
Ok(ast::LitKind::Str(s, _)) => s.to_string(),
256+
Ok(ast::LitKind::Float(..)) => {
257+
return make_err(MveConcatInvalidReason::FloatLit);
258+
}
259+
Ok(ast::LitKind::Char(c)) => c.to_string(),
260+
Ok(ast::LitKind::Int(i, _)) => i.to_string(),
261+
Ok(ast::LitKind::Bool(b)) => b.to_string(),
262+
Ok(ast::LitKind::CStr(..)) => return make_err(MveConcatInvalidReason::CStrLit),
263+
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
264+
return make_err(MveConcatInvalidReason::ByteStrLit);
225265
}
226-
Ok(elem) => MetaVarExprConcatElem::Ident(elem),
266+
Ok(ast::LitKind::Err(_guarantee)) => {
267+
// REVIEW: a diagnostic was already emitted, should we just break?
268+
return make_err(MveConcatInvalidReason::InvalidLiteral);
269+
}
270+
Err(err) => return Err(create_lit_error(psess, err, lit, token.span)),
271+
};
272+
273+
if !s.chars().all(|ch| is_id_continue(ch)) {
274+
// Check that all characters are valid in the middle of an identifier. This doesn't
275+
// guarantee that the final identifier is valid (we still need to check it later),
276+
// but it allows us to catch errors with specific arguments before expansion time;
277+
// for example, string literal "foo.bar" gets flagged before the macro is invoked.
278+
return make_err(MveConcatInvalidReason::InvalidIdent);
279+
}
280+
281+
MetaVarExprConcatElem::Ident(s)
282+
} else if let Some((elem, is_raw)) = token.ident() {
283+
if is_raw == IdentIsRaw::Yes {
284+
return make_err(MveConcatInvalidReason::RawIdentifier);
227285
}
286+
MetaVarExprConcatElem::Ident(elem.as_str().to_string())
287+
} else {
288+
return make_err(MveConcatInvalidReason::UnsupportedInput);
228289
};
290+
229291
result.push(element);
292+
230293
if iter.peek().is_none() {
294+
// break before trying to eat the comma
231295
break;
232296
}
297+
233298
if !try_eat_comma(iter) {
234299
return Err(psess.dcx().struct_span_err(outer_span, "expected comma"));
235300
}
@@ -315,43 +380,6 @@ fn parse_ident<'psess>(
315380
Ok(elem)
316381
}
317382

318-
fn parse_ident_from_token<'psess>(
319-
psess: &'psess ParseSess,
320-
token: &Token,
321-
) -> PResult<'psess, Ident> {
322-
if let Some((elem, is_raw)) = token.ident() {
323-
if let IdentIsRaw::Yes = is_raw {
324-
return Err(psess.dcx().struct_span_err(elem.span, RAW_IDENT_ERR));
325-
}
326-
return Ok(elem);
327-
}
328-
let token_str = pprust::token_to_string(token);
329-
let mut err = psess
330-
.dcx()
331-
.struct_span_err(token.span, format!("expected identifier, found `{token_str}`"));
332-
err.span_suggestion(
333-
token.span,
334-
format!("try removing `{token_str}`"),
335-
"",
336-
Applicability::MaybeIncorrect,
337-
);
338-
Err(err)
339-
}
340-
341-
fn parse_token<'psess, 't>(
342-
iter: &mut TokenStreamIter<'t>,
343-
psess: &'psess ParseSess,
344-
fallback_span: Span,
345-
) -> PResult<'psess, &'t Token> {
346-
let Some(tt) = iter.next() else {
347-
return Err(psess.dcx().struct_span_err(fallback_span, UNSUPPORTED_CONCAT_ELEM_ERR));
348-
};
349-
let TokenTree::Token(token, _) = tt else {
350-
return Err(psess.dcx().struct_span_err(tt.span(), UNSUPPORTED_CONCAT_ELEM_ERR));
351-
};
352-
Ok(token)
353-
}
354-
355383
/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
356384
/// iterator is not modified and the result is `false`.
357385
fn try_eat_comma(iter: &mut TokenStreamIter<'_>) -> bool {
@@ -362,14 +390,14 @@ fn try_eat_comma(iter: &mut TokenStreamIter<'_>) -> bool {
362390
false
363391
}
364392

365-
/// Tries to move the iterator forward returning `true` if there is a dollar sign. If not, then the
366-
/// iterator is not modified and the result is `false`.
367-
fn try_eat_dollar(iter: &mut TokenStreamIter<'_>) -> bool {
368-
if let Some(TokenTree::Token(Token { kind: token::Dollar, .. }, _)) = iter.peek() {
393+
/// Tries to move the iterator forward returning `Some(dollar_span)` if there is a dollar sign. If
394+
/// not, then the iterator is not modified and the result is `None`.
395+
fn try_eat_dollar(iter: &mut TokenStreamIter<'_>) -> Option<Span> {
396+
if let Some(TokenTree::Token(Token { kind: token::Dollar, span }, _)) = iter.peek() {
369397
let _ = iter.next();
370-
return true;
398+
return Some(*span);
371399
}
372-
false
400+
None
373401
}
374402

375403
/// Expects that the next item is a dollar sign.
@@ -378,7 +406,7 @@ fn eat_dollar<'psess>(
378406
psess: &'psess ParseSess,
379407
span: Span,
380408
) -> PResult<'psess, ()> {
381-
if try_eat_dollar(iter) {
409+
if try_eat_dollar(iter).is_some() {
382410
return Ok(());
383411
}
384412
Err(psess.dcx().struct_span_err(

compiler/rustc_expand/src/mbe/transcribe.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -545,9 +545,10 @@ fn metavar_expr_concat<'tx>(
545545
let dcx = tscx.psess.dcx();
546546
let mut concatenated = String::new();
547547
for element in elements.into_iter() {
548-
let symbol = match element {
549-
MetaVarExprConcatElem::Ident(elem) => elem.name,
550-
MetaVarExprConcatElem::Literal(elem) => *elem,
548+
let tmp_sym;
549+
let sym_str = match element {
550+
MetaVarExprConcatElem::Ident(elem) => elem.as_str(),
551+
MetaVarExprConcatElem::Literal(elem) => elem.as_str(),
551552
MetaVarExprConcatElem::Var(ident) => {
552553
match matched_from_ident(dcx, *ident, tscx.interp)? {
553554
NamedMatch::MatchedSeq(named_matches) => {
@@ -557,16 +558,20 @@ fn metavar_expr_concat<'tx>(
557558
match &named_matches[*curr_idx] {
558559
// FIXME(c410-f3r) Nested repetitions are unimplemented
559560
MatchedSeq(_) => unimplemented!(),
560-
MatchedSingle(pnr) => extract_symbol_from_pnr(dcx, pnr, ident.span)?,
561+
MatchedSingle(pnr) => {
562+
tmp_sym = extract_symbol_from_pnr(tscx, pnr, ident.span)?;
563+
tmp_sym.as_str()
564+
}
561565
}
562566
}
563567
NamedMatch::MatchedSingle(pnr) => {
564-
extract_symbol_from_pnr(dcx, pnr, ident.span)?
568+
tmp_sym = extract_symbol_from_pnr(tscx, pnr, ident.span)?;
569+
tmp_sym.as_str()
565570
}
566571
}
567572
}
568573
};
569-
concatenated.push_str(symbol.as_str());
574+
concatenated.push_str(sym_str);
570575
}
571576
let symbol = nfc_normalize(&concatenated);
572577
let concatenated_span = tscx.visited_dspan(dspan);
@@ -900,11 +905,13 @@ fn out_of_bounds_err<'a>(dcx: DiagCtxtHandle<'a>, max: usize, span: Span, ty: &s
900905
}
901906

902907
/// Extracts an metavariable symbol that can be an identifier, a token tree or a literal.
903-
fn extract_symbol_from_pnr<'a>(
904-
dcx: DiagCtxtHandle<'a>,
908+
// TODO: use the same logic as for metavar_expr
909+
fn extract_symbol_from_pnr<'tx>(
910+
tscx: &mut TranscrCtx<'tx, '_>,
905911
pnr: &ParseNtResult,
906912
span_err: Span,
907-
) -> PResult<'a, Symbol> {
913+
) -> PResult<'tx, Symbol> {
914+
let dcx = tscx.psess.dcx();
908915
match pnr {
909916
ParseNtResult::Ident(nt_ident, is_raw) => {
910917
if let IdentIsRaw::Yes = is_raw {

0 commit comments

Comments
 (0)