Skip to content

Commit 7b17511

Browse files
committed
added error for invalid char cast
1 parent f838cbc commit 7b17511

File tree

5 files changed

+222
-21
lines changed

5 files changed

+222
-21
lines changed

compiler/rustc_lint/messages.ftl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ lint_invalid_asm_label_named = avoid using named labels in inline assembly
440440
.help = only local labels of the form `<number>:` should be used in inline asm
441441
.note = see the asm section of Rust By Example <https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels> for more information
442442
lint_invalid_asm_label_no_span = the label may be declared in the expansion of a macro
443+
443444
lint_invalid_crate_type_value = invalid `crate_type` value
444445
.suggestion = did you mean
445446
@@ -790,6 +791,9 @@ lint_supertrait_as_deref_target = this `Deref` implementation is covered by an i
790791
.label2 = target type is a supertrait of `{$self_ty}`
791792
.help = consider removing this implementation or replacing it with a method instead
792793
794+
lint_surrogate_char_cast = surrogate values are not valid for `char`
795+
.note = surrogate code points [0xD800, 0xDFFF] are reserved for UTF-16 and cannot be used in Rust `char`
796+
793797
lint_suspicious_double_ref_clone =
794798
using `.clone()` on a double reference, which returns `{$ty}` instead of cloning the inner type
795799
@@ -799,6 +803,9 @@ lint_suspicious_double_ref_deref =
799803
lint_symbol_intern_string_literal = using `Symbol::intern` on a string literal
800804
.help = consider adding the symbol to `compiler/rustc_span/src/symbol.rs`
801805
806+
lint_too_large_char_cast = value exceeds maximum `char` value
807+
.note = maximum valid `char` value is 0x10FFFF
808+
802809
lint_trailing_semi_macro = trailing semicolon in macro used in expression position
803810
.note1 = macro invocations at the end of a block are treated as expressions
804811
.note2 = to ignore the value produced by the macro, add a semicolon after the invocation of `{$name}`

compiler/rustc_lint/src/lints.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,20 @@ pub(crate) struct OverflowingLiteral<'a> {
17461746
pub lit: String,
17471747
}
17481748

1749+
#[derive(LintDiagnostic)]
1750+
#[diag(lint_surrogate_char_cast)]
1751+
#[note]
1752+
pub(crate) struct SurrogateCharCast {
1753+
pub literal: u128,
1754+
}
1755+
1756+
#[derive(LintDiagnostic)]
1757+
#[diag(lint_too_large_char_cast)]
1758+
#[note]
1759+
pub(crate) struct TooLargeCharCast {
1760+
pub literal: u128,
1761+
}
1762+
17491763
#[derive(LintDiagnostic)]
17501764
#[diag(lint_uses_power_alignment)]
17511765
pub(crate) struct UsesPowerAlignment;

compiler/rustc_lint/src/types/literal.rs

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::context::LintContext;
1212
use crate::lints::{
1313
OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign, OverflowingBinHexSignBitSub,
1414
OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp, OverflowingLiteral, OverflowingUInt,
15-
RangeEndpointOutOfRange, UseInclusiveRange,
15+
RangeEndpointOutOfRange, SurrogateCharCast, TooLargeCharCast, UseInclusiveRange,
1616
};
1717
use crate::types::{OVERFLOWING_LITERALS, TypeLimits};
1818

@@ -38,12 +38,18 @@ fn lint_overflowing_range_endpoint<'tcx>(
3838

3939
// We only want to handle exclusive (`..`) ranges,
4040
// which are represented as `ExprKind::Struct`.
41-
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { return false };
42-
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { return false };
41+
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else {
42+
return false;
43+
};
44+
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else {
45+
return false;
46+
};
4347
if !is_range_literal(struct_expr) {
4448
return false;
4549
};
46-
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { return false };
50+
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else {
51+
return false;
52+
};
4753

4854
// We can suggest using an inclusive range
4955
// (`..=`) instead only if it is the `end` that is
@@ -61,7 +67,9 @@ fn lint_overflowing_range_endpoint<'tcx>(
6167
};
6268

6369
let sub_sugg = if span.lo() == lit_span.lo() {
64-
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { return false };
70+
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else {
71+
return false;
72+
};
6573
UseInclusiveRange::WithoutParen {
6674
sugg: struct_expr.span.shrink_to_lo().to(lit_span.shrink_to_hi()),
6775
start,
@@ -316,11 +324,25 @@ fn lint_uint_literal<'tcx>(
316324
match par_e.kind {
317325
hir::ExprKind::Cast(..) => {
318326
if let ty::Char = cx.typeck_results().expr_ty(par_e).kind() {
319-
cx.emit_span_lint(
320-
OVERFLOWING_LITERALS,
321-
par_e.span,
322-
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
323-
);
327+
if lit_val <= 0x10FFFF && !(0xD800..=0xDFFF).contains(&lit_val) {
328+
cx.emit_span_lint(
329+
OVERFLOWING_LITERALS,
330+
par_e.span,
331+
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
332+
);
333+
} else if (0xD800..=0xDFFF).contains(&lit_val) {
334+
cx.emit_span_lint(
335+
OVERFLOWING_LITERALS,
336+
par_e.span,
337+
SurrogateCharCast { literal: lit_val },
338+
);
339+
} else {
340+
cx.emit_span_lint(
341+
OVERFLOWING_LITERALS,
342+
par_e.span,
343+
TooLargeCharCast { literal: lit_val },
344+
);
345+
}
324346
return;
325347
}
326348
}

tests/ui/cast/cast-char.rs

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,58 @@
11
#![deny(overflowing_literals)]
22

33
fn main() {
4-
const XYZ: char = 0x1F888 as char;
4+
// Valid code points - casts of values above the `u8` range should suggest a `char` literal
5+
6+
// u8 range (0-255)
7+
const VALID_U8_1: char = 0x41 as char; // 'A'
8+
const VALID_U8_2: char = 0xFF as char; // maximum u8
9+
const VALID_U8_3: char = 0x00 as char; // minimum u8
10+
11+
// Valid Unicode in lower range [0x0, 0xD7FF]
12+
const VALID_LOW_1: char = 0x1000 as char; // 4096
13+
//~^ ERROR: only `u8` can be cast into `char`
14+
const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
15+
//~^ ERROR: only `u8` can be cast into `char`
16+
const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
17+
//~^ ERROR: only `u8` can be cast into `char`
18+
19+
// Valid Unicode in upper range [0xE000, 0x10FFFF]
20+
const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
21+
//~^ ERROR only `u8` can be cast into `char`
22+
const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
23+
//~^ ERROR only `u8` can be cast into `char`
24+
const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
25+
//~^ ERROR only `u8` can be cast into `char`
26+
const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
27+
//~^ ERROR only `u8` can be cast into `char`
28+
const VALID_HIGH_5: char = 0x1F600 as char; // emoji
29+
//~^ ERROR only `u8` can be cast into `char`
30+
31+
// Invalid cases - should show SurrogateCharCast or TooLargeCharCast
32+
33+
// Surrogate range [0xD800, 0xDFFF] - reserved for UTF-16
34+
const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
35+
//~^ ERROR: surrogate values are not valid
36+
const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
37+
//~^ ERROR: surrogate values are not valid
38+
const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
39+
//~^ ERROR: surrogate values are not valid
40+
41+
// Too large values (> 0x10FFFF)
42+
const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
43+
//~^ ERROR: value exceeds maximum `char` value
44+
const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
45+
//~^ ERROR: value exceeds maximum `char` value
46+
const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
47+
//~^ ERROR: value exceeds maximum `char` value
48+
const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
49+
//~^ ERROR: value exceeds maximum `char` value
50+
51+
// Boundary cases
52+
const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
53+
//~^ ERROR only `u8` can be cast into `char`
54+
const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
555
//~^ ERROR only `u8` can be cast into `char`
6-
const XY: char = 129160 as char;
56+
const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
757
//~^ ERROR only `u8` can be cast into `char`
8-
const ZYX: char = '\u{01F888}';
9-
println!("{}", XYZ);
1058
}

tests/ui/cast/cast-char.stderr

Lines changed: 117 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
error: only `u8` can be cast into `char`
2-
--> $DIR/cast-char.rs:4:23
2+
--> $DIR/cast-char.rs:12:31
33
|
4-
LL | const XYZ: char = 0x1F888 as char;
5-
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
4+
LL | const VALID_LOW_1: char = 0x1000 as char; // 4096
5+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1000}'`
66
|
77
note: the lint level is defined here
88
--> $DIR/cast-char.rs:1:9
@@ -11,10 +11,120 @@ LL | #![deny(overflowing_literals)]
1111
| ^^^^^^^^^^^^^^^^^^^^
1212

1313
error: only `u8` can be cast into `char`
14-
--> $DIR/cast-char.rs:6:22
14+
--> $DIR/cast-char.rs:14:31
1515
|
16-
LL | const XY: char = 129160 as char;
17-
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
16+
LL | const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
17+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FF}'`
1818

19-
error: aborting due to 2 previous errors
19+
error: only `u8` can be cast into `char`
20+
--> $DIR/cast-char.rs:16:31
21+
|
22+
LL | const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
23+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{500}'`
24+
25+
error: only `u8` can be cast into `char`
26+
--> $DIR/cast-char.rs:20:32
27+
|
28+
LL | const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
29+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E000}'`
30+
31+
error: only `u8` can be cast into `char`
32+
--> $DIR/cast-char.rs:22:32
33+
|
34+
LL | const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
35+
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
36+
37+
error: only `u8` can be cast into `char`
38+
--> $DIR/cast-char.rs:24:32
39+
|
40+
LL | const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
41+
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFF}'`
42+
43+
error: only `u8` can be cast into `char`
44+
--> $DIR/cast-char.rs:26:32
45+
|
46+
LL | const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
47+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{FFFD}'`
48+
49+
error: only `u8` can be cast into `char`
50+
--> $DIR/cast-char.rs:28:32
51+
|
52+
LL | const VALID_HIGH_5: char = 0x1F600 as char; // emoji
53+
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F600}'`
54+
55+
error: surrogate values are not valid for `char`
56+
--> $DIR/cast-char.rs:34:39
57+
|
58+
LL | const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
59+
| ^^^^^^^^^^^^^^
60+
|
61+
= note: surrogate code points [0xD800, 0xDFFF] are reserved for UTF-16 and cannot be used in Rust `char`
62+
63+
error: surrogate values are not valid for `char`
64+
--> $DIR/cast-char.rs:36:39
65+
|
66+
LL | const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
67+
| ^^^^^^^^^^^^^^
68+
|
69+
= note: surrogate code points [0xD800, 0xDFFF] are reserved for UTF-16 and cannot be used in Rust `char`
70+
71+
error: surrogate values are not valid for `char`
72+
--> $DIR/cast-char.rs:38:39
73+
|
74+
LL | const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
75+
| ^^^^^^^^^^^^^^
76+
|
77+
= note: surrogate code points [0xD800, 0xDFFF] are reserved for UTF-16 and cannot be used in Rust `char`
78+
79+
error: value exceeds maximum `char` value
80+
--> $DIR/cast-char.rs:42:37
81+
|
82+
LL | const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
83+
| ^^^^^^^^^^^^^^^^
84+
|
85+
= note: maximum valid `char` value is 0x10FFFF
86+
87+
error: value exceeds maximum `char` value
88+
--> $DIR/cast-char.rs:44:37
89+
|
90+
LL | const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
91+
| ^^^^^^^^^^^^^^^^
92+
|
93+
= note: maximum valid `char` value is 0x10FFFF
94+
95+
error: value exceeds maximum `char` value
96+
--> $DIR/cast-char.rs:46:37
97+
|
98+
LL | const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
99+
| ^^^^^^^^^^^^^^^^
100+
|
101+
= note: maximum valid `char` value is 0x10FFFF
102+
103+
error: value exceeds maximum `char` value
104+
--> $DIR/cast-char.rs:48:37
105+
|
106+
LL | const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
107+
| ^^^^^^^^^^^^^^^^
108+
|
109+
= note: maximum valid `char` value is 0x10FFFF
110+
111+
error: only `u8` can be cast into `char`
112+
--> $DIR/cast-char.rs:52:30
113+
|
114+
LL | const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
115+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FE}'`
116+
117+
error: only `u8` can be cast into `char`
118+
--> $DIR/cast-char.rs:54:30
119+
|
120+
LL | const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
121+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E001}'`
122+
123+
error: only `u8` can be cast into `char`
124+
--> $DIR/cast-char.rs:56:30
125+
|
126+
LL | const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
127+
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFE}'`
128+
129+
error: aborting due to 18 previous errors
20130

0 commit comments

Comments
 (0)