Skip to content

Added error for invalid char cast #143678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions compiler/rustc_lint/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ lint_invalid_asm_label_named = avoid using named labels in inline assembly
.help = only local labels of the form `<number>:` should be used in inline asm
.note = see the asm section of Rust By Example <https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels> for more information
lint_invalid_asm_label_no_span = the label may be declared in the expansion of a macro

lint_invalid_crate_type_value = invalid `crate_type` value
.suggestion = did you mean

Expand Down Expand Up @@ -790,6 +791,9 @@ lint_supertrait_as_deref_target = this `Deref` implementation is covered by an i
.label2 = target type is a supertrait of `{$self_ty}`
.help = consider removing this implementation or replacing it with a method instead

lint_surrogate_char_cast = surrogate values are not valid for `char`
.note = `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values

lint_suspicious_double_ref_clone =
using `.clone()` on a double reference, which returns `{$ty}` instead of cloning the inner type

Expand All @@ -799,6 +803,9 @@ lint_suspicious_double_ref_deref =
lint_symbol_intern_string_literal = using `Symbol::intern` on a string literal
.help = consider adding the symbol to `compiler/rustc_span/src/symbol.rs`

lint_too_large_char_cast = value exceeds maximum `char` value
.note = maximum valid `char` value is `0x10FFFF`

lint_trailing_semi_macro = trailing semicolon in macro used in expression position
.note1 = macro invocations at the end of a block are treated as expressions
.note2 = to ignore the value produced by the macro, add a semicolon after the invocation of `{$name}`
Expand Down
14 changes: 14 additions & 0 deletions compiler/rustc_lint/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1746,6 +1746,20 @@ pub(crate) struct OverflowingLiteral<'a> {
pub lit: String,
}

/// Diagnostic for the `lint_surrogate_char_cast` message ("surrogate values are not
/// valid for `char`"), reported when an integer literal in `0xD800..=0xDFFF` is cast
/// to `char`.
#[derive(LintDiagnostic)]
#[diag(lint_surrogate_char_cast)]
#[note]
pub(crate) struct SurrogateCharCast {
// Value of the offending integer literal.
// NOTE(review): the message and note text for this diagnostic do not appear to
// interpolate `{$literal}` -- confirm whether this field is still needed.
pub literal: u128,
}

/// Diagnostic for the `lint_too_large_char_cast` message ("value exceeds maximum
/// `char` value"), reported when an integer literal greater than `0x10FFFF` is cast
/// to `char`.
#[derive(LintDiagnostic)]
#[diag(lint_too_large_char_cast)]
#[note]
pub(crate) struct TooLargeCharCast {
// Value of the offending integer literal.
// NOTE(review): the message and note text for this diagnostic do not appear to
// interpolate `{$literal}` -- confirm whether this field is still needed.
pub literal: u128,
}

/// Field-less lint diagnostic tied to the `lint_uses_power_alignment` message.
// NOTE(review): the message text and the emitting lint are not visible here -- see
// `messages.ftl` and the lint's call site for the exact wording and trigger.
#[derive(LintDiagnostic)]
#[diag(lint_uses_power_alignment)]
pub(crate) struct UsesPowerAlignment;
Expand Down
42 changes: 32 additions & 10 deletions compiler/rustc_lint/src/types/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::context::LintContext;
use crate::lints::{
OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign, OverflowingBinHexSignBitSub,
OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp, OverflowingLiteral, OverflowingUInt,
RangeEndpointOutOfRange, UseInclusiveRange,
RangeEndpointOutOfRange, SurrogateCharCast, TooLargeCharCast, UseInclusiveRange,
};
use crate::types::{OVERFLOWING_LITERALS, TypeLimits};

Expand All @@ -38,12 +38,18 @@ fn lint_overflowing_range_endpoint<'tcx>(

// We only want to handle exclusive (`..`) ranges,
// which are represented as `ExprKind::Struct`.
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { return false };
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { return false };
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else {
return false;
};
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else {
return false;
};
if !is_range_literal(struct_expr) {
return false;
};
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { return false };
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else {
return false;
};

// We can suggest using an inclusive range
// (`..=`) instead only if it is the `end` that is
Expand All @@ -61,7 +67,9 @@ fn lint_overflowing_range_endpoint<'tcx>(
};

let sub_sugg = if span.lo() == lit_span.lo() {
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { return false };
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else {
return false;
};
UseInclusiveRange::WithoutParen {
sugg: struct_expr.span.shrink_to_lo().to(lit_span.shrink_to_hi()),
start,
Expand Down Expand Up @@ -316,11 +324,25 @@ fn lint_uint_literal<'tcx>(
match par_e.kind {
hir::ExprKind::Cast(..) => {
if let ty::Char = cx.typeck_results().expr_ty(par_e).kind() {
cx.emit_span_lint(
OVERFLOWING_LITERALS,
par_e.span,
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
);
if lit_val > 0x10FFFF {
cx.emit_span_lint(
OVERFLOWING_LITERALS,
par_e.span,
TooLargeCharCast { literal: lit_val },
);
} else if (0xD800..=0xDFFF).contains(&lit_val) {
cx.emit_span_lint(
OVERFLOWING_LITERALS,
par_e.span,
SurrogateCharCast { literal: lit_val },
);
} else {
cx.emit_span_lint(
OVERFLOWING_LITERALS,
par_e.span,
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
);
}
return;
}
}
Expand Down
56 changes: 52 additions & 4 deletions tests/ui/cast/cast-char.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,58 @@
#![deny(overflowing_literals)]

fn main() {
const XYZ: char = 0x1F888 as char;
// Valid cases - should suggest char literal

// u8 range (0-255)
const VALID_U8_1: char = 0x41 as char; // 'A'
const VALID_U8_2: char = 0xFF as char; // maximum u8
const VALID_U8_3: char = 0x00 as char; // minimum u8

// Valid Unicode in lower range [0x0, 0xD7FF]
const VALID_LOW_1: char = 0x1000 as char; // 4096
//~^ ERROR: only `u8` can be cast into `char`
const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
//~^ ERROR: only `u8` can be cast into `char`
const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
//~^ ERROR: only `u8` can be cast into `char`

// Valid Unicode in upper range [0xE000, 0x10FFFF]
const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
//~^ ERROR only `u8` can be cast into `char`
const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
//~^ ERROR only `u8` can be cast into `char`
const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
//~^ ERROR only `u8` can be cast into `char`
const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
//~^ ERROR only `u8` can be cast into `char`
const VALID_HIGH_5: char = 0x1F600 as char; // emoji
//~^ ERROR only `u8` can be cast into `char`

// Invalid cases - should report the surrogate or too-large `char` cast error

// Surrogate range [0xD800, 0xDFFF] - reserved for UTF-16
const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
//~^ ERROR: surrogate values are not valid
const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
//~^ ERROR: surrogate values are not valid
const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
//~^ ERROR: surrogate values are not valid

// Too large values (> 0x10FFFF)
const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
//~^ ERROR: value exceeds maximum `char` value
const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
//~^ ERROR: value exceeds maximum `char` value
const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
//~^ ERROR: value exceeds maximum `char` value
const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
//~^ ERROR: value exceeds maximum `char` value

// Boundary cases
const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
//~^ ERROR only `u8` can be cast into `char`
const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
//~^ ERROR only `u8` can be cast into `char`
const XY: char = 129160 as char;
const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
//~^ ERROR only `u8` can be cast into `char`
const ZYX: char = '\u{01F888}';
println!("{}", XYZ);
}
124 changes: 117 additions & 7 deletions tests/ui/cast/cast-char.stderr
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:4:23
--> $DIR/cast-char.rs:12:31
|
LL | const XYZ: char = 0x1F888 as char;
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
LL | const VALID_LOW_1: char = 0x1000 as char; // 4096
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1000}'`
|
note: the lint level is defined here
--> $DIR/cast-char.rs:1:9
Expand All @@ -11,10 +11,120 @@ LL | #![deny(overflowing_literals)]
| ^^^^^^^^^^^^^^^^^^^^

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:6:22
--> $DIR/cast-char.rs:14:31
|
LL | const XY: char = 129160 as char;
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
LL | const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FF}'`

error: aborting due to 2 previous errors
error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:16:31
|
LL | const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{500}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:20:32
|
LL | const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E000}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:22:32
|
LL | const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:24:32
|
LL | const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFF}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:26:32
|
LL | const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{FFFD}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:28:32
|
LL | const VALID_HIGH_5: char = 0x1F600 as char; // emoji
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F600}'`

error: surrogate values are not valid for `char`
--> $DIR/cast-char.rs:34:39
|
LL | const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
| ^^^^^^^^^^^^^^
|
= note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values

error: surrogate values are not valid for `char`
--> $DIR/cast-char.rs:36:39
|
LL | const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
| ^^^^^^^^^^^^^^
|
= note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values

error: surrogate values are not valid for `char`
--> $DIR/cast-char.rs:38:39
|
LL | const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
| ^^^^^^^^^^^^^^
|
= note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values

error: value exceeds maximum `char` value
--> $DIR/cast-char.rs:42:37
|
LL | const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
| ^^^^^^^^^^^^^^^^
|
= note: maximum valid `char` value is `0x10FFFF`

error: value exceeds maximum `char` value
--> $DIR/cast-char.rs:44:37
|
LL | const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
| ^^^^^^^^^^^^^^^^
|
= note: maximum valid `char` value is `0x10FFFF`

error: value exceeds maximum `char` value
--> $DIR/cast-char.rs:46:37
|
LL | const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
| ^^^^^^^^^^^^^^^^
|
= note: maximum valid `char` value is `0x10FFFF`

error: value exceeds maximum `char` value
--> $DIR/cast-char.rs:48:37
|
LL | const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
| ^^^^^^^^^^^^^^^^
|
= note: maximum valid `char` value is `0x10FFFF`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:52:30
|
LL | const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FE}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:54:30
|
LL | const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E001}'`

error: only `u8` can be cast into `char`
--> $DIR/cast-char.rs:56:30
|
LL | const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFE}'`

error: aborting due to 18 previous errors

Loading