diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 35edb458478..e707ac41a05 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re lint_improper_ctypes_union_layout_reason = this union has unspecified layout lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive +# FIXME: we should ordinalize $valid_up_to when we add support for doing so +lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error + .label = the literal was valid UTF-8 up to the {$valid_up_to} bytes + # FIXME: we should ordinalize $valid_up_to when we add support for doing so lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior .label = the literal was valid UTF-8 up to the {$valid_up_to} bytes diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs index 2118deba5c7..3291286ad67 100644 --- a/compiler/rustc_lint/src/invalid_from_utf8.rs +++ b/compiler/rustc_lint/src/invalid_from_utf8.rs @@ -5,7 +5,7 @@ use rustc_hir::{Expr, ExprKind}; use rustc_span::source_map::Spanned; use rustc_span::sym; -use crate::lints::InvalidFromUtf8UncheckedDiag; +use crate::lints::InvalidFromUtf8Diag; use crate::{LateContext, LateLintPass, LintContext}; declare_lint! { @@ -33,7 +33,30 @@ declare_lint! { "using a non UTF-8 literal in `std::str::from_utf8_unchecked`" } -declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]); +declare_lint! { + /// The `invalid_from_utf8` lint checks for calls to + /// `std::str::from_utf8` and `std::str::from_utf8_mut` + /// with an invalid UTF-8 literal. + /// + /// ### Example + /// + /// ```rust + /// # #[allow(unused)] + /// std::str::from_utf8(b"Ru\x82st"); + /// ``` + /// + /// {{produces}} + /// + /// ### Explanation + /// + /// Trying to create such a `str` would always return an error as per documentation + /// for `std::str::from_utf8` and `std::str::from_utf8_mut`. + pub INVALID_FROM_UTF8, + Warn, + "using a non UTF-8 literal in `std::str::from_utf8`" +} + +declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]); impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 { fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { @@ -41,15 +64,25 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 { && let ExprKind::Path(ref qpath) = path.kind && let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id() && let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id) - && [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item) + && [sym::str_from_utf8, sym::str_from_utf8_mut, + sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item) { let lint = |utf8_error: Utf8Error| { + let label = arg.span; let method = diag_item.as_str().strip_prefix("str_").unwrap(); - cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag { - method: format!("std::str::{method}"), - valid_up_to: utf8_error.valid_up_to(), - label: arg.span, - }) + let method = format!("std::str::{method}"); + let valid_up_to = utf8_error.valid_up_to(); + let is_unchecked_variant = diag_item.as_str().contains("unchecked"); + + cx.emit_spanned_lint( + if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 }, + expr.span, + if is_unchecked_variant { + InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label } + } else { + InvalidFromUtf8Diag::Checked { method, valid_up_to, label } + } + ) }; match &arg.kind { diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index 5969bc5ca5a..dcfef84f550 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -701,12 +701,21 @@ pub struct ForgetCopyDiag<'a> { // invalid_from_utf8.rs #[derive(LintDiagnostic)] -#[diag(lint_invalid_from_utf8_unchecked)] -pub struct InvalidFromUtf8UncheckedDiag { - pub method: String, - pub valid_up_to: usize, - #[label] - pub label: Span, +pub enum InvalidFromUtf8Diag { + #[diag(lint_invalid_from_utf8_unchecked)] + Unchecked { + method: String, + valid_up_to: usize, + #[label] + label: Span, + }, + #[diag(lint_invalid_from_utf8_checked)] + Checked { + method: String, + valid_up_to: usize, + #[label] + label: Span, + }, } // hidden_unicode_codepoints.rs diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 4fc73c4ae86..1185563ea80 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1454,6 +1454,8 @@ symbols! { stop_after_dataflow, store, str, + str_from_utf8, + str_from_utf8_mut, str_from_utf8_unchecked, str_from_utf8_unchecked_mut, str_split_whitespace, diff --git a/library/core/src/str/converts.rs b/library/core/src/str/converts.rs index 12fac0567cb..0f23cf7ae23 100644 --- a/library/core/src/str/converts.rs +++ b/library/core/src/str/converts.rs @@ -84,6 +84,7 @@ use super::Utf8Error; #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")] #[rustc_allow_const_fn_unstable(str_internals)] +#[rustc_diagnostic_item = "str_from_utf8"] pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { // FIXME: This should use `?` again, once it's `const` match run_utf8_validation(v) { @@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { /// errors that can be returned. #[stable(feature = "str_mut_extras", since = "1.20.0")] #[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")] +#[rustc_diagnostic_item = "str_from_utf8_mut"] pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { // This should use `?` again, once it's `const` match run_utf8_validation(v) { diff --git a/tests/ui/lint/invalid_from_utf8.rs b/tests/ui/lint/invalid_from_utf8.rs index 4a27c659c73..9c8c636812e 100644 --- a/tests/ui/lint/invalid_from_utf8.rs +++ b/tests/ui/lint/invalid_from_utf8.rs @@ -2,6 +2,7 @@ #![feature(concat_bytes)] #![warn(invalid_from_utf8_unchecked)] +#![warn(invalid_from_utf8)] pub fn from_utf8_unchecked_mut() { // Valid @@ -46,4 +47,47 @@ pub fn from_utf8_unchecked() { } } +pub fn from_utf8_mut() { + // Valid + { + std::str::from_utf8_mut(&mut [99, 108, 105, 112, 112, 121]); + std::str::from_utf8_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']); + + let x = 0xa0; + std::str::from_utf8_mut(&mut [0xc0, x]); + } + + // Invalid + { + std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]); + //~^ WARN calls to `std::str::from_utf8_mut` + std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + //~^ WARN calls to `std::str::from_utf8_mut` + } +} + +pub fn from_utf8() { + // Valid + { + std::str::from_utf8(&[99, 108, 105, 112, 112, 121]); + std::str::from_utf8(&[b'c', b'l', b'i', b'p', b'p', b'y']); + std::str::from_utf8(b"clippy"); + + let x = 0xA0; + std::str::from_utf8(&[0xC0, x]); + } + + // Invalid + { + std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]); + //~^ WARN calls to `std::str::from_utf8` + std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + //~^ WARN calls to `std::str::from_utf8` + std::str::from_utf8(b"cl\x82ippy"); + //~^ WARN calls to `std::str::from_utf8` + std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy")); + //~^ WARN calls to `std::str::from_utf8` + } +} + fn main() {} diff --git a/tests/ui/lint/invalid_from_utf8.stderr b/tests/ui/lint/invalid_from_utf8.stderr index 63cd906237d..8e00d3bf872 100644 --- a/tests/ui/lint/invalid_from_utf8.stderr +++ b/tests/ui/lint/invalid_from_utf8.stderr @@ -1,5 +1,5 @@ warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:18:9 + --> $DIR/invalid_from_utf8.rs:19:9 | LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^ @@ -13,7 +13,7 @@ LL | #![warn(invalid_from_utf8_unchecked)] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:20:9 + --> $DIR/invalid_from_utf8.rs:21:9 | LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^ @@ -21,7 +21,7 @@ LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:38:9 + --> $DIR/invalid_from_utf8.rs:39:9 | LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^ @@ -29,7 +29,7 @@ LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:40:9 + --> $DIR/invalid_from_utf8.rs:41:9 | LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^ @@ -37,7 +37,7 @@ LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b' | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:42:9 + --> $DIR/invalid_from_utf8.rs:43:9 | LL | std::str::from_utf8_unchecked(b"cl\x82ippy"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^ @@ -45,12 +45,66 @@ LL | std::str::from_utf8_unchecked(b"cl\x82ippy"); | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:44:9 + --> $DIR/invalid_from_utf8.rs:45:9 | LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy")); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^ | | | the literal was valid UTF-8 up to the 2 bytes -warning: 6 warnings emitted +warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:62:9 + | +LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + | +note: the lint level is defined here + --> $DIR/invalid_from_utf8.rs:5:9 + | +LL | #![warn(invalid_from_utf8)] + | ^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:64:9 + | +LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:82:9 + | +LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^-----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:84:9 + | +LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:86:9 + | +LL | std::str::from_utf8(b"cl\x82ippy"); + | ^^^^^^^^^^^^^^^^^^^^-------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:88:9 + | +LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy")); + | ^^^^^^^^^^^^^^^^^^^^---------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: 12 warnings emitted