From f156d3bc5720603b8ce7a6a33bbfd464f9ca2e84 Mon Sep 17 00:00:00 2001 From: Urgau Date: Sat, 26 Aug 2023 22:09:18 +0200 Subject: [PATCH] Improve invalid UTF-8 lint by finding the expression initializer --- compiler/rustc_lint/src/invalid_from_utf8.rs | 25 ++-- tests/ui/lint/invalid_from_utf8.rs | 27 ++++ tests/ui/lint/invalid_from_utf8.stderr | 147 +++++++++++++------ 3 files changed, 143 insertions(+), 56 deletions(-) diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs index 3291286ad67..1841e7c85a8 100644 --- a/compiler/rustc_lint/src/invalid_from_utf8.rs +++ b/compiler/rustc_lint/src/invalid_from_utf8.rs @@ -1,6 +1,6 @@ use std::str::Utf8Error; -use rustc_ast::{BorrowKind, LitKind}; +use rustc_ast::LitKind; use rustc_hir::{Expr, ExprKind}; use rustc_span::source_map::Spanned; use rustc_span::sym; @@ -11,7 +11,7 @@ declare_lint! { /// The `invalid_from_utf8_unchecked` lint checks for calls to /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut` - /// with an invalid UTF-8 literal. + /// with a known invalid UTF-8 value. /// /// ### Example /// @@ -36,7 +36,7 @@ declare_lint! { /// The `invalid_from_utf8` lint checks for calls to /// `std::str::from_utf8` and `std::str::from_utf8_mut` - /// with an invalid UTF-8 literal. + /// with a known invalid UTF-8 value. /// /// ### Example /// @@ -67,8 +67,7 @@ fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { && [sym::str_from_utf8, sym::str_from_utf8_mut, sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item) { - let lint = |utf8_error: Utf8Error| { - let label = arg.span; + let lint = |label, utf8_error: Utf8Error| { let method = diag_item.as_str().strip_prefix("str_").unwrap(); let method = format!("std::str::{method}"); let valid_up_to = utf8_error.valid_up_to(); @@ -78,22 +77,26 @@ fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 }, expr.span, if is_unchecked_variant { - InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label } + InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label } } else { - InvalidFromUtf8Diag::Checked { method, valid_up_to, label } + InvalidFromUtf8Diag::Checked { method, valid_up_to, label } } ) }; - match &arg.kind { + let mut init = cx.expr_or_init(arg); + while let ExprKind::AddrOf(.., inner) = init.kind { + init = cx.expr_or_init(inner); + } + match init.kind { ExprKind::Lit(Spanned { node: lit, .. }) => { if let LitKind::ByteStr(bytes, _) = &lit && let Err(utf8_error) = std::str::from_utf8(bytes) { - lint(utf8_error); + lint(init.span, utf8_error); } }, - ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => { + ExprKind::Array(args) => { let elements = args.iter().map(|e|{ match &e.kind { ExprKind::Lit(Spanned { node: lit, .. }) => match lit { @@ -108,7 +111,7 @@ fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { if let Some(elements) = elements && let Err(utf8_error) = std::str::from_utf8(&elements) { - lint(utf8_error); + lint(init.span, utf8_error); } } _ => {} diff --git a/tests/ui/lint/invalid_from_utf8.rs b/tests/ui/lint/invalid_from_utf8.rs index 9c8c636812e..43ceffb71e5 100644 --- a/tests/ui/lint/invalid_from_utf8.rs +++ b/tests/ui/lint/invalid_from_utf8.rs @@ -1,6 +1,8 @@ // check-pass +#![feature(inline_const)] #![feature(concat_bytes)] + #![warn(invalid_from_utf8_unchecked)] #![warn(invalid_from_utf8)] @@ -90,4 +92,29 @@ pub fn from_utf8() { } } +pub fn from_utf8_with_indirections() { + let mut a = [99, 108, 130, 105, 112, 112, 121]; + std::str::from_utf8_mut(&mut a); + //~^ WARN calls to `std::str::from_utf8_mut` + let mut b = &mut a; + let mut c = b; + std::str::from_utf8_mut(c); + //~^ WARN calls to `std::str::from_utf8_mut` + let mut c = &[99, 108, 130, 105, 112, 112, 121]; + std::str::from_utf8(c); + //~^ WARN calls to `std::str::from_utf8` + const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121]; + std::str::from_utf8(&INVALID_1); + //~^ WARN calls to `std::str::from_utf8` + static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121]; + std::str::from_utf8(&INVALID_2); + //~^ WARN calls to `std::str::from_utf8` + const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121]; + std::str::from_utf8(INVALID_3); + //~^ WARN calls to `std::str::from_utf8` + const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] }; + std::str::from_utf8(INVALID_4); + //~^ WARN calls to `std::str::from_utf8` +} + fn main() {} diff --git a/tests/ui/lint/invalid_from_utf8.stderr b/tests/ui/lint/invalid_from_utf8.stderr index 8e00d3bf872..884165d4f12 100644 --- a/tests/ui/lint/invalid_from_utf8.stderr +++ b/tests/ui/lint/invalid_from_utf8.stderr @@ -1,51 +1,51 @@ warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:19:9 + --> $DIR/invalid_from_utf8.rs:21:9 | LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes | note: the lint level is defined here - --> $DIR/invalid_from_utf8.rs:4:9 + --> $DIR/invalid_from_utf8.rs:6:9 | LL | #![warn(invalid_from_utf8_unchecked)] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:21:9 + --> $DIR/invalid_from_utf8.rs:23:9 | LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes - -warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:39:9 - | -LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior --> $DIR/invalid_from_utf8.rs:41:9 | -LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior --> $DIR/invalid_from_utf8.rs:43:9 | +LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:45:9 + | LL | std::str::from_utf8_unchecked(b"cl\x82ippy"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^ | | | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior - --> $DIR/invalid_from_utf8.rs:45:9 + --> $DIR/invalid_from_utf8.rs:47:9 | LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy")); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^ @@ -53,58 +53,115 @@ LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy")); | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error - --> $DIR/invalid_from_utf8.rs:62:9 + --> $DIR/invalid_from_utf8.rs:64:9 | LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]); - | ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes | note: the lint level is defined here - --> $DIR/invalid_from_utf8.rs:5:9 + --> $DIR/invalid_from_utf8.rs:7:9 | LL | #![warn(invalid_from_utf8)] | ^^^^^^^^^^^^^^^^^ warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error - --> $DIR/invalid_from_utf8.rs:64:9 + --> $DIR/invalid_from_utf8.rs:66:9 | LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); - | ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes - -warning: calls to `std::str::from_utf8` with a invalid literal always return an error - --> $DIR/invalid_from_utf8.rs:82:9 - | -LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]); - | ^^^^^^^^^^^^^^^^^^^^-----------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8` with a invalid literal always return an error --> $DIR/invalid_from_utf8.rs:84:9 | -LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); - | ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^ - | | - | the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8` with a invalid literal always return an error --> $DIR/invalid_from_utf8.rs:86:9 | +LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:88:9 + | LL | std::str::from_utf8(b"cl\x82ippy"); | ^^^^^^^^^^^^^^^^^^^^-------------^ | | | the literal was valid UTF-8 up to the 2 bytes warning: calls to `std::str::from_utf8` with a invalid literal always return an error - --> $DIR/invalid_from_utf8.rs:88:9 + --> $DIR/invalid_from_utf8.rs:90:9 | LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy")); | ^^^^^^^^^^^^^^^^^^^^---------------------------------^ | | | the literal was valid UTF-8 up to the 2 bytes -warning: 12 warnings emitted +warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:97:5 + | +LL | let mut a = [99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8_mut(&mut a); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:101:5 + | +LL | let mut a = [99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +... +LL | std::str::from_utf8_mut(c); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:104:5 + | +LL | let mut c = &[99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(c); + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:107:5 + | +LL | const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(&INVALID_1); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:110:5 + | +LL | static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(&INVALID_2); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:113:5 + | +LL | const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121]; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(INVALID_3); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8` with a invalid literal always return an error + --> $DIR/invalid_from_utf8.rs:116:5 + | +LL | const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] }; + | ---------------------------------- the literal was valid UTF-8 up to the 2 bytes +LL | std::str::from_utf8(INVALID_4); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: 19 warnings emitted