From fe35e08e9f96f9c829add20351b2fbf28faac324 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Sun, 17 Dec 2023 18:46:49 +0100 Subject: [PATCH] 8733: Suggest `str.lines` when splitting at hard-coded newlines Adds a new `splitting_strings_at_newlines` lint that suggests to use `str.lines` instead of splitting a trimmed string at hard-coded newlines. --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + clippy_lints/src/methods/mod.rs | 35 +++++++ clippy_lints/src/methods/str_split.rs | 38 +++++++ tests/ui/str_split.fixed | 145 ++++++++++++++++++++++++++ tests/ui/str_split.rs | 145 ++++++++++++++++++++++++++ tests/ui/str_split.stderr | 65 ++++++++++++ 7 files changed, 430 insertions(+) create mode 100644 clippy_lints/src/methods/str_split.rs create mode 100644 tests/ui/str_split.fixed create mode 100644 tests/ui/str_split.rs create mode 100644 tests/ui/str_split.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index e4e068f19d4..b9e441029b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5583,6 +5583,7 @@ Released 2018-09-13 [`stable_sort_primitive`]: https://rust-lang.github.io/rust-clippy/master/index.html#stable_sort_primitive [`std_instead_of_alloc`]: https://rust-lang.github.io/rust-clippy/master/index.html#std_instead_of_alloc [`std_instead_of_core`]: https://rust-lang.github.io/rust-clippy/master/index.html#std_instead_of_core +[`str_split_at_newline`]: https://rust-lang.github.io/rust-clippy/master/index.html#str_split_at_newline [`str_to_string`]: https://rust-lang.github.io/rust-clippy/master/index.html#str_to_string [`string_add`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_add [`string_add_assign`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_add_assign diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index 9ab38dab1e5..02543ae912a 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -435,6 +435,7 @@ crate::methods::STABLE_SORT_PRIMITIVE_INFO, crate::methods::STRING_EXTEND_CHARS_INFO, crate::methods::STRING_LIT_CHARS_ANY_INFO, + crate::methods::STR_SPLIT_AT_NEWLINE_INFO, crate::methods::SUSPICIOUS_COMMAND_ARG_SPACE_INFO, crate::methods::SUSPICIOUS_MAP_INFO, crate::methods::SUSPICIOUS_SPLITN_INFO, diff --git a/clippy_lints/src/methods/mod.rs b/clippy_lints/src/methods/mod.rs index 0cde17ef5ad..711cad1dcf6 100644 --- a/clippy_lints/src/methods/mod.rs +++ b/clippy_lints/src/methods/mod.rs @@ -94,6 +94,7 @@ mod single_char_push_string; mod skip_while_next; mod stable_sort_primitive; +mod str_split; mod str_splitn; mod string_extend_chars; mod string_lit_chars_any; @@ -3856,6 +3857,36 @@ "using `.map(f).unwrap_or_default()`, which is more succinctly expressed as `is_some_and(f)` or `is_ok_and(f)`" } +declare_clippy_lint! { + /// ### What it does + /// + /// Checks for usages of `str.trim().split("\n")` and `str.trim().split("\r\n")`. + /// + /// ### Why is this bad? + /// + /// Hard-coding the line endings makes the code less compatible. `str.lines` should be used instead. + /// + /// ### Example + /// ```no_run + /// "some\ntext\nwith\nnewlines\n".trim().split('\n'); + /// ``` + /// Use instead: + /// ```no_run + /// "some\ntext\nwith\nnewlines\n".lines(); + /// ``` + /// + /// ### Known Problems + /// + /// This lint cannot detect if the split is intentionally restricted to a single type of newline (`"\n"` or + /// `"\r\n"`), for example during the parsing of a specific file format in which precisely one newline type is + /// valid. + /// ``` + #[clippy::version = "1.76.0"] + pub STR_SPLIT_AT_NEWLINE, + pedantic, + "splitting a trimmed string at hard-coded newlines" +} + pub struct Methods { avoid_breaking_exported_api: bool, msrv: Msrv, @@ -4011,6 +4042,7 @@ pub fn new( ITER_FILTER_IS_SOME, ITER_FILTER_IS_OK, MANUAL_IS_VARIANT_AND, + STR_SPLIT_AT_NEWLINE, ]); /// Extracts a method call name, args, and `Span` of the method name. @@ -4597,6 +4629,9 @@ fn check_methods<'tcx>(&self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { ("sort_unstable_by", [arg]) => { unnecessary_sort_by::check(cx, expr, recv, arg, true); }, + ("split", [arg]) => { + str_split::check(cx, expr, recv, arg); + }, ("splitn" | "rsplitn", [count_arg, pat_arg]) => { if let Some(Constant::Int(count)) = constant(cx, cx.typeck_results(), count_arg) { suspicious_splitn::check(cx, name, expr, recv, count); diff --git a/clippy_lints/src/methods/str_split.rs b/clippy_lints/src/methods/str_split.rs new file mode 100644 index 00000000000..3586e11f56a --- /dev/null +++ b/clippy_lints/src/methods/str_split.rs @@ -0,0 +1,38 @@ +use clippy_utils::diagnostics::span_lint_and_sugg; +use clippy_utils::source::snippet_with_context; +use clippy_utils::visitors::is_const_evaluatable; +use rustc_ast::ast::LitKind; +use rustc_errors::Applicability; +use rustc_hir::{Expr, ExprKind}; +use rustc_lint::LateContext; + +use super::STR_SPLIT_AT_NEWLINE; + +pub(super) fn check<'a>(cx: &LateContext<'a>, expr: &'_ Expr<'_>, split_recv: &'a Expr<'_>, split_arg: &'_ Expr<'_>) { + // We're looking for `A.trim().split(B)`, where the adjusted type of `A` is `&str` (e.g. an + // expression returning `String`), and `B` is a `Pattern` that hard-codes a newline (either `"\n"` + // or `"\r\n"`). There are a lot of ways to specify a pattern, and this lint only checks the most + // basic ones: a `'\n'`, `"\n"`, and `"\r\n"`. + if let ExprKind::MethodCall(trim_method_name, trim_recv, [], _) = split_recv.kind + && trim_method_name.ident.as_str() == "trim" + && cx.typeck_results().expr_ty_adjusted(trim_recv).peel_refs().is_str() + && !is_const_evaluatable(cx, trim_recv) + && let ExprKind::Lit(split_lit) = split_arg.kind + && (matches!(split_lit.node, LitKind::Char('\n')) + || matches!(split_lit.node, LitKind::Str(sym, _) if (sym.as_str() == "\n" || sym.as_str() == "\r\n"))) + { + let mut app = Applicability::MaybeIncorrect; + span_lint_and_sugg( + cx, + STR_SPLIT_AT_NEWLINE, + expr.span, + "using `str.trim().split()` with hard-coded newlines", + "use `str.lines()` instead", + format!( + "{}.lines()", + snippet_with_context(cx, trim_recv.span, expr.span.ctxt(), "..", &mut app).0 + ), + app, + ); + } +} diff --git a/tests/ui/str_split.fixed b/tests/ui/str_split.fixed new file mode 100644 index 00000000000..4f33241da7a --- /dev/null +++ b/tests/ui/str_split.fixed @@ -0,0 +1,145 @@ +#![warn(clippy::str_split_at_newline)] + +use core::str::Split; +use std::ops::Deref; + +struct NotStr<'a> { + s: &'a str, +} + +impl<'a> NotStr<'a> { + fn trim(&'a self) -> &'a str { + self.s + } +} + +struct DerefsIntoNotStr<'a> { + not_str: &'a NotStr<'a>, +} + +impl<'a> Deref for DerefsIntoNotStr<'a> { + type Target = NotStr<'a>; + + fn deref(&self) -> &Self::Target { + self.not_str + } +} + +struct DerefsIntoStr<'a> { + s: &'a str, +} + +impl<'a> Deref for DerefsIntoStr<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.s + } +} + +macro_rules! trim_split { + ( $x:expr, $y:expr ) => { + $x.trim().split($y); + }; +} + +macro_rules! make_str { + ( $x: expr ) => { + format!("x={}", $x) + }; +} + +fn main() { + let s1 = "hello\nworld\n"; + let s2 = s1.to_owned(); + + // CASES THAT SHOULD EMIT A LINT + + // Splitting a `str` variable at "\n" or "\r\n" after trimming should warn + let _ = s1.lines(); + #[allow(clippy::single_char_pattern)] + let _ = s1.lines(); + let _ = s1.lines(); + + // Splitting a `String` variable at "\n" or "\r\n" after trimming should warn + let _ = s2.lines(); + #[allow(clippy::single_char_pattern)] + let _ = s2.lines(); + let _ = s2.lines(); + + // Splitting a variable that derefs into `str` at "\n" or "\r\n" after trimming should warn. + let s3 = DerefsIntoStr { s: s1 }; + let _ = s3.lines(); + #[allow(clippy::single_char_pattern)] + let _ = s3.lines(); + let _ = s3.lines(); + + // If the `&str` is generated by a macro then the macro should not be expanded in the suggested fix. + let _ = make_str!(s1).lines(); + + // CASES THAT SHOULD NOT EMIT A LINT + + // Splitting a `str` constant at "\n" or "\r\n" after trimming should not warn + let _ = "hello\nworld\n".trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = "hello\nworld\n".trim().split("\n"); + let _ = "hello\nworld\n".trim().split("\r\n"); + + // Splitting a `str` variable at "\n" or "\r\n" without trimming should not warn, since it is not + // equivalent + let _ = s1.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s1.split("\n"); + let _ = s1.split("\r\n"); + + // Splitting a `String` variable at "\n" or "\r\n" without trimming should not warn. + let _ = s2.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s2.split("\n"); + + // Splitting a variable that derefs into `str` at "\n" or "\r\n" without trimming should not warn. + let _ = s3.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s3.split("\n"); + let _ = s3.split("\r\n"); + let _ = s2.split("\r\n"); + + // Splitting a `str` variable at other separators should not warn + let _ = s1.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s1.trim().split("\r"); + let _ = s1.trim().split("\n\r"); + let _ = s1.trim().split("\r \n"); + + // Splitting a `String` variable at other separators should not warn + let _ = s2.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s2.trim().split("\r"); + let _ = s2.trim().split("\n\r"); + + // Splitting a variable that derefs into `str` at other separators should not warn + let _ = s3.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s3.trim().split("\r"); + let _ = s3.trim().split("\n\r"); + let _ = s3.trim().split("\r \n"); + let _ = s2.trim().split("\r \n"); + + // Using `trim` and `split` on other types should not warn + let not_str = NotStr { s: s1 }; + let _ = not_str.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = not_str.trim().split("\n"); + let _ = not_str.trim().split("\r\n"); + let derefs_into_not_str = DerefsIntoNotStr { not_str: ¬_str }; + let _ = derefs_into_not_str.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = derefs_into_not_str.trim().split("\n"); + let _ = derefs_into_not_str.trim().split("\r\n"); + + // Code generated by macros should not create a warning + trim_split!(s1, "\r\n"); + trim_split!("hello\nworld\n", "\r\n"); + trim_split!(s2, "\r\n"); + trim_split!(s3, "\r\n"); +} diff --git a/tests/ui/str_split.rs b/tests/ui/str_split.rs new file mode 100644 index 00000000000..f24caa61c30 --- /dev/null +++ b/tests/ui/str_split.rs @@ -0,0 +1,145 @@ +#![warn(clippy::str_split_at_newline)] + +use core::str::Split; +use std::ops::Deref; + +struct NotStr<'a> { + s: &'a str, +} + +impl<'a> NotStr<'a> { + fn trim(&'a self) -> &'a str { + self.s + } +} + +struct DerefsIntoNotStr<'a> { + not_str: &'a NotStr<'a>, +} + +impl<'a> Deref for DerefsIntoNotStr<'a> { + type Target = NotStr<'a>; + + fn deref(&self) -> &Self::Target { + self.not_str + } +} + +struct DerefsIntoStr<'a> { + s: &'a str, +} + +impl<'a> Deref for DerefsIntoStr<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.s + } +} + +macro_rules! trim_split { + ( $x:expr, $y:expr ) => { + $x.trim().split($y); + }; +} + +macro_rules! make_str { + ( $x: expr ) => { + format!("x={}", $x) + }; +} + +fn main() { + let s1 = "hello\nworld\n"; + let s2 = s1.to_owned(); + + // CASES THAT SHOULD EMIT A LINT + + // Splitting a `str` variable at "\n" or "\r\n" after trimming should warn + let _ = s1.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s1.trim().split("\n"); + let _ = s1.trim().split("\r\n"); + + // Splitting a `String` variable at "\n" or "\r\n" after trimming should warn + let _ = s2.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s2.trim().split("\n"); + let _ = s2.trim().split("\r\n"); + + // Splitting a variable that derefs into `str` at "\n" or "\r\n" after trimming should warn. + let s3 = DerefsIntoStr { s: s1 }; + let _ = s3.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s3.trim().split("\n"); + let _ = s3.trim().split("\r\n"); + + // If the `&str` is generated by a macro then the macro should not be expanded in the suggested fix. + let _ = make_str!(s1).trim().split('\n'); + + // CASES THAT SHOULD NOT EMIT A LINT + + // Splitting a `str` constant at "\n" or "\r\n" after trimming should not warn + let _ = "hello\nworld\n".trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = "hello\nworld\n".trim().split("\n"); + let _ = "hello\nworld\n".trim().split("\r\n"); + + // Splitting a `str` variable at "\n" or "\r\n" without trimming should not warn, since it is not + // equivalent + let _ = s1.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s1.split("\n"); + let _ = s1.split("\r\n"); + + // Splitting a `String` variable at "\n" or "\r\n" without trimming should not warn. + let _ = s2.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s2.split("\n"); + + // Splitting a variable that derefs into `str` at "\n" or "\r\n" without trimming should not warn. + let _ = s3.split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = s3.split("\n"); + let _ = s3.split("\r\n"); + let _ = s2.split("\r\n"); + + // Splitting a `str` variable at other separators should not warn + let _ = s1.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s1.trim().split("\r"); + let _ = s1.trim().split("\n\r"); + let _ = s1.trim().split("\r \n"); + + // Splitting a `String` variable at other separators should not warn + let _ = s2.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s2.trim().split("\r"); + let _ = s2.trim().split("\n\r"); + + // Splitting a variable that derefs into `str` at other separators should not warn + let _ = s3.trim().split('\r'); + #[allow(clippy::single_char_pattern)] + let _ = s3.trim().split("\r"); + let _ = s3.trim().split("\n\r"); + let _ = s3.trim().split("\r \n"); + let _ = s2.trim().split("\r \n"); + + // Using `trim` and `split` on other types should not warn + let not_str = NotStr { s: s1 }; + let _ = not_str.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = not_str.trim().split("\n"); + let _ = not_str.trim().split("\r\n"); + let derefs_into_not_str = DerefsIntoNotStr { not_str: ¬_str }; + let _ = derefs_into_not_str.trim().split('\n'); + #[allow(clippy::single_char_pattern)] + let _ = derefs_into_not_str.trim().split("\n"); + let _ = derefs_into_not_str.trim().split("\r\n"); + + // Code generated by macros should not create a warning + trim_split!(s1, "\r\n"); + trim_split!("hello\nworld\n", "\r\n"); + trim_split!(s2, "\r\n"); + trim_split!(s3, "\r\n"); +} diff --git a/tests/ui/str_split.stderr b/tests/ui/str_split.stderr new file mode 100644 index 00000000000..ee0a9653711 --- /dev/null +++ b/tests/ui/str_split.stderr @@ -0,0 +1,65 @@ +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:59:13 + | +LL | let _ = s1.trim().split('\n'); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s1.lines()` + | + = note: `-D clippy::str-split-at-newline` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::str_split_at_newline)]` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:61:13 + | +LL | let _ = s1.trim().split("\n"); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s1.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:62:13 + | +LL | let _ = s1.trim().split("\r\n"); + | ^^^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s1.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:65:13 + | +LL | let _ = s2.trim().split('\n'); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s2.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:67:13 + | +LL | let _ = s2.trim().split("\n"); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s2.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:68:13 + | +LL | let _ = s2.trim().split("\r\n"); + | ^^^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s2.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:72:13 + | +LL | let _ = s3.trim().split('\n'); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s3.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:74:13 + | +LL | let _ = s3.trim().split("\n"); + | ^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s3.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:75:13 + | +LL | let _ = s3.trim().split("\r\n"); + | ^^^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `s3.lines()` + +error: using `str.trim().split()` with hard-coded newlines + --> $DIR/str_split.rs:78:13 + | +LL | let _ = make_str!(s1).trim().split('\n'); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: use `str.lines()` instead: `make_str!(s1).lines()` + +error: aborting due to 10 previous errors +