diff --git a/CHANGELOG.md b/CHANGELOG.md index ec204cc064e..e2b0069279b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3147,6 +3147,7 @@ Released 2018-09-13 [`string_extend_chars`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_extend_chars [`string_from_utf8_as_bytes`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_from_utf8_as_bytes [`string_lit_as_bytes`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_lit_as_bytes +[`string_slice`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_slice [`string_to_string`]: https://rust-lang.github.io/rust-clippy/master/index.html#string_to_string [`strlen_on_c_strings`]: https://rust-lang.github.io/rust-clippy/master/index.html#strlen_on_c_strings [`struct_excessive_bools`]: https://rust-lang.github.io/rust-clippy/master/index.html#struct_excessive_bools diff --git a/clippy_lints/src/lib.register_lints.rs b/clippy_lints/src/lib.register_lints.rs index e8dd3708c8e..ff0a0f97397 100644 --- a/clippy_lints/src/lib.register_lints.rs +++ b/clippy_lints/src/lib.register_lints.rs @@ -431,6 +431,7 @@ strings::STRING_ADD_ASSIGN, strings::STRING_FROM_UTF8_AS_BYTES, strings::STRING_LIT_AS_BYTES, + strings::STRING_SLICE, strings::STRING_TO_STRING, strings::STR_TO_STRING, strlen_on_c_strings::STRLEN_ON_C_STRINGS, diff --git a/clippy_lints/src/lib.register_restriction.rs b/clippy_lints/src/lib.register_restriction.rs index 3d68a6e9009..4929bbecde0 100644 --- a/clippy_lints/src/lib.register_restriction.rs +++ b/clippy_lints/src/lib.register_restriction.rs @@ -53,6 +53,7 @@ LintId::of(shadow::SHADOW_SAME), LintId::of(shadow::SHADOW_UNRELATED), LintId::of(strings::STRING_ADD), + LintId::of(strings::STRING_SLICE), LintId::of(strings::STRING_TO_STRING), LintId::of(strings::STR_TO_STRING), LintId::of(types::RC_BUFFER), diff --git a/clippy_lints/src/strings.rs b/clippy_lints/src/strings.rs index 35b6bde5696..6435107b8b4 100644 --- a/clippy_lints/src/strings.rs +++ b/clippy_lints/src/strings.rs @@ -107,51 +107,87 @@ "calling `as_bytes` on a string literal instead of using a byte string literal" } -declare_lint_pass!(StringAdd => [STRING_ADD, STRING_ADD_ASSIGN]); +declare_clippy_lint! { + /// ### What it does + /// Checks for slice operations on strings + /// + /// ### Why is this bad? + /// UTF-8 characters span multiple bytes, and it is easy to inadvertently confuse character + /// counts and string indices. This may lead to panics, and should warrant some test cases + /// containing wide UTF-8 characters. This lint is most useful in code that should avoid + /// panics at all costs. + /// + /// ### Known problems + /// Probably lots of false positives. If an index comes from a known valid position (e.g. + /// obtained via `char_indices` over the same string), it is totally OK. + /// + /// # Example + /// ```rust,should_panic + /// &"Ölkanne"[1..]; + /// ``` + pub STRING_SLICE, + restriction, + "slicing a string" +} + +declare_lint_pass!(StringAdd => [STRING_ADD, STRING_ADD_ASSIGN, STRING_SLICE]); impl<'tcx> LateLintPass<'tcx> for StringAdd { fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx Expr<'_>) { if in_external_macro(cx.sess(), e.span) { return; } - - if let ExprKind::Binary( - Spanned { - node: BinOpKind::Add, .. - }, - left, - _, - ) = e.kind - { - if is_string(cx, left) { - if !is_lint_allowed(cx, STRING_ADD_ASSIGN, e.hir_id) { - let parent = get_parent_expr(cx, e); - if let Some(p) = parent { - if let ExprKind::Assign(target, _, _) = p.kind { - // avoid duplicate matches - if SpanlessEq::new(cx).eq_expr(target, left) { - return; + match e.kind { + ExprKind::Binary( + Spanned { + node: BinOpKind::Add, .. + }, + left, + _, + ) => { + if is_string(cx, left) { + if !is_lint_allowed(cx, STRING_ADD_ASSIGN, e.hir_id) { + let parent = get_parent_expr(cx, e); + if let Some(p) = parent { + if let ExprKind::Assign(target, _, _) = p.kind { + // avoid duplicate matches + if SpanlessEq::new(cx).eq_expr(target, left) { + return; + } } } } + span_lint( + cx, + STRING_ADD, + e.span, + "you added something to a string. Consider using `String::push_str()` instead", + ); } - span_lint( - cx, - STRING_ADD, - e.span, - "you added something to a string. Consider using `String::push_str()` instead", - ); - } - } else if let ExprKind::Assign(target, src, _) = e.kind { - if is_string(cx, target) && is_add(cx, src, target) { - span_lint( - cx, - STRING_ADD_ASSIGN, - e.span, - "you assigned the result of adding something to this string. Consider using \ - `String::push_str()` instead", - ); - } + }, + ExprKind::Assign(target, src, _) => { + if is_string(cx, target) && is_add(cx, src, target) { + span_lint( + cx, + STRING_ADD_ASSIGN, + e.span, + "you assigned the result of adding something to this string. Consider using \ + `String::push_str()` instead", + ); + } + }, + ExprKind::Index(target, _idx) => { + let e_ty = cx.typeck_results().expr_ty(target).peel_refs(); + if matches!(e_ty.kind(), ty::Str) || is_type_diagnostic_item(cx, e_ty, sym::String) { + span_lint( + cx, + STRING_SLICE, + e.span, + "indexing into a string may panic if the index is within a UTF-8 character", + ); + } + }, + _ => {}, } } } diff --git a/tests/ui/string_slice.rs b/tests/ui/string_slice.rs new file mode 100644 index 00000000000..be4dfc8816c --- /dev/null +++ b/tests/ui/string_slice.rs @@ -0,0 +1,10 @@ +#[warn(clippy::string_slice)] +#[allow(clippy::no_effect)] + +fn main() { + &"Ölkanne"[1..]; + let m = "Mötörhead"; + &m[2..5]; + let s = String::from(m); + &s[0..2]; +} diff --git a/tests/ui/string_slice.stderr b/tests/ui/string_slice.stderr new file mode 100644 index 00000000000..55040bf5df2 --- /dev/null +++ b/tests/ui/string_slice.stderr @@ -0,0 +1,22 @@ +error: indexing into a string may panic if the index is within a UTF-8 character + --> $DIR/string_slice.rs:5:6 + | +LL | &"Ölkanne"[1..]; + | ^^^^^^^^^^^^^^ + | + = note: `-D clippy::string-slice` implied by `-D warnings` + +error: indexing into a string may panic if the index is within a UTF-8 character + --> $DIR/string_slice.rs:7:6 + | +LL | &m[2..5]; + | ^^^^^^^ + +error: indexing into a string may panic if the index is within a UTF-8 character + --> $DIR/string_slice.rs:9:6 + | +LL | &s[0..2]; + | ^^^^^^^ + +error: aborting due to 3 previous errors +