Auto merge of #6105 - bugadani:sus-char, r=ebroto
Lint for invisible Unicode characters other than ZWSP This PR extends the existing `zero_width_space` lint to look for other invisible characters as well (in this case, the `\u{ad}` soft hyphen). I feel like this lint is the logical place to add the check. I realize the lint name is not particularly flexible, but I also understand that it shouldn't be renamed for compatibility reasons. Open questions: - What other characters should trigger the lint? - What should be done with the lint name? - How to indicate the change in functionality? Motivation behind this PR: https://github.com/rust-lang/rust/issues/77417 - I managed to shoot myself in the foot with an invisible character pasted into my test case. changelog: rename [`zero_width_space`] to [`invisible_characters`] and add SHY and WJ to the list.
This commit is contained in:
commit
9408c68c1d
@ -1647,6 +1647,7 @@ Released 2018-09-13
|
||||
[`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref
|
||||
[`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex
|
||||
[`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons
|
||||
[`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters
|
||||
[`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements
|
||||
[`iter_cloned_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_cloned_collect
|
||||
[`iter_next_loop`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_next_loop
|
||||
@ -1922,6 +1923,5 @@ Released 2018-09-13
|
||||
[`zero_divided_by_zero`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_divided_by_zero
|
||||
[`zero_prefixed_literal`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_prefixed_literal
|
||||
[`zero_ptr`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_ptr
|
||||
[`zero_width_space`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_width_space
|
||||
[`zst_offset`]: https://rust-lang.github.io/rust-clippy/master/index.html#zst_offset
|
||||
<!-- end autogenerated links to lint list -->
|
||||
|
@ -854,9 +854,9 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
&types::UNIT_CMP,
|
||||
&types::UNNECESSARY_CAST,
|
||||
&types::VEC_BOX,
|
||||
&unicode::INVISIBLE_CHARACTERS,
|
||||
&unicode::NON_ASCII_LITERAL,
|
||||
&unicode::UNICODE_NOT_NFC,
|
||||
&unicode::ZERO_WIDTH_SPACE,
|
||||
&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD,
|
||||
&unnamed_address::FN_ADDRESS_COMPARISONS,
|
||||
&unnamed_address::VTABLE_ADDRESS_COMPARISONS,
|
||||
@ -1511,7 +1511,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
LintId::of(&types::UNIT_CMP),
|
||||
LintId::of(&types::UNNECESSARY_CAST),
|
||||
LintId::of(&types::VEC_BOX),
|
||||
LintId::of(&unicode::ZERO_WIDTH_SPACE),
|
||||
LintId::of(&unicode::INVISIBLE_CHARACTERS),
|
||||
LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD),
|
||||
LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS),
|
||||
LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS),
|
||||
@ -1779,7 +1779,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
LintId::of(&types::ABSURD_EXTREME_COMPARISONS),
|
||||
LintId::of(&types::CAST_REF_TO_MUT),
|
||||
LintId::of(&types::UNIT_CMP),
|
||||
LintId::of(&unicode::ZERO_WIDTH_SPACE),
|
||||
LintId::of(&unicode::INVISIBLE_CHARACTERS),
|
||||
LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD),
|
||||
LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS),
|
||||
LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS),
|
||||
@ -1910,6 +1910,7 @@ pub fn register_renamed(ls: &mut rustc_lint::LintStore) {
|
||||
ls.register_renamed("clippy::for_loop_over_option", "clippy::for_loops_over_fallibles");
|
||||
ls.register_renamed("clippy::for_loop_over_result", "clippy::for_loops_over_fallibles");
|
||||
ls.register_renamed("clippy::identity_conversion", "clippy::useless_conversion");
|
||||
ls.register_renamed("clippy::zero_width_space", "clippy::invisible_characters");
|
||||
}
|
||||
|
||||
// only exists to let the dogfood integration test work.
|
||||
|
@ -8,18 +8,18 @@
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
declare_clippy_lint! {
|
||||
/// **What it does:** Checks for the Unicode zero-width space in the code.
|
||||
/// **What it does:** Checks for invisible Unicode characters in the code.
|
||||
///
|
||||
/// **Why is this bad?** Having an invisible character in the code makes for all
|
||||
/// sorts of April fools, but otherwise is very much frowned upon.
|
||||
///
|
||||
/// **Known problems:** None.
|
||||
///
|
||||
/// **Example:** You don't see it, but there may be a zero-width space
|
||||
/// somewhere in this text.
|
||||
pub ZERO_WIDTH_SPACE,
|
||||
/// **Example:** You don't see it, but there may be a zero-width space or soft hyphen
|
||||
/// somewhere in this text.
|
||||
pub INVISIBLE_CHARACTERS,
|
||||
correctness,
|
||||
"using a zero-width space in a string literal, which is confusing"
|
||||
"using an invisible character in a string literal, which is confusing"
|
||||
}
|
||||
|
||||
declare_clippy_lint! {
|
||||
@ -63,7 +63,7 @@
|
||||
"using a Unicode literal not in NFC normal form (see [Unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"
|
||||
}
|
||||
|
||||
declare_lint_pass!(Unicode => [ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);
|
||||
declare_lint_pass!(Unicode => [INVISIBLE_CHARACTERS, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);
|
||||
|
||||
impl LateLintPass<'_> for Unicode {
|
||||
fn check_expr(&mut self, cx: &LateContext<'_>, expr: &'_ Expr<'_>) {
|
||||
@ -91,14 +91,17 @@ fn escape<T: Iterator<Item = char>>(s: T) -> String {
|
||||
|
||||
fn check_str(cx: &LateContext<'_>, span: Span, id: HirId) {
|
||||
let string = snippet(cx, span, "");
|
||||
if string.contains('\u{200B}') {
|
||||
if string.chars().any(|c| ['\u{200B}', '\u{ad}', '\u{2060}'].contains(&c)) {
|
||||
span_lint_and_sugg(
|
||||
cx,
|
||||
ZERO_WIDTH_SPACE,
|
||||
INVISIBLE_CHARACTERS,
|
||||
span,
|
||||
"zero-width space detected",
|
||||
"invisible character detected",
|
||||
"consider replacing the string with",
|
||||
string.replace("\u{200B}", "\\u{200B}"),
|
||||
string
|
||||
.replace("\u{200B}", "\\u{200B}")
|
||||
.replace("\u{ad}", "\\u{AD}")
|
||||
.replace("\u{2060}", "\\u{2060}"),
|
||||
Applicability::MachineApplicable,
|
||||
);
|
||||
}
|
||||
|
@ -969,6 +969,13 @@
|
||||
deprecation: None,
|
||||
module: "types",
|
||||
},
|
||||
Lint {
|
||||
name: "invisible_characters",
|
||||
group: "correctness",
|
||||
desc: "using an invisible character in a string literal, which is confusing",
|
||||
deprecation: None,
|
||||
module: "unicode",
|
||||
},
|
||||
Lint {
|
||||
name: "items_after_statements",
|
||||
group: "pedantic",
|
||||
@ -2810,13 +2817,6 @@
|
||||
deprecation: None,
|
||||
module: "misc",
|
||||
},
|
||||
Lint {
|
||||
name: "zero_width_space",
|
||||
group: "correctness",
|
||||
desc: "using a zero-width space in a string literal, which is confusing",
|
||||
deprecation: None,
|
||||
module: "unicode",
|
||||
},
|
||||
Lint {
|
||||
name: "zst_offset",
|
||||
group: "correctness",
|
||||
|
@ -1,7 +1,11 @@
|
||||
#[warn(clippy::zero_width_space)]
|
||||
#[warn(clippy::invisible_characters)]
|
||||
fn zero() {
|
||||
print!("Here >< is a ZWS, and another");
|
||||
print!("This\u{200B}is\u{200B}fine");
|
||||
print!("Here >< is a SHY, and another");
|
||||
print!("This\u{ad}is\u{ad}fine");
|
||||
print!("Here >< is a WJ, and another");
|
||||
print!("This\u{2060}is\u{2060}fine");
|
||||
}
|
||||
|
||||
#[warn(clippy::unicode_not_nfc)]
|
||||
|
@ -1,13 +1,25 @@
|
||||
error: zero-width space detected
|
||||
error: invisible character detected
|
||||
--> $DIR/unicode.rs:3:12
|
||||
|
|
||||
LL | print!("Here >< is a ZWS, and another");
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{200B}< is a ZWS, and /u{200B}another"`
|
||||
|
|
||||
= note: `-D clippy::zero-width-space` implied by `-D warnings`
|
||||
= note: `-D clippy::invisible-characters` implied by `-D warnings`
|
||||
|
||||
error: invisible character detected
|
||||
--> $DIR/unicode.rs:5:12
|
||||
|
|
||||
LL | print!("Here >< is a SHY, and another");
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{AD}< is a SHY, and /u{AD}another"`
|
||||
|
||||
error: invisible character detected
|
||||
--> $DIR/unicode.rs:7:12
|
||||
|
|
||||
LL | print!("Here >< is a WJ, and another");
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{2060}< is a WJ, and /u{2060}another"`
|
||||
|
||||
error: non-NFC Unicode sequence detected
|
||||
--> $DIR/unicode.rs:9:12
|
||||
--> $DIR/unicode.rs:13:12
|
||||
|
|
||||
LL | print!("̀àh?");
|
||||
| ^^^^^ help: consider replacing the string with: `"̀àh?"`
|
||||
@ -15,12 +27,12 @@ LL | print!("̀àh?");
|
||||
= note: `-D clippy::unicode-not-nfc` implied by `-D warnings`
|
||||
|
||||
error: literal non-ASCII character detected
|
||||
--> $DIR/unicode.rs:15:12
|
||||
--> $DIR/unicode.rs:19:12
|
||||
|
|
||||
LL | print!("Üben!");
|
||||
| ^^^^^^^ help: consider replacing the string with: `"/u{dc}ben!"`
|
||||
|
|
||||
= note: `-D clippy::non-ascii-literal` implied by `-D warnings`
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
error: aborting due to 5 previous errors
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user