rust/clippy_lints/src/unicode.rs

use crate::utils::{is_allowed, snippet, span_lint_and_sugg};
use rustc::hir::*;
use rustc::lint::{LateContext, LateLintPass, LintArray, LintPass};
use rustc::{declare_lint_pass, declare_tool_lint};
use rustc_errors::Applicability;
use syntax::ast::LitKind;
use syntax::source_map::Span;
use unicode_normalization::UnicodeNormalization;

declare_clippy_lint! {
    /// **What it does:** Checks for the Unicode zero-width space (`U+200B`) in
    /// string literals.
    ///
    /// **Why is this bad?** Having an invisible character in the code makes for all
    /// sorts of April fools, but otherwise is very much frowned upon.
    ///
    /// **Known problems:** None.
    ///
    /// **Example:** You don't see it, but there may be a zero-width space
    /// somewhere in this text.
    ///
    /// The suggested replacement is the literal's source text with every
    /// zero-width space spelled out as the `\u{200B}` escape.
    pub ZERO_WIDTH_SPACE,
    correctness,
    "using a zero-width space in a string literal, which is confusing"
}

declare_clippy_lint! {
    /// **What it does:** Checks for non-ASCII characters in string literals.
    ///
    /// **Why is this bad?** Yeah, we know, the 90's called and wanted their charset
    /// back. Even so, there still are editors and other programs out there that
    /// don't work well with Unicode. So if the code is meant to be used
    /// internationally, on multiple operating systems, or has other portability
    /// requirements, activating this lint could be useful.
    ///
    /// **Known problems:** None.
    ///
    /// **Example:**
    /// ```rust
    /// let x = String::from("€");
    /// ```
    /// Could be written as:
    /// ```rust
    /// let x = String::from("\u{20ac}");
    /// ```
    ///
    /// The suggestion replaces every character above `U+007F` with its `\u{…}`
    /// escape. Unless `UNICODE_NOT_NFC` is allowed for the literal, the text is
    /// NFC-normalized before being escaped.
    pub NON_ASCII_LITERAL,
    pedantic,
    "using any literal non-ASCII chars in a string literal instead of using the `\\u` escape"
}

declare_clippy_lint! {
    /// **What it does:** Checks for string literals that contain Unicode in a form
    /// that is not equal to its
    /// [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms).
    ///
    /// **Why is this bad?** If such a string is compared to another, the results
    /// may be surprising.
    ///
    /// **Known problems:** None.
    ///
    /// **Example:** You may not see it, but “à” and “à” aren't the same string. The
    /// former when escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`.
    ///
    /// Note that this lint is only emitted for literals where `NON_ASCII_LITERAL`
    /// is allowed; otherwise the `NON_ASCII_LITERAL` suggestion already contains
    /// the NFC-normalized (and escaped) text.
    pub UNICODE_NOT_NFC,
    pedantic,
    "using a unicode literal not in NFC normal form (see [unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"
}

// Declares the `Unicode` lint pass together with the three lints it can emit.
declare_lint_pass!(Unicode => [ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);

impl<'a, 'tcx> LateLintPass<'a, 'tcx> for Unicode {
    /// Forwards every string-literal expression to `check_str`; all other
    /// expressions (including non-string literals) are ignored.
    fn check_expr(&mut self, cx: &LateContext<'a, 'tcx>, expr: &'tcx Expr) {
        // Only literal expressions are of interest.
        let literal = match expr.node {
            ExprKind::Lit(ref literal) => literal,
            _ => return,
        };
        // Of those, only string literals are linted.
        if let LitKind::Str(..) = literal.node {
            check_str(cx, literal.span, expr.hir_id);
        }
    }
}

/// Collects the characters of `s` into a `String`, replacing every non-ASCII
/// character with its `\u{…}` escape sequence. ASCII characters are copied
/// through unchanged.
fn escape<T: Iterator<Item = char>>(s: T) -> String {
    s.fold(String::new(), |mut escaped, ch| {
        if ch.is_ascii() {
            escaped.push(ch);
        } else {
            // `escape_unicode` yields the characters of the `\u{NNNN}` form.
            escaped.extend(ch.escape_unicode());
        }
        escaped
    })
}

/// Runs the three Unicode lints against the source text of the string literal
/// at `span`.
///
/// * `cx` - lint context used to fetch the snippet and emit diagnostics.
/// * `span` - span of the literal; used both for the snippet and the lint.
/// * `id` - HIR id passed to `is_allowed` to query the level of the sibling lints.
///
/// The checks work on the snippet returned by `snippet`, not on the literal's
/// value.
fn check_str(cx: &LateContext<'_, '_>, span: Span, id: HirId) {
    let source = snippet(cx, span, "");

    // 1. Zero-width spaces: suggest spelling them out as an escape.
    if source.contains('\u{200B}') {
        let visible = source.replace("\u{200B}", "\\u{200B}");
        span_lint_and_sugg(
            cx,
            ZERO_WIDTH_SPACE,
            span,
            "zero-width space detected",
            "consider replacing the string with",
            visible,
            Applicability::MachineApplicable,
        );
    }

    // 2. Any non-ASCII character: suggest the escaped form. The text is
    //    NFC-normalized first unless `UNICODE_NOT_NFC` is allowed here.
    let has_non_ascii = source.chars().any(|ch| !ch.is_ascii());
    if has_non_ascii {
        let escaped = if is_allowed(cx, UNICODE_NOT_NFC, id) {
            escape(source.chars())
        } else {
            escape(source.nfc())
        };
        span_lint_and_sugg(
            cx,
            NON_ASCII_LITERAL,
            span,
            "literal non-ASCII character detected",
            "consider replacing the string with",
            escaped,
            Applicability::MachineApplicable,
        );
    }

    // 3. Non-NFC text: only considered when `NON_ASCII_LITERAL` is allowed.
    if !is_allowed(cx, NON_ASCII_LITERAL, id) {
        return;
    }
    let differs_from_nfc = source
        .chars()
        .zip(source.nfc())
        .any(|(original, normalized)| original != normalized);
    if differs_from_nfc {
        let normalized = source.nfc().collect::<String>();
        span_lint_and_sugg(
            cx,
            UNICODE_NOT_NFC,
            span,
            "non-nfc unicode sequence detected",
            "consider replacing the string with",
            normalized,
            Applicability::MachineApplicable,
        );
    }
}
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`use crate::utils::{is_allowed, snippet, span_lint_and_sugg};`
Remove crate:: prefixes from crate paths This is somewhat misleading, as those are actually external crates, and don't need a crate:: prefix. 2018-12-29 09:04:45 -06:00			`use rustc::hir::*;`
			`use rustc::lint::{LateContext, LateLintPass, LintArray, LintPass};`
Use lint pass macros Fixes #3917. 2019-04-08 15:43:55 -05:00			`use rustc::{declare_lint_pass, declare_tool_lint};`
Make non_ascii_literal auto-fixable 2019-05-20 08:23:38 -05:00			`use rustc_errors::Applicability;`
HirIdify some lints 2019-02-24 12:43:15 -06:00			`use syntax::ast::LitKind;`
Remove crate:: prefixes from crate paths This is somewhat misleading, as those are actually external crates, and don't need a crate:: prefix. 2018-12-29 09:04:45 -06:00			`use syntax::source_map::Span;`
Run rustfmt on clippy_lints 2018-11-27 14:14:15 -06:00			`use unicode_normalization::UnicodeNormalization;`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00
Categorize all the lints! 2018-03-28 08:24:26 -05:00			`declare_clippy_lint! {`
move lint documentation into macro invocations 2019-03-05 10:50:33 -06:00			`/// What it does: Checks for the Unicode zero-width space in the code.`
			`///`
			`/// Why is this bad? Having an invisible character in the code makes for all`
			`/// sorts of April fools, but otherwise is very much frowned upon.`
			`///`
			`/// Known problems: None.`
			`///`
			`/// Example: You don't see it, but there may be a zero-width space`
			`/// somewhere in this text.`
Make lint descriptions short and to the point; always fitting the column "triggers on". 2016-08-06 03:18:36 -05:00			`pub ZERO_WIDTH_SPACE,`
Categorize all the lints! 2018-03-28 08:24:26 -05:00			`correctness,`
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`"using a zero-width space in a string literal, which is confusing"`
			`}`

Categorize all the lints! 2018-03-28 08:24:26 -05:00			`declare_clippy_lint! {`
move lint documentation into macro invocations 2019-03-05 10:50:33 -06:00			`/// What it does: Checks for non-ASCII characters in string literals.`
			`///`
			`/// Why is this bad? Yeah, we know, the 90's called and wanted their charset`
			`/// back. Even so, there still are editors and other programs out there that`
			`/// don't work well with Unicode. So if the code is meant to be used`
			`/// internationally, on multiple operating systems, or has other portability`
			`/// requirements, activating this lint could be useful.`
			`///`
			`/// Known problems: None.`
			`///`
			`/// Example:`
			/// ```rust
Improve verbosity of non_ascii_literal lint example 2019-05-20 08:08:53 -05:00			`/// let x = String::from("€");`
			/// ```
			`/// Could be written as:`
			/// ```rust
			`/// let x = String::from("\u{20ac}");`
move lint documentation into macro invocations 2019-03-05 10:50:33 -06:00			/// ```
Fix some formatting issues 2018-11-27 14:49:09 -06:00			`pub NON_ASCII_LITERAL,`
			`pedantic,`
			"using any literal non-ASCII chars in a string literal instead of using the `\\u` escape"
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`}`

Categorize all the lints! 2018-03-28 08:24:26 -05:00			`declare_clippy_lint! {`
move lint documentation into macro invocations 2019-03-05 10:50:33 -06:00			`/// What it does: Checks for string literals that contain Unicode in a form`
			`/// that is not equal to its`
			`/// [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms).`
			`///`
			`/// Why is this bad? If such a string is compared to another, the results`
			`/// may be surprising.`
			`///`
			`/// Known problems None.`
			`///`
			`/// Example: You may not see it, but “à” and “à” aren't the same string. The`
			/// former when escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`.
Fix some formatting issues 2018-11-27 14:49:09 -06:00			`pub UNICODE_NOT_NFC,`
			`pedantic,`
			`"using a unicode literal not in NFC normal form (see [unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"`
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00
Use lint pass macros Fixes #3917. 2019-04-08 15:43:55 -05:00			`declare_lint_pass!(Unicode => [ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00
update to the rust-PR that unblocks clippy 2016-12-07 06:13:40 -06:00			`impl<'a, 'tcx> LateLintPass<'a, 'tcx> for Unicode {`
			`fn check_expr(&mut self, cx: &LateContext<'a, 'tcx>, expr: &'tcx Expr) {`
ExprKind 2018-07-12 02:30:57 -05:00			`if let ExprKind::Lit(ref lit) = expr.node {`
fix nightly breakage 2016-02-12 11:35:44 -06:00			`if let LitKind::Str(_, _) = lit.node {`
HirIdify some lints 2019-02-24 12:43:15 -06:00			`check_str(cx, lit.span, expr.hir_id)`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`}`
			`}`
			`}`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`

fmt clippy 2016-01-03 22:26:12 -06:00			`fn escape<T: Iterator<Item = char>>(s: T) -> String {`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`let mut result = String::new();`
			`for c in s {`
unicode: add lint against non-ascii chars in literals (Allow by default), #85 2015-08-12 13:36:10 -05:00			`if c as u32 > 0x7F {`
fmt clippy 2016-01-03 22:26:12 -06:00			`for d in c.escape_unicode() {`
			`result.push(d)`
			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`} else {`
			`result.push(c);`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`}`
			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`result`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`

HirIdify some lints 2019-02-24 12:43:15 -06:00			`fn check_str(cx: &LateContext<'_, '_>, span: Span, id: HirId) {`
fixed dogfood by using snippet instead of the (escaped) literal string 2015-09-04 07:24:49 -05:00			`let string = snippet(cx, span, "");`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`if string.contains('\u{200B}') {`
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`span_lint_and_sugg(`
Rustfmt 2017-08-09 02:30:56 -05:00			`cx,`
			`ZERO_WIDTH_SPACE,`
			`span,`
			`"zero-width space detected",`
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`"consider replacing the string with",`
			`string.replace("\u{200B}", "\\u{200B}"),`
			`Applicability::MachineApplicable,`
Rustfmt 2017-08-09 02:30:56 -05:00			`);`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
			`if string.chars().any(\|c\| c as u32 > 0x7F) {`
Make non_ascii_literal auto-fixable 2019-05-20 08:23:38 -05:00			`span_lint_and_sugg(`
Rustfmt 2017-08-09 02:30:56 -05:00			`cx,`
			`NON_ASCII_LITERAL,`
			`span,`
			`"literal non-ASCII character detected",`
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`"consider replacing the string with",`
			`if is_allowed(cx, UNICODE_NOT_NFC, id) {`
			`escape(string.chars())`
			`} else {`
			`escape(string.nfc())`
			`},`
Make non_ascii_literal auto-fixable 2019-05-20 08:23:38 -05:00			`Applicability::MachineApplicable,`
Rustfmt 2017-08-09 02:30:56 -05:00			`);`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
Rustup 2017-08-11 07:11:46 -05:00			`if is_allowed(cx, NON_ASCII_LITERAL, id) && string.chars().zip(string.nfc()).any(\|(a, b)\| a != b) {`
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`span_lint_and_sugg(`
Rustfmt 2017-08-09 02:30:56 -05:00			`cx,`
			`UNICODE_NOT_NFC,`
			`span,`
			`"non-nfc unicode sequence detected",`
Fix tests and make other ascii lints auto-fixable 2019-05-20 09:02:50 -05:00			`"consider replacing the string with",`
			`string.nfc().collect::<String>(),`
			`Applicability::MachineApplicable,`
Rustfmt 2017-08-09 02:30:56 -05:00			`);`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`