rust/clippy_lints/src/unicode.rs

use rustc::lint::*;
use rustc::hir::*;
use syntax::ast::LitKind;
use syntax::codemap::Span;
use unicode_normalization::UnicodeNormalization;
use utils::{snippet, span_help_and_lint};

/// **What it does:** This lint checks for the Unicode zero-width space (U+200B) in string
/// literals. The help message shows the literal with every zero-width space replaced by the
/// `\u{200B}` escape.
///
/// **Why is this bad?** Having an invisible character in the code makes for all sorts of April
/// fools, but otherwise is very much frowned upon.
///
/// **Known problems:** None
///
/// **Example:** You don't see it, but there may be a zero-width space somewhere in this text.
declare_lint! {
    pub ZERO_WIDTH_SPACE, Deny,
    "using a zero-width space in a string literal, which is confusing"
}

/// **What it does:** This lint checks for non-ASCII characters (any code point above `0x7F`) in
/// string literals and suggests the same literal with those characters written as `\u{..}`
/// escapes. If `UNICODE_NOT_NFC` is not set to `allow`, the suggestion is NFC-normalized before
/// it is escaped.
///
/// **Why is this bad?** Yeah, we know, the 90's called and wanted their charset back. Even so,
/// there still are editors and other programs out there that don't work well with Unicode. So if
/// the code is meant to be used internationally, on multiple operating systems, or has other
/// portability requirements, activating this lint could be useful.
///
/// **Known problems:** None
///
/// **Example:**
/// ```rust
/// let x = "Hä?";
/// ```
declare_lint! {
    pub NON_ASCII_LITERAL, Allow,
    "using any literal non-ASCII chars in a string literal; suggests \
     using the `\\u` escape instead"
}

/// **What it does:** This lint checks for string literals that contain Unicode in a form that is
/// not equal to its [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms).
/// It is only reported while `NON_ASCII_LITERAL` is set to `allow`; otherwise that lint's
/// suggestion already contains the normalized form.
///
/// **Why is this bad?** If such a string is compared to another, the results may be surprising.
///
/// **Known problems:** None
///
/// **Example:** You may not see it, but “à” and “à” aren't the same string. The former when
/// escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`.
declare_lint! {
    pub UNICODE_NOT_NFC, Allow,
    "using a unicode literal not in NFC normal form (see \
     [unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"
}


/// Stateless lint pass that runs the Unicode lints (`ZERO_WIDTH_SPACE`, `NON_ASCII_LITERAL`
/// and `UNICODE_NOT_NFC`) over string literals.
#[derive(Clone, Copy)]
pub struct Unicode;

impl LintPass for Unicode {
    /// Returns the lints this pass may emit so they can be registered with the compiler.
    fn get_lints(&self) -> LintArray {
        lint_array!(
            ZERO_WIDTH_SPACE,
            NON_ASCII_LITERAL,
            UNICODE_NOT_NFC
        )
    }
}

impl LateLintPass for Unicode {
    /// Forwards the span of every string-literal expression to `check_str`; all other
    /// expressions (including non-string literals) are ignored.
    fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
        // Only literal expressions are of interest.
        let literal = match expr.node {
            ExprLit(ref literal) => literal,
            _ => return,
        };
        // ...and among those, only string literals.
        if let LitKind::Str(..) = literal.node {
            check_str(cx, literal.span);
        }
    }
}

/// Builds a `String` from the characters of `s`, keeping ASCII characters (code points up to
/// `0x7F`) as they are and writing every other character as its `\u{..}` escape sequence.
fn escape<T: Iterator<Item = char>>(s: T) -> String {
    s.fold(String::new(), |mut escaped, ch| {
        if ch as u32 <= 0x7F {
            escaped.push(ch);
        } else {
            // `escape_unicode` yields the characters of the `\u{NNNN}` form one by one.
            escaped.extend(ch.escape_unicode());
        }
        escaped
    })
}

/// Runs the three Unicode lints over the source snippet found at `span`.
///
/// The checks operate on the text returned by `snippet` for the span, not on a parsed string
/// value. Up to three diagnostics may be emitted, in this order:
///
/// 1. `ZERO_WIDTH_SPACE` if the snippet contains U+200B.
/// 2. `NON_ASCII_LITERAL` if the snippet contains any code point above `0x7F`. The suggestion
///    is NFC-normalized first unless `UNICODE_NOT_NFC` is allowed.
/// 3. `UNICODE_NOT_NFC` if the snippet differs from its NFC form, but only while
///    `NON_ASCII_LITERAL` is allowed.
fn check_str(cx: &LateContext, span: Span) {
    let literal = snippet(cx, span, "");
    // All three lints share the same help-text frame; only the replacement text differs.
    let help_for = |replacement: &str| format!("Consider replacing the string with:\n\"{}\"", replacement);

    if literal.contains('\u{200B}') {
        let visible = literal.replace("\u{200B}", "\\u{200B}");
        span_help_and_lint(cx,
                           ZERO_WIDTH_SPACE,
                           span,
                           "zero-width space detected",
                           &help_for(&visible));
    }

    let has_non_ascii = literal.chars().any(|ch| ch as u32 > 0x7F);
    if has_non_ascii {
        // When the NFC lint is active too, fold its fix into this suggestion.
        let escaped = if cx.current_level(UNICODE_NOT_NFC) != Level::Allow {
            escape(literal.nfc())
        } else {
            escape(literal.chars())
        };
        span_help_and_lint(cx,
                           NON_ASCII_LITERAL,
                           span,
                           "literal non-ASCII character detected",
                           &help_for(&escaped));
    }

    // The NFC lint is only reported while the non-ASCII lint is allowed.
    if cx.current_level(NON_ASCII_LITERAL) != Level::Allow {
        return;
    }
    let differs_from_nfc = literal.chars().zip(literal.nfc()).any(|(written, normalized)| written != normalized);
    if differs_from_nfc {
        let normalized = literal.nfc().collect::<String>();
        span_help_and_lint(cx,
                           UNICODE_NOT_NFC,
                           span,
                           "non-nfc unicode sequence detected",
                           &help_for(&normalized));
    }
}
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`use rustc::lint::*;`
Rustup to 1.9.0-nightly (bf5da36f1 2016-04-06) 2016-04-07 10:46:48 -05:00			`use rustc::hir::*;`
fix nightly breakage 2016-02-12 11:35:44 -06:00			`use syntax::ast::LitKind;`
Rustfmt and sort all `use` items 2016-02-24 10:38:57 -06:00			`use syntax::codemap::Span;`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`use unicode_normalization::UnicodeNormalization;`
fixed dogfood by using snippet instead of the (escaped) literal string 2015-09-04 07:24:49 -05:00			`use utils::{snippet, span_help_and_lint};`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00
Improve docs 2016-07-15 17:25:44 -05:00			`/// What it does: This lint checks for the Unicode zero-width space in the code.`
added wiki comments + wiki-generating python script 2015-12-10 18:22:27 -06:00			`///`
Improve docs 2016-07-15 17:25:44 -05:00			`/// Why is this bad? Having an invisible character in the code makes for all sorts of April`
			`/// fools, but otherwise is very much frowned upon.`
added wiki comments + wiki-generating python script 2015-12-10 18:22:27 -06:00			`///`
			`/// Known problems: None`
			`///`
			`/// Example: You don't see it, but there may be a zero-width space somewhere in this text.`
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`declare_lint! {`
			`pub ZERO_WIDTH_SPACE, Deny,`
			`"using a zero-width space in a string literal, which is confusing"`
			`}`

Improve docs 2016-07-15 17:25:44 -05:00			`/// What it does: This lint checks for non-ASCII characters in string literals.`
added wiki comments + wiki-generating python script 2015-12-10 18:22:27 -06:00			`///`
Improve docs 2016-07-15 17:25:44 -05:00			`/// Why is this bad? Yeah, we know, the 90's called and wanted their charset back. Even so,`
			`/// there still are editors and other programs out there that don't work well with Unicode. So if`
			`/// the code is meant to be used internationally, on multiple operating systems, or has other`
			`/// portability requirements, activating this lint could be useful.`
added wiki comments + wiki-generating python script 2015-12-10 18:22:27 -06:00			`///`
			`/// Known problems: None`
			`///`
Improve docs 2016-07-15 17:25:44 -05:00			`/// Example:`
			/// ```rust
			`/// let x = "Hä?"`
			/// ```
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`declare_lint! {`
			`pub NON_ASCII_LITERAL, Allow,`
			`"using any literal non-ASCII chars in a string literal; suggests \`
Improve docs 2016-07-15 17:25:44 -05:00			using the `\\u` escape instead"
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`}`

Improve docs 2016-07-15 17:25:44 -05:00			`/// What it does: This lint checks for string literals that contain Unicode in a form that is`
			`/// not equal to its [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms).`
added wiki comments + wiki-generating python script 2015-12-10 18:22:27 -06:00			`///`
			`/// Why is this bad? If such a string is compared to another, the results may be surprising.`
			`///`
			`/// Known problems None`
			`///`
Improve docs 2016-07-15 17:25:44 -05:00			`/// Example: You may not see it, but “à” and “à” aren't the same string. The former when`
			/// escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`.
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`declare_lint! {`
			`pub UNICODE_NOT_NFC, Allow,`
			`"using a unicode literal not in NFC normal form (see \`
fix markdown generated from code 2016-05-06 09:09:05 -05:00			`[unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"`
Fix util/update_wiki.py warnings and be consistent in declare_lint! invocations 2016-02-05 17:13:29 -06:00			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00
			`#[derive(Copy, Clone)]`
			`pub struct Unicode;`

			`impl LintPass for Unicode {`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`fn get_lints(&self) -> LintArray {`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC)`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`
Upgrade Rust to rustc 1.5.0-nightly (cff041170 2015-09-17) LintPass was split and ExprParen was removed from the HIR Fixes #338 2015-09-18 21:53:04 -05:00			`}`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00
Upgrade Rust to rustc 1.5.0-nightly (cff041170 2015-09-17) LintPass was split and ExprParen was removed from the HIR Fixes #338 2015-09-18 21:53:04 -05:00			`impl LateLintPass for Unicode {`
			`fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`if let ExprLit(ref lit) = expr.node {`
fix nightly breakage 2016-02-12 11:35:44 -06:00			`if let LitKind::Str(_, _) = lit.node {`
fixed dogfood by using snippet instead of the (escaped) literal string 2015-09-04 07:24:49 -05:00			`check_str(cx, lit.span)`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`}`
			`}`
			`}`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`

fmt clippy 2016-01-03 22:26:12 -06:00			`fn escape<T: Iterator<Item = char>>(s: T) -> String {`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`let mut result = String::new();`
			`for c in s {`
unicode: add lint against non-ascii chars in literals (Allow by default), #85 2015-08-12 13:36:10 -05:00			`if c as u32 > 0x7F {`
fmt clippy 2016-01-03 22:26:12 -06:00			`for d in c.escape_unicode() {`
			`result.push(d)`
			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`} else {`
			`result.push(c);`
all: whitespace cleanup * 4-space indentation * no trailing whitespace * no tabs 2015-08-11 13:22:20 -05:00			`}`
			`}`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`result`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`

Upgrade Rust to rustc 1.5.0-nightly (cff041170 2015-09-17) LintPass was split and ExprParen was removed from the HIR Fixes #338 2015-09-18 21:53:04 -05:00			`fn check_str(cx: &LateContext, span: Span) {`
fixed dogfood by using snippet instead of the (escaped) literal string 2015-09-04 07:24:49 -05:00			`let string = snippet(cx, span, "");`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`if string.contains('\u{200B}') {`
fmt clippy 2016-01-03 22:26:12 -06:00			`span_help_and_lint(cx,`
			`ZERO_WIDTH_SPACE,`
			`span,`
			`"zero-width space detected",`
			`&format!("Consider replacing the string with:\n\"{}\"",`
			`string.replace("\u{200B}", "\\u{200B}")));`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
			`if string.chars().any(\|c\| c as u32 > 0x7F) {`
fmt clippy 2016-01-03 22:26:12 -06:00			`span_help_and_lint(cx,`
			`NON_ASCII_LITERAL,`
			`span,`
			`"literal non-ASCII character detected",`
			`&format!("Consider replacing the string with:\n\"{}\"",`
			`if cx.current_level(UNICODE_NOT_NFC) == Level::Allow {`
			`escape(string.chars())`
			`} else {`
			`escape(string.nfc())`
			`}));`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
fmt clippy 2016-01-03 22:26:12 -06:00			`if cx.current_level(NON_ASCII_LITERAL) == Level::Allow && string.chars().zip(string.nfc()).any(\|(a, b)\| a != b) {`
			`span_help_and_lint(cx,`
			`UNICODE_NOT_NFC,`
			`span,`
			`"non-nfc unicode sequence detected",`
			`&format!("Consider replacing the string with:\n\"{}\"", string.nfc().collect::<String>()));`
Unicode lints, second attempt: Lint whole strings, help with replacement 2015-09-04 02:08:07 -05:00			`}`
first unicode lint: zero_width_space 2015-06-11 04:35:00 -05:00			`}`