Implement 'disallowed_script_idents' lint
This commit is contained in:
parent
b286b38a29
commit
018be41dee
@ -2487,6 +2487,7 @@ Released 2018-09-13
|
||||
[`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq
|
||||
[`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord
|
||||
[`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method
|
||||
[`disallowed_script_idents`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_script_idents
|
||||
[`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
|
||||
[`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
|
||||
[`doc_markdown`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown
|
||||
|
@ -23,6 +23,7 @@ serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = { version = "1.0", optional = true }
|
||||
toml = "0.5.3"
|
||||
unicode-normalization = "0.1"
|
||||
unicode-script = { version = "0.5.3", default-features = false }
|
||||
semver = "0.11"
|
||||
rustc-semver = "1.1.0"
|
||||
# NOTE: cargo requires serde feat in its url dep
|
||||
|
112
clippy_lints/src/disallowed_script_idents.rs
Normal file
112
clippy_lints/src/disallowed_script_idents.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use clippy_utils::diagnostics::span_lint;
|
||||
use rustc_ast::ast;
|
||||
use rustc_data_structures::fx::FxHashSet;
|
||||
use rustc_lint::{EarlyContext, EarlyLintPass, Level};
|
||||
use rustc_session::{declare_tool_lint, impl_lint_pass};
|
||||
use unicode_script::{Script, UnicodeScript};
|
||||
|
||||
declare_clippy_lint! {
    /// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed
    /// by the lint config.
    ///
    /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`.
    /// It also ignores the `Common` script type.
    /// While configuring, be sure to use official script name [aliases] from
    /// [the list of supported scripts][supported_scripts].
    ///
    /// See also: [`non_ascii_idents`].
    ///
    /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases
    /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html
    ///
    /// **Why is this bad?** It may not be desirable to have many different scripts for
    /// identifiers in the codebase.
    ///
    /// Note that if you only want to allow plain English, you might want to use
    /// the built-in [`non_ascii_idents`] lint instead.
    ///
    /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents
    ///
    /// **Known problems:** None.
    ///
    /// **Example:**
    /// ```rust
    /// // Assuming that `clippy.toml` contains the following line:
    /// // allowed-scripts = ["Latin", "Cyrillic"]
    /// let counter = 10; // OK, latin is allowed.
    /// let счётчик = 10; // OK, cyrillic is allowed.
    /// let zähler = 10; // OK, it's still latin.
    /// let カウンタ = 10; // Will spawn the lint.
    /// ```
    pub DISALLOWED_SCRIPT_IDENTS,
    restriction,
    "usage of non-allowed Unicode scripts"
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DisallowedScriptIdents {
|
||||
whitelist: FxHashSet<Script>,
|
||||
}
|
||||
|
||||
impl DisallowedScriptIdents {
|
||||
pub fn new(whitelist: &[String]) -> Self {
|
||||
let whitelist = whitelist
|
||||
.iter()
|
||||
.map(String::as_str)
|
||||
.filter_map(Script::from_full_name)
|
||||
.collect();
|
||||
Self { whitelist }
|
||||
}
|
||||
}
|
||||
|
||||
impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]);
|
||||
|
||||
impl EarlyLintPass for DisallowedScriptIdents {
|
||||
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
// Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint:
|
||||
// https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs
|
||||
|
||||
let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow;
|
||||
if !check_disallowed_script_idents {
|
||||
return;
|
||||
}
|
||||
|
||||
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
|
||||
// Sort by `Span` so that error messages make sense with respect to the
|
||||
// order of identifier locations in the code.
|
||||
let mut symbols: Vec<_> = symbols.iter().collect();
|
||||
symbols.sort_unstable_by_key(|k| k.1);
|
||||
|
||||
for (symbol, &span) in &symbols {
|
||||
// Note: `symbol.as_str()` is an expensive operation, thus should not be called
|
||||
// more than once for a single symbol.
|
||||
let symbol_str = symbol.as_str();
|
||||
if symbol_str.is_ascii() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for c in symbol_str.chars() {
|
||||
// We want to iterate through all the scripts associated with this character
|
||||
// and check whether at least of one scripts is in the whitelist.
|
||||
let forbidden_script = c
|
||||
.script_extension()
|
||||
.iter()
|
||||
.find(|script| !self.whitelist.contains(script));
|
||||
if let Some(script) = forbidden_script {
|
||||
span_lint(
|
||||
cx,
|
||||
DISALLOWED_SCRIPT_IDENTS,
|
||||
span,
|
||||
&format!(
|
||||
"identifier `{}` has a Unicode script that is not allowed by configuration: {}",
|
||||
symbol_str,
|
||||
script.full_name()
|
||||
),
|
||||
);
|
||||
// We don't want to spawn warning multiple times over a single identifier.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -187,6 +187,7 @@ macro_rules! declare_clippy_lint {
|
||||
mod dereference;
|
||||
mod derive;
|
||||
mod disallowed_method;
|
||||
mod disallowed_script_idents;
|
||||
mod disallowed_type;
|
||||
mod doc;
|
||||
mod double_comparison;
|
||||
@ -585,6 +586,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
derive::EXPL_IMPL_CLONE_ON_COPY,
|
||||
derive::UNSAFE_DERIVE_DESERIALIZE,
|
||||
disallowed_method::DISALLOWED_METHOD,
|
||||
disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS,
|
||||
disallowed_type::DISALLOWED_TYPE,
|
||||
doc::DOC_MARKDOWN,
|
||||
doc::MISSING_ERRORS_DOC,
|
||||
@ -995,6 +997,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
LintId::of(create_dir::CREATE_DIR),
|
||||
LintId::of(dbg_macro::DBG_MACRO),
|
||||
LintId::of(default_numeric_fallback::DEFAULT_NUMERIC_FALLBACK),
|
||||
LintId::of(disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS),
|
||||
LintId::of(else_if_without_else::ELSE_IF_WITHOUT_ELSE),
|
||||
LintId::of(exhaustive_items::EXHAUSTIVE_ENUMS),
|
||||
LintId::of(exhaustive_items::EXHAUSTIVE_STRUCTS),
|
||||
@ -2082,7 +2085,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
|
||||
store.register_late_pass(move || box disallowed_type::DisallowedType::new(&disallowed_types));
|
||||
let import_renames = conf.enforced_import_renames.clone();
|
||||
store.register_late_pass(move || box missing_enforced_import_rename::ImportRename::new(import_renames.clone()));
|
||||
|
||||
let scripts = conf.allowed_scripts.clone();
|
||||
store.register_early_pass(move || box disallowed_script_idents::DisallowedScriptIdents::new(&scripts));
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
|
@ -212,6 +212,8 @@ pub(crate) fn get_configuration_metadata() -> Vec<ClippyConfiguration> {
|
||||
(standard_macro_braces: Vec<crate::nonstandard_macro_braces::MacroMatcher> = Vec::new()),
|
||||
/// Lint: MISSING_ENFORCED_IMPORT_RENAMES. The list of imports to always rename, a fully qualified path followed by the rename.
|
||||
(enforced_import_renames: Vec<crate::utils::conf::Rename> = Vec::new()),
|
||||
/// Lint: DISALLOWED_SCRIPT_IDENTS. The list of unicode scripts allowed to be used in the scope.
|
||||
(allowed_scripts: Vec<String> = vec!["Latin".to_string()]),
|
||||
}
|
||||
|
||||
/// Search for the configuration file.
|
||||
|
@ -1,4 +1,4 @@
|
||||
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1
|
||||
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
10
tests/ui/disallowed_script_idents.rs
Normal file
10
tests/ui/disallowed_script_idents.rs
Normal file
@ -0,0 +1,10 @@
|
||||
#![deny(clippy::disallowed_script_idents)]
|
||||
#![allow(dead_code)]
|
||||
|
||||
fn main() {
|
||||
let counter = 10; // OK, latin is allowed.
|
||||
let zähler = 10; // OK, it's still latin.
|
||||
|
||||
let счётчик = 10; // Cyrillic is not allowed by default.
|
||||
let カウンタ = 10; // Same for japanese.
|
||||
}
|
20
tests/ui/disallowed_script_idents.stderr
Normal file
20
tests/ui/disallowed_script_idents.stderr
Normal file
@ -0,0 +1,20 @@
|
||||
error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic
|
||||
--> $DIR/disallowed_script_idents.rs:8:9
|
||||
|
|
||||
LL | let счётчик = 10; // Cyrillic is not allowed by default.
|
||||
| ^^^^^^^
|
||||
|
|
||||
note: the lint level is defined here
|
||||
--> $DIR/disallowed_script_idents.rs:1:9
|
||||
|
|
||||
LL | #![deny(clippy::disallowed_script_idents)]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana
|
||||
--> $DIR/disallowed_script_idents.rs:9:9
|
||||
|
|
||||
LL | let カウンタ = 10; // Same for japanese.
|
||||
| ^^^^^^^^
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
Loading…
Reference in New Issue
Block a user