Refactor non_ascii_idents lints, exclude ascii pair for confusable_idents lint.
This commit is contained in:
parent
1a4e2b6f9c
commit
ef24faf130
@ -1,9 +1,7 @@
|
|||||||
use crate::{EarlyContext, EarlyLintPass, LintContext};
|
use crate::{EarlyContext, EarlyLintPass, LintContext};
|
||||||
use rustc_ast::ast;
|
use rustc_ast::ast;
|
||||||
use rustc_data_structures::fx::FxHashMap;
|
use rustc_data_structures::fx::FxHashMap;
|
||||||
use rustc_span::symbol::{Ident, SymbolStr};
|
use rustc_span::symbol::SymbolStr;
|
||||||
use std::hash::{Hash, Hasher};
|
|
||||||
use std::ops::Deref;
|
|
||||||
|
|
||||||
declare_lint! {
|
declare_lint! {
|
||||||
pub NON_ASCII_IDENTS,
|
pub NON_ASCII_IDENTS,
|
||||||
@ -19,158 +17,133 @@ declare_lint! {
|
|||||||
crate_level_only
|
crate_level_only
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Change this to warn.
|
|
||||||
declare_lint! {
|
declare_lint! {
|
||||||
pub CONFUSABLE_IDENTS,
|
pub CONFUSABLE_IDENTS,
|
||||||
Allow,
|
Warn,
|
||||||
"detects visually confusable pairs between identifiers",
|
"detects visually confusable pairs between identifiers",
|
||||||
crate_level_only
|
crate_level_only
|
||||||
}
|
}
|
||||||
|
|
||||||
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
|
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
|
||||||
|
|
||||||
enum CowBoxSymStr {
|
|
||||||
Interned(SymbolStr),
|
|
||||||
Owned(Box<str>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for CowBoxSymStr {
|
|
||||||
type Target = str;
|
|
||||||
|
|
||||||
fn deref(&self) -> &str {
|
|
||||||
match self {
|
|
||||||
CowBoxSymStr::Interned(interned) => interned,
|
|
||||||
CowBoxSymStr::Owned(ref owned) => owned,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Hash for CowBoxSymStr {
|
|
||||||
#[inline]
|
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
||||||
Hash::hash(&**self, state)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
|
|
||||||
#[inline]
|
|
||||||
fn eq(&self, other: &CowBoxSymStr) -> bool {
|
|
||||||
PartialEq::eq(&**self, &**other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Eq for CowBoxSymStr {}
|
|
||||||
|
|
||||||
fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
|
|
||||||
use std::mem::swap;
|
|
||||||
use unicode_security::confusable_detection::skeleton;
|
|
||||||
buffer.clear();
|
|
||||||
buffer.extend(skeleton(&symbol_str));
|
|
||||||
if symbol_str == *buffer {
|
|
||||||
CowBoxSymStr::Interned(symbol_str)
|
|
||||||
} else {
|
|
||||||
let mut owned = String::new();
|
|
||||||
swap(buffer, &mut owned);
|
|
||||||
CowBoxSymStr::Owned(owned.into_boxed_str())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_in_ascii_confusable_closure(c: char) -> bool {
|
|
||||||
// FIXME: move this table to `unicode_security` crate.
|
|
||||||
// data here corresponds to Unicode 13.
|
|
||||||
const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
|
|
||||||
let c = c as u64;
|
|
||||||
for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
|
|
||||||
if c >= range_start && c <= range_end {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
|
|
||||||
// FIXME: move this table to `unicode_security` crate.
|
|
||||||
// data here corresponds to Unicode 13.
|
|
||||||
const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
|
|
||||||
0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
|
|
||||||
0x2080,
|
|
||||||
];
|
|
||||||
let c = c as u64;
|
|
||||||
for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
|
|
||||||
if c == item {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EarlyLintPass for NonAsciiIdents {
|
impl EarlyLintPass for NonAsciiIdents {
|
||||||
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||||
use rustc_session::lint::Level;
|
use rustc_session::lint::Level;
|
||||||
if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
|
use rustc_span::Span;
|
||||||
return;
|
|
||||||
}
|
|
||||||
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
|
|
||||||
let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
|
|
||||||
let mut in_fast_path = true;
|
|
||||||
for (symbol, sp) in symbols.iter() {
|
|
||||||
// fast path
|
|
||||||
let symbol_str = symbol.as_str();
|
|
||||||
if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
|
|
||||||
// fallback to slow path.
|
|
||||||
symbol_strs_and_spans.clear();
|
|
||||||
in_fast_path = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
|
|
||||||
symbol_strs_and_spans.push((symbol_str, *sp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !in_fast_path {
|
|
||||||
// slow path
|
|
||||||
for (symbol, sp) in symbols.iter() {
|
|
||||||
let symbol_str = symbol.as_str();
|
|
||||||
symbol_strs_and_spans.push((symbol_str, *sp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
drop(symbols);
|
|
||||||
symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
|
|
||||||
let mut skeleton_map =
|
|
||||||
FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
|
|
||||||
let mut str_buf = String::new();
|
|
||||||
for (symbol_str, sp) in symbol_strs_and_spans {
|
|
||||||
let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
|
|
||||||
skeleton_map
|
|
||||||
.entry(skeleton)
|
|
||||||
.and_modify(|(existing_symbolstr, existing_span)| {
|
|
||||||
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
|
|
||||||
lint.build(&format!(
|
|
||||||
"identifier pair considered confusable between `{}` and `{}`",
|
|
||||||
existing_symbolstr, symbol_str
|
|
||||||
))
|
|
||||||
.span_label(
|
|
||||||
*existing_span,
|
|
||||||
"this is where the previous identifier occurred",
|
|
||||||
)
|
|
||||||
.emit();
|
|
||||||
});
|
|
||||||
})
|
|
||||||
.or_insert((symbol_str, sp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
|
|
||||||
use unicode_security::GeneralSecurityProfile;
|
use unicode_security::GeneralSecurityProfile;
|
||||||
let name_str = ident.name.as_str();
|
use utils::CowBoxSymStr;
|
||||||
if name_str.is_ascii() {
|
|
||||||
|
let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
|
||||||
|
let check_uncommon_codepoints =
|
||||||
|
cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
|
||||||
|
let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
|
||||||
|
|
||||||
|
if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
|
|
||||||
lint.build("identifier contains non-ASCII characters").emit()
|
let mut has_non_ascii_idents = false;
|
||||||
});
|
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
|
||||||
if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
|
for (symbol, &sp) in symbols.iter() {
|
||||||
cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
|
let symbol_str = symbol.as_str();
|
||||||
lint.build("identifier contains uncommon Unicode codepoints").emit()
|
if symbol_str.is_ascii() {
|
||||||
})
|
continue;
|
||||||
|
}
|
||||||
|
has_non_ascii_idents = true;
|
||||||
|
cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
|
||||||
|
lint.build("identifier contains non-ASCII characters").emit()
|
||||||
|
});
|
||||||
|
if check_uncommon_codepoints
|
||||||
|
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
|
||||||
|
{
|
||||||
|
cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
|
||||||
|
lint.build("identifier contains uncommon Unicode codepoints").emit()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_non_ascii_idents && check_confusable_idents {
|
||||||
|
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
|
||||||
|
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
|
||||||
|
let mut str_buf = String::new();
|
||||||
|
for (symbol, &sp) in symbols.iter() {
|
||||||
|
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
|
||||||
|
use std::mem::replace;
|
||||||
|
use unicode_security::confusable_detection::skeleton;
|
||||||
|
buffer.clear();
|
||||||
|
buffer.extend(skeleton(symbol_str));
|
||||||
|
if *symbol_str == *buffer {
|
||||||
|
CowBoxSymStr::Interned(symbol_str.clone())
|
||||||
|
} else {
|
||||||
|
let owned = replace(buffer, String::new());
|
||||||
|
CowBoxSymStr::Owned(owned.into_boxed_str())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let symbol_str = symbol.as_str();
|
||||||
|
let is_ascii = symbol_str.is_ascii();
|
||||||
|
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
|
||||||
|
skeleton_map
|
||||||
|
.entry(skeleton)
|
||||||
|
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
|
||||||
|
if !*existing_is_ascii || !is_ascii {
|
||||||
|
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
|
||||||
|
lint.build(&format!(
|
||||||
|
"identifier pair considered confusable between `{}` and `{}`",
|
||||||
|
existing_symbolstr, symbol_str
|
||||||
|
))
|
||||||
|
.span_label(
|
||||||
|
*existing_span,
|
||||||
|
"this is where the previous identifier occurred",
|
||||||
|
)
|
||||||
|
.emit();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if *existing_is_ascii && !is_ascii {
|
||||||
|
*existing_symbolstr = symbol_str.clone();
|
||||||
|
*existing_span = sp;
|
||||||
|
*existing_is_ascii = is_ascii;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.or_insert((symbol_str, sp, is_ascii));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mod utils {
|
||||||
|
use rustc_span::symbol::SymbolStr;
|
||||||
|
use std::hash::{Hash, Hasher};
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
pub(super) enum CowBoxSymStr {
|
||||||
|
Interned(SymbolStr),
|
||||||
|
Owned(Box<str>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for CowBoxSymStr {
|
||||||
|
type Target = str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
CowBoxSymStr::Interned(interned) => interned,
|
||||||
|
CowBoxSymStr::Owned(ref owned) => owned,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Hash for CowBoxSymStr {
|
||||||
|
#[inline]
|
||||||
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
|
Hash::hash(&**self, state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
|
||||||
|
#[inline]
|
||||||
|
fn eq(&self, other: &CowBoxSymStr) -> bool {
|
||||||
|
PartialEq::eq(&**self, &**other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Eq for CowBoxSymStr {}
|
||||||
|
}
|
||||||
|
@ -13,6 +13,7 @@ use rustc_span::hygiene::ExpnId;
|
|||||||
use rustc_span::source_map::{FilePathMapping, SourceMap};
|
use rustc_span::source_map::{FilePathMapping, SourceMap};
|
||||||
use rustc_span::{MultiSpan, Span, Symbol};
|
use rustc_span::{MultiSpan, Span, Symbol};
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
@ -63,7 +64,7 @@ impl GatedSpans {
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct SymbolGallery {
|
pub struct SymbolGallery {
|
||||||
/// All symbols that occurred and the span of their first occurrence.
|
/// All symbols that occurred and the span of their first occurrence.
|
||||||
pub symbols: Lock<FxHashMap<Symbol, Span>>,
|
pub symbols: Lock<BTreeMap<Symbol, Span>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SymbolGallery {
|
impl SymbolGallery {
|
||||||
|
@ -2,8 +2,14 @@
|
|||||||
#![deny(confusable_idents)]
|
#![deny(confusable_idents)]
|
||||||
#![allow(uncommon_codepoints, non_upper_case_globals)]
|
#![allow(uncommon_codepoints, non_upper_case_globals)]
|
||||||
|
|
||||||
const ｓ: usize = 42; //~ ERROR identifier pair considered confusable
|
const ｓ: usize = 42;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let s = "rust";
|
let s = "rust"; //~ ERROR identifier pair considered confusable
|
||||||
|
not_affected();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn not_affected() {
|
||||||
|
let s1 = 1;
|
||||||
|
let sl = 'l';
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
error: identifier pair considered confusable between `s` and `ｓ`
|
error: identifier pair considered confusable between `ｓ` and `s`
|
||||||
--> $DIR/lint-confusable-idents.rs:5:7
|
--> $DIR/lint-confusable-idents.rs:8:9
|
||||||
|
|
|
|
||||||
LL | const ｓ: usize = 42;
|
LL | const ｓ: usize = 42;
|
||||||
| ^^
|
| -- this is where the previous identifier occurred
|
||||||
...
|
...
|
||||||
LL | let s = "rust";
|
LL | let s = "rust";
|
||||||
| - this is where the previous identifier occurred
|
| ^
|
||||||
|
|
|
|
||||||
note: the lint level is defined here
|
note: the lint level is defined here
|
||||||
--> $DIR/lint-confusable-idents.rs:2:9
|
--> $DIR/lint-confusable-idents.rs:2:9
|
||||||
|
@ -7,5 +7,7 @@ fn coöperation() {} //~ ERROR identifier contains non-ASCII characters
|
|||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let naïveté = 2; //~ ERROR identifier contains non-ASCII characters
|
let naïveté = 2; //~ ERROR identifier contains non-ASCII characters
|
||||||
println!("{}", naïveté); //~ ERROR identifier contains non-ASCII characters
|
|
||||||
|
// using the same identifier the second time won't trigger the lint.
|
||||||
|
println!("{}", naïveté);
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,5 @@ error: identifier contains non-ASCII characters
|
|||||||
LL | let naïveté = 2;
|
LL | let naïveté = 2;
|
||||||
| ^^^^^^^
|
| ^^^^^^^
|
||||||
|
|
||||||
error: identifier contains non-ASCII characters
|
error: aborting due to 3 previous errors
|
||||||
--> $DIR/lint-non-ascii-idents.rs:10:20
|
|
||||||
|
|
|
||||||
LL | println!("{}", naïveté);
|
|
||||||
| ^^^^^^^
|
|
||||||
|
|
||||||
error: aborting due to 4 previous errors
|
|
||||||
|
|
||||||
|
@ -7,5 +7,7 @@ fn dĳkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
|
|||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
|
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
|
||||||
println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints
|
|
||||||
|
// using the same identifier the second time won't trigger the lint.
|
||||||
|
println!("{}", ㇻㇲㇳ);
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,5 @@ error: identifier contains uncommon Unicode codepoints
|
|||||||
LL | let ㇻㇲㇳ = "rust";
|
LL | let ㇻㇲㇳ = "rust";
|
||||||
| ^^^^^^
|
| ^^^^^^
|
||||||
|
|
||||||
error: identifier contains uncommon Unicode codepoints
|
error: aborting due to 3 previous errors
|
||||||
--> $DIR/lint-uncommon-codepoints.rs:10:20
|
|
||||||
|
|
|
||||||
LL | println!("{}", ㇻㇲㇳ);
|
|
||||||
| ^^^^^^
|
|
||||||
|
|
||||||
error: aborting due to 4 previous errors
|
|
||||||
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
// ignore-tidy-trailing-newlines
|
// ignore-tidy-trailing-newlines
|
||||||
// error-pattern: aborting due to 3 previous errors
|
// error-pattern: aborting due to 3 previous errors
|
||||||
|
#![allow(uncommon_codepoints)]
|
||||||
|
|
||||||
y![
|
y![
|
||||||
Ϥ,
|
Ϥ,
|
@ -1,5 +1,5 @@
|
|||||||
error: this file contains an unclosed delimiter
|
error: this file contains an unclosed delimiter
|
||||||
--> $DIR/issue-62524.rs:4:3
|
--> $DIR/issue-62524.rs:6:3
|
||||||
|
|
|
|
||||||
LL | y![
|
LL | y![
|
||||||
| - unclosed delimiter
|
| - unclosed delimiter
|
||||||
@ -7,7 +7,7 @@ LL | Ϥ,
|
|||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: macros that expand to items must be delimited with braces or followed by a semicolon
|
error: macros that expand to items must be delimited with braces or followed by a semicolon
|
||||||
--> $DIR/issue-62524.rs:3:3
|
--> $DIR/issue-62524.rs:5:3
|
||||||
|
|
|
|
||||||
LL | y![
|
LL | y![
|
||||||
| ___^
|
| ___^
|
||||||
@ -24,7 +24,7 @@ LL | Ϥ,;
|
|||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: cannot find macro `y` in this scope
|
error: cannot find macro `y` in this scope
|
||||||
--> $DIR/issue-62524.rs:3:1
|
--> $DIR/issue-62524.rs:5:1
|
||||||
|
|
|
|
||||||
LL | y![
|
LL | y![
|
||||||
| ^
|
| ^
|
||||||
|
Loading…
x
Reference in New Issue
Block a user