Auto merge of #75349 - nnethercote:tweak-confusable-idents-checking, r=petrochenkov

Tweak confusable idents checking

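The confusable-identifier check handles symbols sub-optimally: it keys its skeleton map on a custom `CowBoxSymStr` wrapper and stores `SymbolStr` values where plain interned `Symbol`s suffice. This change switches both to `Symbol`.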

r? @petrochenkov
cc @crlf0710
This commit is contained in:
bors 2020-08-10 21:47:29 +00:00
commit 770bd3d1d0
4 changed files with 43 additions and 65 deletions

View File

@ -1,7 +1,7 @@
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::SymbolStr;
use rustc_span::symbol::Symbol;
declare_lint! {
pub NON_ASCII_IDENTS,
@ -39,7 +39,6 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
use rustc_span::Span;
use std::collections::BTreeMap;
use unicode_security::GeneralSecurityProfile;
use utils::CowBoxSymStr;
let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
let check_uncommon_codepoints =
@ -58,6 +57,12 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
let mut has_non_ascii_idents = false;
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
// Sort by `Span` so that error messages make sense with respect to the
// order of identifier locations in the code.
let mut symbols: Vec<_> = symbols.iter().collect();
symbols.sort_by_key(|k| k.1);
for (symbol, &sp) in symbols.iter() {
let symbol_str = symbol.as_str();
if symbol_str.is_ascii() {
@ -77,33 +82,34 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
}
if has_non_ascii_idents && check_confusable_idents {
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
let mut str_buf = String::new();
for (symbol, &sp) in symbols.iter() {
/// Computes the confusable-detection skeleton of `symbol_str`, using `buffer`
/// as scratch space.
///
/// Returns `CowBoxSymStr::Interned` holding a clone of `symbol_str` when the
/// skeleton text equals the identifier itself; otherwise the skeleton text is
/// moved out of `buffer` (which is left as an empty `String`) and returned as
/// `CowBoxSymStr::Owned`.
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
    use std::mem::take;
    use unicode_security::confusable_detection::skeleton;

    // Rebuild the skeleton from scratch in the caller-provided buffer.
    buffer.clear();
    buffer.extend(skeleton(symbol_str));

    if **symbol_str != **buffer {
        // The skeleton differs from the identifier: hand its text over as an
        // owned boxed string, leaving a fresh empty `String` behind.
        let skeleton_text = take(buffer);
        return CowBoxSymStr::Owned(skeleton_text.into_boxed_str());
    }

    // The identifier is its own skeleton; reuse the interned string.
    CowBoxSymStr::Interned(symbol_str.clone())
}
let mut skeleton_buf = String::new();
for (&symbol, &sp) in symbols.iter() {
use unicode_security::confusable_detection::skeleton;
let symbol_str = symbol.as_str();
let is_ascii = symbol_str.is_ascii();
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
// Get the skeleton as a `Symbol`.
skeleton_buf.clear();
skeleton_buf.extend(skeleton(&symbol_str));
let skeleton_sym = if *symbol_str == *skeleton_buf {
symbol
} else {
Symbol::intern(&skeleton_buf)
};
skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
.entry(skeleton_sym)
.and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
if !*existing_is_ascii || !is_ascii {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
existing_symbol.as_str(),
symbol.as_str()
))
.span_label(
*existing_span,
@ -113,12 +119,12 @@ fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
});
}
if *existing_is_ascii && !is_ascii {
*existing_symbolstr = symbol_str.clone();
*existing_symbol = symbol;
*existing_span = sp;
*existing_is_ascii = is_ascii;
}
})
.or_insert((symbol_str, sp, is_ascii));
.or_insert((symbol, sp, is_ascii));
}
}
@ -232,41 +238,3 @@ enum ScriptSetUsage {
}
}
}
mod utils {
    use rustc_span::symbol::SymbolStr;
    use std::hash::{Hash, Hasher};
    use std::ops::Deref;

    /// String storage that is either an interned `SymbolStr` or an owned
    /// `Box<str>`; both variants dereference to `str`.
    pub(super) enum CowBoxSymStr {
        Interned(SymbolStr),
        Owned(Box<str>),
    }

    impl Deref for CowBoxSymStr {
        type Target = str;

        /// Borrows the underlying text regardless of which variant holds it.
        fn deref(&self) -> &str {
            match *self {
                Self::Interned(ref sym) => sym,
                Self::Owned(ref boxed) => boxed,
            }
        }
    }

    // Hashing and equality both go through the dereferenced `str`, so an
    // interned value and an owned value with the same text hash and compare
    // alike.
    impl Hash for CowBoxSymStr {
        #[inline]
        fn hash<H: Hasher>(&self, hasher: &mut H) {
            let text: &str = self;
            text.hash(hasher)
        }
    }

    impl PartialEq for CowBoxSymStr {
        #[inline]
        fn eq(&self, other: &Self) -> bool {
            let left: &str = self;
            let right: &str = other;
            left == right
        }
    }

    impl Eq for CowBoxSymStr {}
}

View File

@ -13,7 +13,6 @@
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::{MultiSpan, Span, Symbol};
use std::collections::BTreeMap;
use std::path::PathBuf;
use std::str;
@ -64,7 +63,7 @@ pub fn merge(&self, mut spans: FxHashMap<Symbol, Vec<Span>>) {
/// Holds a lock-protected map from each recorded `Symbol` to a `Span`.
#[derive(Default)]
pub struct SymbolGallery {
/// Every symbol that has occurred, mapped to the span of its first occurrence.
pub symbols: Lock<BTreeMap<Symbol, Span>>,
pub symbols: Lock<FxHashMap<Symbol, Span>>,
}
impl SymbolGallery {

View File

@ -3,9 +3,11 @@
#![allow(uncommon_codepoints, non_upper_case_globals)]
const : usize = 42;
const s_s: usize = 42;
fn main() {
let s = "rust"; //~ ERROR identifier pair considered confusable
let _ = "rust2"; //~ ERROR identifier pair considered confusable
not_affected();
}

View File

@ -1,5 +1,5 @@
error: identifier pair considered confusable between `ｓ` and `s`
--> $DIR/lint-confusable-idents.rs:8:9
--> $DIR/lint-confusable-idents.rs:9:9
|
LL | const ｓ: usize = 42;
| -- this is where the previous identifier occurred
@ -13,5 +13,14 @@ note: the lint level is defined here
LL | #![deny(confusable_idents)]
| ^^^^^^^^^^^^^^^^^
error: aborting due to previous error
error: identifier pair considered confusable between `s_s` and `ｓ_ｓ`
--> $DIR/lint-confusable-idents.rs:10:9
|
LL | const s_s: usize = 42;
| --- this is where the previous identifier occurred
...
LL | let ｓ_ｓ = "rust2";
| ^^^^^
error: aborting due to 2 previous errors