Auto merge of #75349 - nnethercote:tweak-confusable-idents-checking, r=petrochenkov
Tweak confusable idents checking The confusable idents checking does some sub-optimal things with symbols. r? @petrochenkov cc @crlf0710
This commit is contained in:
commit
770bd3d1d0
@ -1,7 +1,7 @@
|
||||
use crate::{EarlyContext, EarlyLintPass, LintContext};
|
||||
use rustc_ast::ast;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_span::symbol::SymbolStr;
|
||||
use rustc_span::symbol::Symbol;
|
||||
|
||||
declare_lint! {
|
||||
pub NON_ASCII_IDENTS,
|
||||
@ -39,7 +39,6 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
use rustc_span::Span;
|
||||
use std::collections::BTreeMap;
|
||||
use unicode_security::GeneralSecurityProfile;
|
||||
use utils::CowBoxSymStr;
|
||||
|
||||
let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
|
||||
let check_uncommon_codepoints =
|
||||
@ -58,6 +57,12 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
|
||||
let mut has_non_ascii_idents = false;
|
||||
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
|
||||
|
||||
// Sort by `Span` so that error messages make sense with respect to the
|
||||
// order of identifier locations in the code.
|
||||
let mut symbols: Vec<_> = symbols.iter().collect();
|
||||
symbols.sort_by_key(|k| k.1);
|
||||
|
||||
for (symbol, &sp) in symbols.iter() {
|
||||
let symbol_str = symbol.as_str();
|
||||
if symbol_str.is_ascii() {
|
||||
@ -77,33 +82,34 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
}
|
||||
|
||||
if has_non_ascii_idents && check_confusable_idents {
|
||||
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
|
||||
let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
|
||||
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
|
||||
let mut str_buf = String::new();
|
||||
for (symbol, &sp) in symbols.iter() {
|
||||
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
|
||||
use std::mem::replace;
|
||||
let mut skeleton_buf = String::new();
|
||||
|
||||
for (&symbol, &sp) in symbols.iter() {
|
||||
use unicode_security::confusable_detection::skeleton;
|
||||
buffer.clear();
|
||||
buffer.extend(skeleton(symbol_str));
|
||||
if *symbol_str == *buffer {
|
||||
CowBoxSymStr::Interned(symbol_str.clone())
|
||||
} else {
|
||||
let owned = replace(buffer, String::new());
|
||||
CowBoxSymStr::Owned(owned.into_boxed_str())
|
||||
}
|
||||
}
|
||||
|
||||
let symbol_str = symbol.as_str();
|
||||
let is_ascii = symbol_str.is_ascii();
|
||||
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
|
||||
|
||||
// Get the skeleton as a `Symbol`.
|
||||
skeleton_buf.clear();
|
||||
skeleton_buf.extend(skeleton(&symbol_str));
|
||||
let skeleton_sym = if *symbol_str == *skeleton_buf {
|
||||
symbol
|
||||
} else {
|
||||
Symbol::intern(&skeleton_buf)
|
||||
};
|
||||
|
||||
skeleton_map
|
||||
.entry(skeleton)
|
||||
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
|
||||
.entry(skeleton_sym)
|
||||
.and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
|
||||
if !*existing_is_ascii || !is_ascii {
|
||||
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
|
||||
lint.build(&format!(
|
||||
"identifier pair considered confusable between `{}` and `{}`",
|
||||
existing_symbolstr, symbol_str
|
||||
existing_symbol.as_str(),
|
||||
symbol.as_str()
|
||||
))
|
||||
.span_label(
|
||||
*existing_span,
|
||||
@ -113,12 +119,12 @@ fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
|
||||
});
|
||||
}
|
||||
if *existing_is_ascii && !is_ascii {
|
||||
*existing_symbolstr = symbol_str.clone();
|
||||
*existing_symbol = symbol;
|
||||
*existing_span = sp;
|
||||
*existing_is_ascii = is_ascii;
|
||||
}
|
||||
})
|
||||
.or_insert((symbol_str, sp, is_ascii));
|
||||
.or_insert((symbol, sp, is_ascii));
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,41 +238,3 @@ enum ScriptSetUsage {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod utils {
|
||||
use rustc_span::symbol::SymbolStr;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ops::Deref;
|
||||
|
||||
pub(super) enum CowBoxSymStr {
|
||||
Interned(SymbolStr),
|
||||
Owned(Box<str>),
|
||||
}
|
||||
|
||||
impl Deref for CowBoxSymStr {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &str {
|
||||
match self {
|
||||
CowBoxSymStr::Interned(interned) => interned,
|
||||
CowBoxSymStr::Owned(ref owned) => owned,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for CowBoxSymStr {
|
||||
#[inline]
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
Hash::hash(&**self, state)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
|
||||
#[inline]
|
||||
fn eq(&self, other: &CowBoxSymStr) -> bool {
|
||||
PartialEq::eq(&**self, &**other)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for CowBoxSymStr {}
|
||||
}
|
||||
|
@ -13,7 +13,6 @@
|
||||
use rustc_span::source_map::{FilePathMapping, SourceMap};
|
||||
use rustc_span::{MultiSpan, Span, Symbol};
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::PathBuf;
|
||||
use std::str;
|
||||
|
||||
@ -64,7 +63,7 @@ pub fn merge(&self, mut spans: FxHashMap<Symbol, Vec<Span>>) {
|
||||
#[derive(Default)]
|
||||
pub struct SymbolGallery {
|
||||
/// All symbols occurred and their first occurrence span.
|
||||
pub symbols: Lock<BTreeMap<Symbol, Span>>,
|
||||
pub symbols: Lock<FxHashMap<Symbol, Span>>,
|
||||
}
|
||||
|
||||
impl SymbolGallery {
|
||||
|
@ -3,9 +3,11 @@
|
||||
#![allow(uncommon_codepoints, non_upper_case_globals)]
|
||||
|
||||
const s: usize = 42;
|
||||
const s_s: usize = 42;
|
||||
|
||||
fn main() {
|
||||
let s = "rust"; //~ ERROR identifier pair considered confusable
|
||||
let s_s = "rust2"; //~ ERROR identifier pair considered confusable
|
||||
not_affected();
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
error: identifier pair considered confusable between `s` and `s`
|
||||
--> $DIR/lint-confusable-idents.rs:8:9
|
||||
--> $DIR/lint-confusable-idents.rs:9:9
|
||||
|
|
||||
LL | const s: usize = 42;
|
||||
| -- this is where the previous identifier occurred
|
||||
@ -13,5 +13,14 @@ note: the lint level is defined here
|
||||
LL | #![deny(confusable_idents)]
|
||||
| ^^^^^^^^^^^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
error: identifier pair considered confusable between `s_s` and `s_s`
|
||||
--> $DIR/lint-confusable-idents.rs:10:9
|
||||
|
|
||||
LL | const s_s: usize = 42;
|
||||
| --- this is where the previous identifier occurred
|
||||
...
|
||||
LL | let s_s = "rust2";
|
||||
| ^^^^^
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user