Rollup merge of #120840 - HTGAzureX1212:HTGAzureX1212/unicode-identifier-types, r=fmease,Manishearth
Split Diagnostics for Uncommon Codepoints: Add Individual Identifier Types

This pull request further modifies the `uncommon_codepoints` lint, adding the individual identifier types of `Technical`, `Not_NFKC`, `Exclusion` and `Limited_Use` to the diagnostic message.

Example rendered diagnostic:

```
error: identifier contains a Unicode codepoint that is not used in normalized strings: 'ij'
  --> $DIR/lint-uncommon-codepoints.rs:6:4
   |
LL | fn dijkstra() {}
   |    ^^^^^^^
   = note: this character is included in the Not_NFKC Unicode general security profile
```

Second step of #120228.
This commit is contained in:
commit
91d337dfa8
@ -244,9 +244,28 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
|
||||
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
|
||||
|
||||
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
|
||||
[one] an uncommon Unicode codepoint
|
||||
*[other] uncommon Unicode codepoints
|
||||
[one] { $identifier_type ->
|
||||
[Exclusion] a character from an archaic script
|
||||
[Technical] a character that is for non-linguistic, specialized usage
|
||||
[Limited_Use] a character from a script in limited use
|
||||
[Not_NFKC] a non normalized (NFKC) character
|
||||
*[other] an uncommon character
|
||||
}
|
||||
*[other] { $identifier_type ->
|
||||
[Exclusion] {$codepoints_len} characters from archaic scripts
|
||||
[Technical] {$codepoints_len} characters that are for non-linguistic, specialized usage
|
||||
[Limited_Use] {$codepoints_len} characters from scripts in limited use
|
||||
[Not_NFKC] {$codepoints_len} non normalized (NFKC) characters
|
||||
*[other] uncommon characters
|
||||
}
|
||||
}: {$codepoints}
|
||||
.note = {$codepoints_len ->
|
||||
[one] this character is
|
||||
*[other] these characters are
|
||||
} included in the{$identifier_type ->
|
||||
[Restricted] {""}
|
||||
*[other] {" "}{$identifier_type}
|
||||
} Unicode general security profile
|
||||
|
||||
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
#![feature(array_windows)]
|
||||
#![feature(box_patterns)]
|
||||
#![feature(control_flow_enum)]
|
||||
#![feature(extract_if)]
|
||||
#![feature(generic_nonzero)]
|
||||
#![feature(if_let_guard)]
|
||||
#![feature(iter_order_by)]
|
||||
|
@ -1129,9 +1129,11 @@ pub struct MultipleSupertraitUpcastable {
|
||||
|
||||
/// Diagnostic payload for the `UNCOMMON_CODEPOINTS` lint, rendered through the
/// `lint_identifier_uncommon_codepoints` Fluent message and its `.note`.
///
/// One instance is emitted per group of offending characters that share the
/// same identifier type.
#[derive(LintDiagnostic)]
|
||||
#[diag(lint_identifier_uncommon_codepoints)]
|
||||
#[note]
|
||||
pub struct IdentifierUncommonCodepoints {
|
||||
/// The offending characters; interpolated into the message as `{$codepoints}`.
pub codepoints: Vec<char>,
|
||||
/// Number of entries in `codepoints`; the Fluent message selects its
/// singular (`one`) or plural (`other`) wording on this value.
pub codepoints_len: usize,
|
||||
/// Name of the identifier type the characters belong to. The emit sites pass
/// `"Exclusion"`, `"Technical"`, `"Limited_Use"`, `"Not_NFKC"`, or
/// `"Restricted"` for the remainder; the Fluent message selects its wording
/// on this string, and `"Restricted"` omits the type name from the note.
pub identifier_type: &'static str,
|
||||
}
|
||||
|
||||
#[derive(LintDiagnostic)]
|
||||
|
@ -7,6 +7,7 @@
|
||||
use rustc_data_structures::fx::FxIndexMap;
|
||||
use rustc_data_structures::unord::UnordMap;
|
||||
use rustc_span::symbol::Symbol;
|
||||
use unicode_security::general_security_profile::IdentifierType;
|
||||
|
||||
declare_lint! {
|
||||
/// The `non_ascii_idents` lint detects non-ASCII identifiers.
|
||||
@ -189,17 +190,47 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
if check_uncommon_codepoints
|
||||
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
|
||||
{
|
||||
let codepoints: Vec<_> = symbol_str
|
||||
let mut chars: Vec<_> = symbol_str
|
||||
.chars()
|
||||
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
|
||||
.map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
|
||||
.collect();
|
||||
let codepoints_len = codepoints.len();
|
||||
|
||||
cx.emit_span_lint(
|
||||
UNCOMMON_CODEPOINTS,
|
||||
sp,
|
||||
IdentifierUncommonCodepoints { codepoints, codepoints_len },
|
||||
);
|
||||
for (id_ty, id_ty_descr) in [
|
||||
(IdentifierType::Exclusion, "Exclusion"),
|
||||
(IdentifierType::Technical, "Technical"),
|
||||
(IdentifierType::Limited_Use, "Limited_Use"),
|
||||
(IdentifierType::Not_NFKC, "Not_NFKC"),
|
||||
] {
|
||||
let codepoints: Vec<_> =
|
||||
chars.extract_if(|(_, ty)| *ty == Some(id_ty)).collect();
|
||||
if codepoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
cx.emit_span_lint(
|
||||
UNCOMMON_CODEPOINTS,
|
||||
sp,
|
||||
IdentifierUncommonCodepoints {
|
||||
codepoints_len: codepoints.len(),
|
||||
codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
|
||||
identifier_type: id_ty_descr,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let remaining = chars
|
||||
.extract_if(|(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
|
||||
.collect::<Vec<_>>();
|
||||
if !remaining.is_empty() {
|
||||
cx.emit_span_lint(
|
||||
UNCOMMON_CODEPOINTS,
|
||||
sp,
|
||||
IdentifierUncommonCodepoints {
|
||||
codepoints_len: remaining.len(),
|
||||
codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
|
||||
identifier_type: "Restricted",
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
|
||||
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
|
||||
// FIXME
|
||||
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
|
||||
//~^ WARN: identifier contains an uncommon Unicode codepoint
|
||||
//~^ WARN: identifier contains an uncommon character: '\u{fe0f}'
|
||||
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
|
||||
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
|
||||
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji
|
||||
|
@ -40,12 +40,13 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
|
||||
LL | let folded🙏🏿 = "modifier sequence";
|
||||
| ^^^^^^^^^^
|
||||
|
||||
warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
|
||||
warning: identifier contains an uncommon character: '\u{fe0f}'
|
||||
--> $DIR/lex-emoji-identifiers.rs:6:9
|
||||
|
|
||||
LL | let key1️⃣ = "keycap sequence";
|
||||
| ^^^^
|
||||
|
|
||||
= note: this character is included in the Unicode general security profile
|
||||
= note: `#[warn(uncommon_codepoints)]` on by default
|
||||
|
||||
error: aborting due to 7 previous errors; 1 warning emitted
|
||||
|
@ -1,12 +1,13 @@
|
||||
#![deny(uncommon_codepoints)]
|
||||
|
||||
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
|
||||
const µ: f64 = 0.000001; //~ identifier contains a non normalized (NFKC) character: 'µ'
|
||||
//~| WARNING should have an upper case name
|
||||
|
||||
fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
|
||||
fn dijkstra() {}
|
||||
//~^ ERROR identifier contains a non normalized (NFKC) character: 'ij'
|
||||
|
||||
fn main() {
|
||||
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
|
||||
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'
|
||||
|
||||
// using the same identifier the second time won't trigger the lint.
|
||||
println!("{}", ㇻㇲㇳ);
|
||||
|
@ -1,26 +1,31 @@
|
||||
error: identifier contains an uncommon Unicode codepoint: 'µ'
|
||||
error: identifier contains a non normalized (NFKC) character: 'µ'
|
||||
--> $DIR/lint-uncommon-codepoints.rs:3:7
|
||||
|
|
||||
LL | const µ: f64 = 0.000001;
|
||||
| ^
|
||||
|
|
||||
= note: this character is included in the Not_NFKC Unicode general security profile
|
||||
note: the lint level is defined here
|
||||
--> $DIR/lint-uncommon-codepoints.rs:1:9
|
||||
|
|
||||
LL | #![deny(uncommon_codepoints)]
|
||||
| ^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
error: identifier contains an uncommon Unicode codepoint: 'ij'
|
||||
error: identifier contains a non normalized (NFKC) character: 'ij'
|
||||
--> $DIR/lint-uncommon-codepoints.rs:6:4
|
||||
|
|
||||
LL | fn dijkstra() {}
|
||||
| ^^^^^^^
|
||||
|
|
||||
= note: this character is included in the Not_NFKC Unicode general security profile
|
||||
|
||||
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
|
||||
--> $DIR/lint-uncommon-codepoints.rs:9:9
|
||||
error: identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'
|
||||
--> $DIR/lint-uncommon-codepoints.rs:10:9
|
||||
|
|
||||
LL | let ㇻㇲㇳ = "rust";
|
||||
| ^^^^^^
|
||||
|
|
||||
= note: these characters are included in the Unicode general security profile
|
||||
|
||||
warning: constant `µ` should have an upper case name
|
||||
--> $DIR/lint-uncommon-codepoints.rs:3:7
|
||||
|
Loading…
Reference in New Issue
Block a user