Rollup merge of #120259 - HTGAzureX1212:HTGAzureX1212/split-diagnostics-uncommon-codepoints, r=Manishearth

Split Diagnostics for Uncommon Codepoints: Add List to Display Characters Involved

This pull request makes the `uncommon_codepoints` lint list the uncommon codepoints it found in the identifier, which is the first step outlined in #120228.

Example rendered diagnostic:
```
error: identifier contains an uncommon Unicode codepoint: 'µ'
  --> $DIR/lint-uncommon-codepoints.rs:3:7
   |
LL | const µ: f64 = 0.000001;
   |       ^
   |
note: the lint level is defined here
  --> $DIR/lint-uncommon-codepoints.rs:1:9
   |
LL | #![deny(uncommon_codepoints)]
   |         ^^^^^^^^^^^^^^^^^^^
```

(Retrying #120258.)
This commit is contained in:
León Orell Valerian Liehr 2024-01-23 21:53:59 +01:00 committed by GitHub
commit 4d9b983368
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 34 additions and 10 deletions

View File

@ -110,6 +110,14 @@ fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
}
}
/// Lets a list of characters be passed as a diagnostic argument.
///
/// Every character is rendered through its `Debug` formatting (so it appears
/// quoted, and escaped where `Debug` escapes it) and the rendered pieces are
/// wrapped in the `StrListSepByAnd` variant.
impl IntoDiagnosticArg for Vec<char> {
    fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
        // One owned string per character, in the order the characters were given.
        let quoted = self.into_iter().map(|ch| Cow::Owned(format!("{ch:?}"))).collect();
        DiagnosticArgValue::StrListSepByAnd(quoted)
    }
}
impl IntoDiagnosticArg for Symbol {
fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
self.to_ident_string().into_diagnostic_arg()

View File

@ -240,7 +240,10 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
[one] an uncommon Unicode codepoint
*[other] uncommon Unicode codepoints
}: {$codepoints}
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level

View File

@ -1107,7 +1107,10 @@ pub struct MultipleSupertraitUpcastable {
#[derive(LintDiagnostic)]
#[diag(lint_identifier_uncommon_codepoints)]
pub struct IdentifierUncommonCodepoints;
pub struct IdentifierUncommonCodepoints {
pub codepoints: Vec<char>,
pub codepoints_len: usize,
}
#[derive(LintDiagnostic)]
#[diag(lint_confusable_identifier_pair)]

View File

@ -190,7 +190,17 @@ fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
if check_uncommon_codepoints
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
{
cx.emit_span_lint(UNCOMMON_CODEPOINTS, sp, IdentifierUncommonCodepoints);
let codepoints: Vec<_> = symbol_str
.chars()
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
.collect();
let codepoints_len = codepoints.len();
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints { codepoints, codepoints_len },
);
}
}

View File

@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
// FIXME
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
//~^ WARN: identifier contains uncommon Unicode codepoints
//~^ WARN: identifier contains an uncommon Unicode codepoint
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji

View File

@ -40,7 +40,7 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
LL | let folded🙏🏿 = "modifier sequence";
| ^^^^^^^^^^
warning: identifier contains uncommon Unicode codepoints
warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
--> $DIR/lex-emoji-identifiers.rs:6:9
|
LL | let key1⃣ = "keycap sequence";

View File

@ -1,9 +1,9 @@
#![deny(uncommon_codepoints)]
const µ: f64 = 0.000001; //~ ERROR identifier contains uncommon Unicode codepoints
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
//~| WARNING should have an upper case name
fn dĳkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
fn dĳkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
fn main() {
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints

View File

@ -1,4 +1,4 @@
error: identifier contains uncommon Unicode codepoints
error: identifier contains an uncommon Unicode codepoint: 'µ'
--> $DIR/lint-uncommon-codepoints.rs:3:7
|
LL | const µ: f64 = 0.000001;
@ -10,13 +10,13 @@ note: the lint level is defined here
LL | #![deny(uncommon_codepoints)]
| ^^^^^^^^^^^^^^^^^^^
error: identifier contains uncommon Unicode codepoints
error: identifier contains an uncommon Unicode codepoint: 'ĳ'
--> $DIR/lint-uncommon-codepoints.rs:6:4
|
LL | fn dĳkstra() {}
| ^^^^^^^
error: identifier contains uncommon Unicode codepoints
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
--> $DIR/lint-uncommon-codepoints.rs:9:9
|
LL | let ㇻㇲㇳ = "rust";