Rollup merge of #103651 - Alexendoo:parse-format-unicode-escapes, r=wesleywiser
Fix `rustc_parse_format` spans following escaped UTF-8 multibyte chars

Currently, too many skips are created for char escapes that are larger than 1 byte when encoded in UTF-8 ([playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=c77a9dc669b69b167271b59ed2c8d88c)):

```rust
fn main() {
    format!("\u{df}{a}");
    format!("\u{211d}{a}");
    format!("\u{1f4a3}{a}");
}
```

```
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:2:22
  |
2 |     format!("\u{df}{a}");
  |                      ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:3:25
  |
3 |     format!("\u{211d}{a}");
  |                         ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:4:27
  |
4 |     format!("\u{1f4a3}{a}");
  |                           ^ not found in this scope
```

This change reduces the number of skips to account for that.

Fixes https://github.com/rust-lang/rust-clippy/issues/9727
This commit is contained in:
commit
4946ee7c8f
@ -819,19 +819,19 @@ fn find_skips_from_snippet(
|
||||
};
|
||||
|
||||
fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> {
|
||||
let mut s = snippet.char_indices().peekable();
|
||||
let mut s = snippet.char_indices();
|
||||
let mut skips = vec![];
|
||||
while let Some((pos, c)) = s.next() {
|
||||
match (c, s.peek()) {
|
||||
match (c, s.clone().next()) {
|
||||
// skip whitespace and empty lines ending in '\\'
|
||||
('\\', Some((next_pos, '\n'))) if !is_raw => {
|
||||
skips.push(pos);
|
||||
skips.push(*next_pos);
|
||||
skips.push(next_pos);
|
||||
let _ = s.next();
|
||||
|
||||
while let Some((pos, c)) = s.peek() {
|
||||
while let Some((pos, c)) = s.clone().next() {
|
||||
if matches!(c, ' ' | '\n' | '\t') {
|
||||
skips.push(*pos);
|
||||
skips.push(pos);
|
||||
let _ = s.next();
|
||||
} else {
|
||||
break;
|
||||
@ -839,7 +839,7 @@ fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> {
|
||||
}
|
||||
}
|
||||
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
|
||||
skips.push(*next_pos);
|
||||
skips.push(next_pos);
|
||||
let _ = s.next();
|
||||
}
|
||||
('\\', Some((_, 'x'))) if !is_raw => {
|
||||
@ -858,19 +858,30 @@ fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> {
|
||||
}
|
||||
if let Some((next_pos, next_c)) = s.next() {
|
||||
if next_c == '{' {
|
||||
skips.push(next_pos);
|
||||
let mut i = 0; // consume up to 6 hexanumeric chars + closing `}`
|
||||
while let (Some((next_pos, c)), true) = (s.next(), i < 7) {
|
||||
if c.is_digit(16) {
|
||||
skips.push(next_pos);
|
||||
} else if c == '}' {
|
||||
skips.push(next_pos);
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
// consume up to 6 hexadecimal chars
|
||||
let digits_len =
|
||||
s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
|
||||
|
||||
let len_utf8 = s
|
||||
.as_str()
|
||||
.get(..digits_len)
|
||||
.and_then(|digits| u32::from_str_radix(digits, 16).ok())
|
||||
.and_then(char::from_u32)
|
||||
.map_or(1, char::len_utf8);
|
||||
|
||||
// Skip the digits, for chars that encode to more than 1 utf-8 byte
|
||||
// exclude as many digits as it is greater than 1 byte
|
||||
//
|
||||
// So for a 3 byte character, exclude 2 digits
|
||||
let required_skips =
|
||||
digits_len.saturating_sub(len_utf8.saturating_sub(1));
|
||||
|
||||
// skip '{' and '}' also
|
||||
for pos in (next_pos..).take(required_skips + 2) {
|
||||
skips.push(pos)
|
||||
}
|
||||
|
||||
s.nth(digits_len);
|
||||
} else if next_c.is_digit(16) {
|
||||
skips.push(next_pos);
|
||||
// We suggest adding `{` and `}` when appropriate, accept it here as if
|
||||
|
19
src/test/ui/fmt/unicode-escape-spans.rs
Normal file
19
src/test/ui/fmt/unicode-escape-spans.rs
Normal file
@ -0,0 +1,19 @@
|
||||
fn main() {
|
||||
// 1 byte in UTF-8
|
||||
format!("\u{000041}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{0041}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{41}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{0}{a}"); //~ ERROR cannot find value
|
||||
|
||||
// 2 bytes
|
||||
format!("\u{0df}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{df}{a}"); //~ ERROR cannot find value
|
||||
|
||||
// 3 bytes
|
||||
format!("\u{00211d}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{211d}{a}"); //~ ERROR cannot find value
|
||||
|
||||
// 4 bytes
|
||||
format!("\u{1f4a3}{a}"); //~ ERROR cannot find value
|
||||
format!("\u{10ffff}{a}"); //~ ERROR cannot find value
|
||||
}
|
63
src/test/ui/fmt/unicode-escape-spans.stderr
Normal file
63
src/test/ui/fmt/unicode-escape-spans.stderr
Normal file
@ -0,0 +1,63 @@
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:3:25
|
||||
|
|
||||
LL | format!("\u{000041}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:4:23
|
||||
|
|
||||
LL | format!("\u{0041}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:5:21
|
||||
|
|
||||
LL | format!("\u{41}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:6:20
|
||||
|
|
||||
LL | format!("\u{0}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:9:22
|
||||
|
|
||||
LL | format!("\u{0df}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:10:21
|
||||
|
|
||||
LL | format!("\u{df}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:13:25
|
||||
|
|
||||
LL | format!("\u{00211d}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:14:23
|
||||
|
|
||||
LL | format!("\u{211d}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:17:24
|
||||
|
|
||||
LL | format!("\u{1f4a3}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `a` in this scope
|
||||
--> $DIR/unicode-escape-spans.rs:18:25
|
||||
|
|
||||
LL | format!("\u{10ffff}{a}");
|
||||
| ^ not found in this scope
|
||||
|
||||
error: aborting due to 10 previous errors
|
||||
|
||||
For more information about this error, try `rustc --explain E0425`.
|
Loading…
Reference in New Issue
Block a user