diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 1394993abad..54bf4d1d6b7 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -819,19 +819,19 @@ fn find_skips_from_snippet( }; fn find_skips(snippet: &str, is_raw: bool) -> Vec { - let mut s = snippet.char_indices().peekable(); + let mut s = snippet.char_indices(); let mut skips = vec![]; while let Some((pos, c)) = s.next() { - match (c, s.peek()) { + match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' ('\\', Some((next_pos, '\n'))) if !is_raw => { skips.push(pos); - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); - while let Some((pos, c)) = s.peek() { + while let Some((pos, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(*pos); + skips.push(pos); let _ = s.next(); } else { break; @@ -839,7 +839,7 @@ fn find_skips(snippet: &str, is_raw: bool) -> Vec { } } ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); } ('\\', Some((_, 'x'))) if !is_raw => { @@ -858,19 +858,30 @@ fn find_skips(snippet: &str, is_raw: bool) -> Vec { } if let Some((next_pos, next_c)) = s.next() { if next_c == '{' { - skips.push(next_pos); - let mut i = 0; // consume up to 6 hexanumeric chars + closing `}` - while let (Some((next_pos, c)), true) = (s.next(), i < 7) { - if c.is_digit(16) { - skips.push(next_pos); - } else if c == '}' { - skips.push(next_pos); - break; - } else { - break; - } - i += 1; + // consume up to 6 hexanumeric chars + let digits_len = + s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count(); + + let len_utf8 = s + .as_str() + .get(..digits_len) + .and_then(|digits| u32::from_str_radix(digits, 16).ok()) + .and_then(char::from_u32) + .map_or(1, char::len_utf8); + + // Skip the digits, for chars that encode to more than 1 utf-8 byte + // exclude as many digits as it is greater than 1 byte + // + // So for a 3 byte character, exclude 2 digits + let required_skips = + digits_len.saturating_sub(len_utf8.saturating_sub(1)); + + // skip '{' and '}' also + for pos in (next_pos..).take(required_skips + 2) { + skips.push(pos) } + + s.nth(digits_len); } else if next_c.is_digit(16) { skips.push(next_pos); // We suggest adding `{` and `}` when appropriate, accept it here as if diff --git a/src/test/ui/fmt/unicode-escape-spans.rs b/src/test/ui/fmt/unicode-escape-spans.rs new file mode 100644 index 00000000000..753d91ce58e --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.rs @@ -0,0 +1,19 @@ +fn main() { + // 1 byte in UTF-8 + format!("\u{000041}{a}"); //~ ERROR cannot find value + format!("\u{0041}{a}"); //~ ERROR cannot find value + format!("\u{41}{a}"); //~ ERROR cannot find value + format!("\u{0}{a}"); //~ ERROR cannot find value + + // 2 bytes + format!("\u{0df}{a}"); //~ ERROR cannot find value + format!("\u{df}{a}"); //~ ERROR cannot find value + + // 3 bytes + format!("\u{00211d}{a}"); //~ ERROR cannot find value + format!("\u{211d}{a}"); //~ ERROR cannot find value + + // 4 bytes + format!("\u{1f4a3}{a}"); //~ ERROR cannot find value + format!("\u{10ffff}{a}"); //~ ERROR cannot find value +} diff --git a/src/test/ui/fmt/unicode-escape-spans.stderr b/src/test/ui/fmt/unicode-escape-spans.stderr new file mode 100644 index 00000000000..1d8473f01b8 --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.stderr @@ -0,0 +1,63 @@ +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:3:25 + | +LL | format!("\u{000041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:4:23 + | +LL | format!("\u{0041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:5:21 + | +LL | format!("\u{41}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:6:20 + | +LL | format!("\u{0}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:9:22 + | +LL | format!("\u{0df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:10:21 + | +LL | format!("\u{df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:13:25 + | +LL | format!("\u{00211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:14:23 + | +LL | format!("\u{211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:17:24 + | +LL | format!("\u{1f4a3}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:18:25 + | +LL | format!("\u{10ffff}{a}"); + | ^ not found in this scope + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0425`.