Auto merge of #26947 - nagisa:unicode-escape-error, r=nrc
Inspired by the now-mysteriously-closed https://github.com/rust-lang/rust/pull/26782. This PR introduces better error messages when unicode escapes have an invalid format (e.g. `\uFFFF`). It also makes rustc always tell the user that unicode escapes may not be used in bytes and byte strings, fixes some spans so that they no longer include unnecessary characters, and makes some others include the escape's leading backslash.
This commit is contained in:
commit
07be6299d8
@ -172,6 +172,11 @@ impl<'a> StringReader<'a> {
|
||||
self.span_diagnostic.span_err(sp, m)
|
||||
}
|
||||
|
||||
/// Suggest some help with a given span.
|
||||
pub fn help_span(&self, sp: Span, m: &str) {
|
||||
self.span_diagnostic.span_help(sp, m)
|
||||
}
|
||||
|
||||
/// Report a fatal error spanning [`from_pos`, `to_pos`).
|
||||
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
|
||||
self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
|
||||
@ -182,6 +187,11 @@ impl<'a> StringReader<'a> {
|
||||
self.err_span(codemap::mk_sp(from_pos, to_pos), m)
|
||||
}
|
||||
|
||||
/// Suggest some help spanning [`from_pos`, `to_pos`).
|
||||
fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
|
||||
self.help_span(codemap::mk_sp(from_pos, to_pos), m)
|
||||
}
|
||||
|
||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
||||
/// escaped character to the error message
|
||||
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
|
||||
@ -728,19 +738,24 @@ impl<'a> StringReader<'a> {
|
||||
return match e {
|
||||
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
||||
'x' => self.scan_byte_escape(delim, !ascii_only),
|
||||
'u' if self.curr_is('{') => {
|
||||
let valid = self.scan_unicode_escape(delim);
|
||||
if valid && ascii_only {
|
||||
self.err_span_(
|
||||
escaped_pos,
|
||||
self.last_pos,
|
||||
'u' => {
|
||||
let valid = if self.curr_is('{') {
|
||||
self.scan_unicode_escape(delim) && !ascii_only
|
||||
} else {
|
||||
self.err_span_(start, self.last_pos,
|
||||
"incorrect unicode escape sequence");
|
||||
self.help_span_(start, self.last_pos,
|
||||
"format of unicode escape sequences is `\\u{…}`");
|
||||
false
|
||||
};
|
||||
if ascii_only {
|
||||
self.err_span_(start, self.last_pos,
|
||||
"unicode escape sequences cannot be used as a byte or in \
|
||||
a byte string"
|
||||
);
|
||||
false
|
||||
} else {
|
||||
valid
|
||||
}
|
||||
valid
|
||||
|
||||
}
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
@ -757,16 +772,13 @@ impl<'a> StringReader<'a> {
|
||||
if ascii_only { "unknown byte escape" }
|
||||
else { "unknown character escape" },
|
||||
c);
|
||||
let sp = codemap::mk_sp(escaped_pos, last_pos);
|
||||
if e == '\r' {
|
||||
self.span_diagnostic.span_help(
|
||||
sp,
|
||||
self.help_span_(escaped_pos, last_pos,
|
||||
"this is an isolated carriage return; consider checking \
|
||||
your editor and version control settings")
|
||||
}
|
||||
if (e == '{' || e == '}') && !ascii_only {
|
||||
self.span_diagnostic.span_help(
|
||||
sp,
|
||||
self.help_span_(escaped_pos, last_pos,
|
||||
"if used in a formatting string, \
|
||||
curly braces are escaped with `{{` and `}}`")
|
||||
}
|
||||
@ -848,14 +860,12 @@ impl<'a> StringReader<'a> {
|
||||
valid = false;
|
||||
}
|
||||
|
||||
self.bump(); // past the ending }
|
||||
|
||||
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
|
||||
self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
|
||||
self.bump(); // past the ending }
|
||||
valid
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,8 @@ fn main() {
|
||||
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = b'\u';
|
||||
//~^ ERROR unknown byte escape: u
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = b'\x5';
|
||||
//~^ ERROR numeric character escape is too short
|
||||
@ -35,11 +36,12 @@ fn main() {
|
||||
let _ = b"\u{a4a4} \xf \u";
|
||||
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
//~^^ ERROR illegal character in numeric character escape:
|
||||
//~^^^ ERROR unknown byte escape: u
|
||||
//~^^^ ERROR incorrect unicode escape sequence
|
||||
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = "\u{ffffff} \xf \u";
|
||||
//~^ ERROR illegal unicode character escape
|
||||
//~^^ ERROR illegal character in numeric character escape:
|
||||
//~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
//~^^^^ ERROR unknown character escape: u
|
||||
//~^^^^ ERROR incorrect unicode escape sequence
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user