From 0bd5dd6449c9db734bd2d1700ea4b50e22b220be Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 10 Jul 2015 21:37:21 +0300 Subject: [PATCH 1/3] Improve incomplete unicode escape reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves the diagnostic messages emitted when a \u escape is used incorrectly and the opening { is missing. Instead of saying “unknown character escape: u”, it will now report that the unicode escape sequence is incomplete and suggest the correct syntax. --- src/libsyntax/parse/lexer/mod.rs | 24 +++++++++++++++---- .../parse-fail/issue-23620-invalid-escapes.rs | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 507bd9de2a1..b5085b5c44c 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -172,6 +172,11 @@ impl<'a> StringReader<'a> { self.span_diagnostic.span_err(sp, m) } + /// Suggest some help with a given span. + pub fn help_span(&self, sp: Span, m: &str) { + self.span_diagnostic.span_help(sp, m) + } + /// Report a fatal error spanning [`from_pos`, `to_pos`). fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! { self.fatal_span(codemap::mk_sp(from_pos, to_pos), m) @@ -182,6 +187,11 @@ impl<'a> StringReader<'a> { self.err_span(codemap::mk_sp(from_pos, to_pos), m) } + /// Suggest some help spanning [`from_pos`, `to_pos`). + fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) { + self.help_span(codemap::mk_sp(from_pos, to_pos), m) + } + /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an /// escaped character to the error message fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! 
{ @@ -742,6 +752,13 @@ impl<'a> StringReader<'a> { valid } } + 'u' if !ascii_only => { + self.err_span_(escaped_pos, self.last_pos, + "incomplete unicode escape sequence"); + self.help_span_(escaped_pos, self.last_pos, + "format of unicode escape sequences is `\\u{…}`"); + false + } '\n' if delim == '"' => { self.consume_whitespace(); true @@ -757,16 +774,13 @@ impl<'a> StringReader<'a> { if ascii_only { "unknown byte escape" } else { "unknown character escape" }, c); - let sp = codemap::mk_sp(escaped_pos, last_pos); if e == '\r' { - self.span_diagnostic.span_help( - sp, + self.help_span_(escaped_pos, last_pos, "this is an isolated carriage return; consider checking \ your editor and version control settings") } if (e == '{' || e == '}') && !ascii_only { - self.span_diagnostic.span_help( - sp, + self.help_span_(escaped_pos, last_pos, "if used in a formatting string, \ curly braces are escaped with `{{` and `}}`") } diff --git a/src/test/parse-fail/issue-23620-invalid-escapes.rs b/src/test/parse-fail/issue-23620-invalid-escapes.rs index 7930ea75bf5..98db3efe114 100644 --- a/src/test/parse-fail/issue-23620-invalid-escapes.rs +++ b/src/test/parse-fail/issue-23620-invalid-escapes.rs @@ -41,5 +41,5 @@ fn main() { //~^ ERROR illegal unicode character escape //~^^ ERROR illegal character in numeric character escape: //~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f] - //~^^^^ ERROR unknown character escape: u + //~^^^^ ERROR incomplete unicode escape sequence } From d22f189da13f8ffb3c9227a038615608e99a6211 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 10 Jul 2015 21:41:37 +0300 Subject: [PATCH 2/3] Improve some of the string escape diagnostic spans --- src/libsyntax/parse/lexer/mod.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b5085b5c44c..edaac3b09ba 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ 
b/src/libsyntax/parse/lexer/mod.rs @@ -742,7 +742,7 @@ impl<'a> StringReader<'a> { let valid = self.scan_unicode_escape(delim); if valid && ascii_only { self.err_span_( - escaped_pos, + start, self.last_pos, "unicode escape sequences cannot be used as a byte or in \ a byte string" @@ -753,9 +753,9 @@ impl<'a> StringReader<'a> { } } 'u' if !ascii_only => { - self.err_span_(escaped_pos, self.last_pos, + self.err_span_(start, self.last_pos, "incomplete unicode escape sequence"); - self.help_span_(escaped_pos, self.last_pos, + self.help_span_(start, self.last_pos, "format of unicode escape sequences is `\\u{…}`"); false } @@ -862,14 +862,12 @@ impl<'a> StringReader<'a> { valid = false; } - self.bump(); // past the ending } - if valid && (char::from_u32(accum_int).is_none() || count == 0) { self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape"); valid = false; } - + self.bump(); // past the ending } valid } From 4d65ef45491b62fbecdb9a24822c216aa96bb34e Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 10 Jul 2015 22:31:44 +0300 Subject: [PATCH 3/3] =?UTF-8?q?Tell=20unicode=20escapes=20can=E2=80=99t=20?= =?UTF-8?q?be=20used=20as=20bytes=20earlier/more?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libsyntax/parse/lexer/mod.rs | 30 +++++++++---------- .../parse-fail/issue-23620-invalid-escapes.rs | 8 +++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index edaac3b09ba..b6a3788dacc 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -738,26 +738,24 @@ impl<'a> StringReader<'a> { return match e { 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, 'x' => self.scan_byte_escape(delim, !ascii_only), - 'u' if self.curr_is('{') => { - let valid = self.scan_unicode_escape(delim); - if valid && ascii_only { - self.err_span_( - start, - self.last_pos, + 'u' => { + let valid = 
if self.curr_is('{') { + self.scan_unicode_escape(delim) && !ascii_only + } else { + self.err_span_(start, self.last_pos, + "incorrect unicode escape sequence"); + self.help_span_(start, self.last_pos, + "format of unicode escape sequences is `\\u{…}`"); + false + }; + if ascii_only { + self.err_span_(start, self.last_pos, "unicode escape sequences cannot be used as a byte or in \ a byte string" ); - false - } else { - valid } - } - 'u' if !ascii_only => { - self.err_span_(start, self.last_pos, - "incomplete unicode escape sequence"); - self.help_span_(start, self.last_pos, - "format of unicode escape sequences is `\\u{…}`"); - false + valid + } '\n' if delim == '"' => { self.consume_whitespace(); diff --git a/src/test/parse-fail/issue-23620-invalid-escapes.rs b/src/test/parse-fail/issue-23620-invalid-escapes.rs index 98db3efe114..1790b9164b7 100644 --- a/src/test/parse-fail/issue-23620-invalid-escapes.rs +++ b/src/test/parse-fail/issue-23620-invalid-escapes.rs @@ -16,7 +16,8 @@ fn main() { //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string let _ = b'\u'; - //~^ ERROR unknown byte escape: u + //~^ ERROR incorrect unicode escape sequence + //~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string let _ = b'\x5'; //~^ ERROR numeric character escape is too short @@ -35,11 +36,12 @@ fn main() { let _ = b"\u{a4a4} \xf \u"; //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string //~^^ ERROR illegal character in numeric character escape: - //~^^^ ERROR unknown byte escape: u + //~^^^ ERROR incorrect unicode escape sequence + //~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string let _ = "\u{ffffff} \xf \u"; //~^ ERROR illegal unicode character escape //~^^ ERROR illegal character in numeric character escape: //~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f] - //~^^^^ ERROR incomplete unicode escape sequence + //~^^^^ 
ERROR incorrect unicode escape sequence }