From 0bd5dd6449c9db734bd2d1700ea4b50e22b220be Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Fri, 10 Jul 2015 21:37:21 +0300
Subject: [PATCH 1/3] Improve incomplete unicode escape reporting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This improves the diagnostic messages emitted when a \u escape is used
incorrectly and the opening { is missing. Instead of saying “unknown character
escape: u”, the lexer now reports that the unicode escape sequence is
incomplete and suggests the correct syntax.
---
 src/libsyntax/parse/lexer/mod.rs              | 24 +++++++++++++++----
 .../parse-fail/issue-23620-invalid-escapes.rs |  2 +-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 507bd9de2a1..b5085b5c44c 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -172,6 +172,11 @@ impl<'a> StringReader<'a> {
         self.span_diagnostic.span_err(sp, m)
     }
 
+    /// Attach the help message `m` to span `sp` by forwarding to `span_diagnostic.span_help`.
+    pub fn help_span(&self, sp: Span, m: &str) {
+        self.span_diagnostic.span_help(sp, m)
+    }
+
     /// Report a fatal error spanning [`from_pos`, `to_pos`).
     fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
         self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
@@ -182,6 +187,11 @@ impl<'a> StringReader<'a> {
         self.err_span(codemap::mk_sp(from_pos, to_pos), m)
     }
 
+    /// Attach a help message to the span [`from_pos`, `to_pos`), built with `codemap::mk_sp`.
+    fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
+        self.help_span(codemap::mk_sp(from_pos, to_pos), m)
+    }
+
     /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
     /// escaped character to the error message
     fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
@@ -742,6 +752,13 @@ impl<'a> StringReader<'a> {
                                    valid
                                 }
                             }
+                            'u' if !ascii_only => {
+                                self.err_span_(escaped_pos, self.last_pos,
+                                    "incomplete unicode escape sequence");
+                                self.help_span_(escaped_pos, self.last_pos,
+                                    "format of unicode escape sequences is `\\u{…}`");
+                                false
+                            }
                             '\n' if delim == '"' => {
                                 self.consume_whitespace();
                                 true
@@ -757,16 +774,13 @@ impl<'a> StringReader<'a> {
                                     if ascii_only { "unknown byte escape" }
                                     else { "unknown character escape" },
                                     c);
-                                let sp = codemap::mk_sp(escaped_pos, last_pos);
                                 if e == '\r' {
-                                    self.span_diagnostic.span_help(
-                                        sp,
+                                    self.help_span_(escaped_pos, last_pos,
                                         "this is an isolated carriage return; consider checking \
                                          your editor and version control settings")
                                 }
                                 if (e == '{' || e == '}') && !ascii_only {
-                                    self.span_diagnostic.span_help(
-                                        sp,
+                                    self.help_span_(escaped_pos, last_pos,
                                         "if used in a formatting string, \
                                         curly braces are escaped with `{{` and `}}`")
                                 }
diff --git a/src/test/parse-fail/issue-23620-invalid-escapes.rs b/src/test/parse-fail/issue-23620-invalid-escapes.rs
index 7930ea75bf5..98db3efe114 100644
--- a/src/test/parse-fail/issue-23620-invalid-escapes.rs
+++ b/src/test/parse-fail/issue-23620-invalid-escapes.rs
@@ -41,5 +41,5 @@ fn main() {
     //~^ ERROR illegal unicode character escape
     //~^^ ERROR illegal character in numeric character escape:
     //~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
-    //~^^^^ ERROR unknown character escape: u
+    //~^^^^ ERROR incomplete unicode escape sequence
 }

From d22f189da13f8ffb3c9227a038615608e99a6211 Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Fri, 10 Jul 2015 21:41:37 +0300
Subject: [PATCH 2/3] Improve some of the string escape diagnostic spans

---
 src/libsyntax/parse/lexer/mod.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b5085b5c44c..edaac3b09ba 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -742,7 +742,7 @@ impl<'a> StringReader<'a> {
                                 let valid = self.scan_unicode_escape(delim);
                                 if valid && ascii_only {
                                     self.err_span_(
-                                        escaped_pos,
+                                        start,
                                         self.last_pos,
                                         "unicode escape sequences cannot be used as a byte or in \
                                         a byte string"
@@ -753,9 +753,9 @@ impl<'a> StringReader<'a> {
                                 }
                             }
                             'u' if !ascii_only => {
-                                self.err_span_(escaped_pos, self.last_pos,
+                                self.err_span_(start, self.last_pos,
                                     "incomplete unicode escape sequence");
-                                self.help_span_(escaped_pos, self.last_pos,
+                                self.help_span_(start, self.last_pos,
                                     "format of unicode escape sequences is `\\u{…}`");
                                 false
                             }
@@ -862,14 +862,12 @@ impl<'a> StringReader<'a> {
             valid = false;
         }
 
-        self.bump(); // past the ending }
-
         if valid && (char::from_u32(accum_int).is_none() || count == 0) {
             self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
             valid = false;
         }
 
-
+        self.bump(); // past the ending }
         valid
     }
 

From 4d65ef45491b62fbecdb9a24822c216aa96bb34e Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Fri, 10 Jul 2015 22:31:44 +0300
Subject: [PATCH 3/3] =?UTF-8?q?Tell=20unicode=20escapes=20can=E2=80=99t=20?=
 =?UTF-8?q?be=20used=20as=20bytes=20earlier/more?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/libsyntax/parse/lexer/mod.rs              | 30 +++++++++----------
 .../parse-fail/issue-23620-invalid-escapes.rs |  8 +++--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index edaac3b09ba..b6a3788dacc 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -738,26 +738,24 @@ impl<'a> StringReader<'a> {
                         return match e {
                             'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
                             'x' => self.scan_byte_escape(delim, !ascii_only),
-                            'u' if self.curr_is('{') => {
-                                let valid = self.scan_unicode_escape(delim);
-                                if valid && ascii_only {
-                                    self.err_span_(
-                                        start,
-                                        self.last_pos,
+                            'u' => {
+                                let valid = if self.curr_is('{') {
+                                    self.scan_unicode_escape(delim) && !ascii_only
+                                } else {
+                                    self.err_span_(start, self.last_pos,
+                                        "incorrect unicode escape sequence");
+                                    self.help_span_(start, self.last_pos,
+                                        "format of unicode escape sequences is `\\u{…}`");
+                                    false
+                                };
+                                if ascii_only {
+                                    self.err_span_(start, self.last_pos,
                                         "unicode escape sequences cannot be used as a byte or in \
                                         a byte string"
                                     );
-                                    false
-                                } else {
-                                   valid
                                 }
-                            }
-                            'u' if !ascii_only => {
-                                self.err_span_(start, self.last_pos,
-                                    "incomplete unicode escape sequence");
-                                self.help_span_(start, self.last_pos,
-                                    "format of unicode escape sequences is `\\u{…}`");
-                                false
+                                valid
+
                             }
                             '\n' if delim == '"' => {
                                 self.consume_whitespace();
diff --git a/src/test/parse-fail/issue-23620-invalid-escapes.rs b/src/test/parse-fail/issue-23620-invalid-escapes.rs
index 98db3efe114..1790b9164b7 100644
--- a/src/test/parse-fail/issue-23620-invalid-escapes.rs
+++ b/src/test/parse-fail/issue-23620-invalid-escapes.rs
@@ -16,7 +16,8 @@ fn main() {
     //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
 
     let _ = b'\u';
-    //~^ ERROR unknown byte escape: u
+    //~^ ERROR incorrect unicode escape sequence
+    //~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
 
     let _ = b'\x5';
     //~^ ERROR numeric character escape is too short
@@ -35,11 +36,12 @@ fn main() {
     let _ = b"\u{a4a4} \xf \u";
     //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
     //~^^ ERROR illegal character in numeric character escape:
-    //~^^^ ERROR unknown byte escape: u
+    //~^^^ ERROR incorrect unicode escape sequence
+    //~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
 
     let _ = "\u{ffffff} \xf \u";
     //~^ ERROR illegal unicode character escape
     //~^^ ERROR illegal character in numeric character escape:
     //~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
-    //~^^^^ ERROR incomplete unicode escape sequence
+    //~^^^^ ERROR incorrect unicode escape sequence
 }