From bccdba02960b3cd428addbc2c856065ebb81eb04 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 6 Jun 2014 16:04:04 +0100
Subject: [PATCH 1/7] Add a b'x' byte literal of type u8.

---
 src/librustc/middle/const_eval.rs       |  1 +
 src/librustc/middle/lint.rs             |  1 +
 src/librustc/middle/trans/consts.rs     |  1 +
 src/librustc/middle/typeck/check/mod.rs |  1 +
 src/librustdoc/clean/mod.rs             |  8 +++
 src/librustdoc/html/highlight.rs        |  2 +-
 src/libsyntax/ast.rs                    |  1 +
 src/libsyntax/ext/concat.rs             |  1 +
 src/libsyntax/ext/quote.rs              |  6 +++
 src/libsyntax/parse/lexer/mod.rs        | 68 +++++++++++++++++++++++--
 src/libsyntax/parse/parser.rs           |  3 +-
 src/libsyntax/parse/token.rs            | 11 ++++
 src/libsyntax/print/pprust.rs           |  6 +++
 src/test/compile-fail/byte-literals.rs  | 25 +++++++++
 src/test/compile-fail/concat.rs         |  1 +
 src/test/run-pass/byte-literals.rs      | 38 ++++++++++++++
 16 files changed, 169 insertions(+), 5 deletions(-)
 create mode 100644 src/test/compile-fail/byte-literals.rs
 create mode 100644 src/test/run-pass/byte-literals.rs

diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs
index 13d0443a00f..3c5b0664f03 100644
--- a/src/librustc/middle/const_eval.rs
+++ b/src/librustc/middle/const_eval.rs
@@ -506,6 +506,7 @@ pub fn lit_to_const(lit: &Lit) -> const_val {
         LitBinary(ref data) => {
             const_binary(Rc::new(data.iter().map(|x| *x).collect()))
         }
+        LitByte(n) => const_uint(n as u64),
         LitChar(n) => const_uint(n as u64),
         LitInt(n, _) => const_int(n),
         LitUint(n, _) => const_uint(n),
diff --git a/src/librustc/middle/lint.rs b/src/librustc/middle/lint.rs
index 392821a6ad3..4c11693e7a6 100644
--- a/src/librustc/middle/lint.rs
+++ b/src/librustc/middle/lint.rs
@@ -805,6 +805,7 @@ fn check_type_limits(cx: &Context, e: &ast::Expr) {
                     } else { t };
                     let (min, max) = uint_ty_range(uint_type);
                     let lit_val: u64 = match lit.node {
+                        ast::LitByte(_v) => return,  // _v is u8, within range by definition
                         ast::LitInt(v, _) => v as u64,
                         ast::LitUint(v, _) => v,
                         ast::LitIntUnsuffixed(v) => v as u64,
diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs
index 45019edc58b..f5e66a527e7 100644
--- a/src/librustc/middle/trans/consts.rs
+++ b/src/librustc/middle/trans/consts.rs
@@ -43,6 +43,7 @@ pub fn const_lit(cx: &CrateContext, e: &ast::Expr, lit: ast::Lit)
     -> ValueRef {
     let _icx = push_ctxt("trans_lit");
     match lit.node {
+        ast::LitByte(b) => C_integral(Type::uint_from_ty(cx, ast::TyU8), b as u64, false),
         ast::LitChar(i) => C_integral(Type::char(cx), i as u64, false),
         ast::LitInt(i, t) => C_integral(Type::int_from_ty(cx, t), i as u64, true),
         ast::LitUint(u, t) => C_integral(Type::uint_from_ty(cx, t), u, false),
diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs
index 73b92e5b868..2516a00ff76 100644
--- a/src/librustc/middle/typeck/check/mod.rs
+++ b/src/librustc/middle/typeck/check/mod.rs
@@ -1715,6 +1715,7 @@ pub fn check_lit(fcx: &FnCtxt, lit: &ast::Lit) -> ty::t {
         ast::LitBinary(..) => {
             ty::mk_slice(tcx, ty::ReStatic, ty::mt{ ty: ty::mk_u8(), mutbl: ast::MutImmutable })
         }
+        ast::LitByte(_) => ty::mk_u8(),
         ast::LitChar(_) => ty::mk_char(),
         ast::LitInt(_, t) => ty::mk_mach_int(t),
         ast::LitUint(_, t) => ty::mk_mach_uint(t),
diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs
index 823e0f6a1b3..5e84a90121f 100644
--- a/src/librustdoc/clean/mod.rs
+++ b/src/librustdoc/clean/mod.rs
@@ -1924,6 +1924,14 @@ fn lit_to_str(lit: &ast::Lit) -> String {
     match lit.node {
         ast::LitStr(ref st, _) => st.get().to_string(),
         ast::LitBinary(ref data) => format!("{:?}", data.as_slice()),
+        ast::LitByte(b) => {
+            let mut res = String::from_str("b'");
+            (b as char).escape_default(|c| {
+                res.push_char(c);
+            });
+            res.push_char('\'');
+            res
+        },
         ast::LitChar(c) => format!("'{}'", c),
         ast::LitInt(i, _t) => i.to_str(),
         ast::LitUint(u, _t) => u.to_str(),
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index f0d7b029deb..8a63b55afed 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
+            t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
             t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs
index 86dd736ceea..aeafc0e306c 100644
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@@ -616,6 +616,7 @@ pub type Lit = Spanned<Lit_>;
 pub enum Lit_ {
     LitStr(InternedString, StrStyle),
     LitBinary(Rc<Vec<u8> >),
+    LitByte(u8),
     LitChar(char),
     LitInt(i64, IntTy),
     LitUint(u64, UintTy),
diff --git a/src/libsyntax/ext/concat.rs b/src/libsyntax/ext/concat.rs
index 83f45ca9f16..670e38327d6 100644
--- a/src/libsyntax/ext/concat.rs
+++ b/src/libsyntax/ext/concat.rs
@@ -47,6 +47,7 @@ pub fn expand_syntax_ext(cx: &mut base::ExtCtxt,
                     ast::LitBool(b) => {
                         accumulator.push_str(format!("{}", b).as_slice());
                     }
+                    ast::LitByte(..) |
                     ast::LitBinary(..) => {
                         cx.span_err(e.span, "cannot concatenate a binary literal");
                     }
diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs
index 6514d8fa418..407715ab4da 100644
--- a/src/libsyntax/ext/quote.rs
+++ b/src/libsyntax/ext/quote.rs
@@ -436,6 +436,12 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
                                 vec!(mk_binop(cx, sp, binop)));
         }
 
+        LIT_BYTE(i) => {
+            let e_byte = cx.expr_lit(sp, ast::LitByte(i));
+
+            return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
+        }
+
         LIT_CHAR(i) => {
             let e_char = cx.expr_lit(sp, ast::LitChar(i));
 
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f7eac0b323f..7e4cb195cea 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -650,10 +650,13 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> token::Token {
         let c = self.curr;
-        if ident_start(c) && !self.nextch_is('"') && !self.nextch_is('#') {
+        if ident_start(c) && match (c.unwrap(), self.nextch()) {
             // Note: r as in r" or r#" is part of a raw string literal,
-            // not an identifier, and is handled further down.
-
+            // b as in b' is part of a byte literal.
+            // They are not identifiers, and are handled further down.
+           ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+           _ => true
+        } {
             let start = self.last_pos;
             while ident_continue(self.curr) {
                 self.bump();
@@ -854,6 +857,65 @@ impl<'a> StringReader<'a> {
             self.bump(); // advance curr past token
             return token::LIT_CHAR(c2);
           }
+          'b' => {
+            self.bump();
+            assert!(self.curr_is('\''), "Should have been a token::IDENT");
+            self.bump();
+            let start = self.last_pos;
+
+            // the eof will be picked up by the final `'` check below
+            let mut c2 = self.curr.unwrap_or('\x00');
+            self.bump();
+
+            match c2 {
+                '\\' => {
+                    // '\X' for some X must be a character constant:
+                    let escaped = self.curr;
+                    let escaped_pos = self.last_pos;
+                    self.bump();
+                    match escaped {
+                        None => {}
+                        Some(e) => {
+                            c2 = match e {
+                                'n' => '\n',
+                                'r' => '\r',
+                                't' => '\t',
+                                '\\' => '\\',
+                                '\'' => '\'',
+                                '"' => '"',
+                                '0' => '\x00',
+                                'x' => self.scan_numeric_escape(2u, '\''),
+                                c2 => {
+                                    self.err_span_char(escaped_pos, self.last_pos,
+                                                       "unknown byte escape", c2);
+                                    c2
+                                }
+                            }
+                        }
+                    }
+                }
+                '\t' | '\n' | '\r' | '\'' => {
+                    self.err_span_char( start, self.last_pos,
+                        "byte constant must be escaped", c2);
+                }
+                _ if c2 > '\x7F' => {
+                    self.err_span_char( start, self.last_pos,
+                        "byte constant must be ASCII. \
+                         Use a \\xHH escape for a non-ASCII byte", c2);
+                }
+                _ => {}
+            }
+            if !self.curr_is('\'') {
+                self.fatal_span_verbose(
+                                   // Byte offsetting here is okay because the
+                                   // character before position `start` are an
+                                   // ascii single quote and ascii 'b'.
+                                   start - BytePos(2), self.last_pos,
+                                   "unterminated byte constant".to_string());
+            }
+            self.bump(); // advance curr past token
+            return token::LIT_BYTE(c2 as u8);
+          }
           '"' => {
             let mut accum_str = String::new();
             let start_bpos = self.last_pos;
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index bbe0680ef14..0bd47ede214 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
 use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
 use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
 use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
-use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar};
+use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
 use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
 use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
 use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
@@ -1512,6 +1512,7 @@ impl<'a> Parser<'a> {
     // matches token_lit = LIT_INT | ...
     pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
         match *tok {
+            token::LIT_BYTE(i) => LitByte(i),
             token::LIT_CHAR(i) => LitChar(i),
             token::LIT_INT(i, it) => LitInt(i, it),
             token::LIT_UINT(u, ut) => LitUint(u, ut),
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index a4a022708d9..b8f13624a32 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -78,6 +78,7 @@ pub enum Token {
     DOLLAR,
 
     /* Literals */
+    LIT_BYTE(u8),
     LIT_CHAR(char),
     LIT_INT(i64, ast::IntTy),
     LIT_UINT(u64, ast::UintTy),
@@ -193,6 +194,14 @@ pub fn to_str(t: &Token) -> String {
       DOLLAR => "$".to_string(),
 
       /* Literals */
+      LIT_BYTE(b) => {
+          let mut res = String::from_str("b'");
+          (b as char).escape_default(|c| {
+              res.push_char(c);
+          });
+          res.push_char('\'');
+          res
+      }
       LIT_CHAR(c) => {
           let mut res = String::from_str("'");
           c.escape_default(|c| {
@@ -273,6 +282,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       IDENT(_, _) => true,
       UNDERSCORE => true,
       TILDE => true,
+      LIT_BYTE(_) => true,
       LIT_CHAR(_) => true,
       LIT_INT(_, _) => true,
       LIT_UINT(_, _) => true,
@@ -311,6 +321,7 @@ pub fn close_delimiter_for(t: &Token) -> Option<Token> {
 
 pub fn is_lit(t: &Token) -> bool {
     match *t {
+      LIT_BYTE(_) => true,
       LIT_CHAR(_) => true,
       LIT_INT(_, _) => true,
       LIT_UINT(_, _) => true,
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
index badfbe7eb15..6ea2eed293e 100644
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@@ -2305,6 +2305,12 @@ impl<'a> State<'a> {
         }
         match lit.node {
             ast::LitStr(ref st, style) => self.print_string(st.get(), style),
+            ast::LitByte(byte) => {
+                let mut res = String::from_str("b'");
+                (byte as char).escape_default(|c| res.push_char(c));
+                res.push_char('\'');
+                word(&mut self.s, res.as_slice())
+            }
             ast::LitChar(ch) => {
                 let mut res = String::from_str("'");
                 ch.escape_default(|c| res.push_char(c));
diff --git a/src/test/compile-fail/byte-literals.rs b/src/test/compile-fail/byte-literals.rs
new file mode 100644
index 00000000000..436078fa762
--- /dev/null
+++ b/src/test/compile-fail/byte-literals.rs
@@ -0,0 +1,25 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// ignore-tidy-tab
+
+static FOO: u8 = b'\f';  //~ ERROR unknown byte escape
+
+pub fn main() {
+    b'\f';  //~ ERROR unknown byte escape
+    b'\x0Z';  //~ ERROR illegal character in numeric character escape: Z
+    b'	';  //~ ERROR byte constant must be escaped
+    b''';  //~ ERROR byte constant must be escaped
+    b'é';  //~ ERROR byte constant must be ASCII
+    b'a  //~ ERROR unterminated byte constant
+}
+
+
diff --git a/src/test/compile-fail/concat.rs b/src/test/compile-fail/concat.rs
index c34e402c90b..a3dc1174424 100644
--- a/src/test/compile-fail/concat.rs
+++ b/src/test/compile-fail/concat.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 fn main() {
+    concat!(b'f');  //~ ERROR: cannot concatenate a binary literal
     concat!(foo);   //~ ERROR: expected a literal
     concat!(foo()); //~ ERROR: expected a literal
 }
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
new file mode 100644
index 00000000000..560b2f0337a
--- /dev/null
+++ b/src/test/run-pass/byte-literals.rs
@@ -0,0 +1,38 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+static FOO: u8 = b'\xF0';
+
+pub fn main() {
+    assert_eq!(b'a', 97u8);
+    assert_eq!(b'\n', 10u8);
+    assert_eq!(b'\r', 13u8);
+    assert_eq!(b'\t', 9u8);
+    assert_eq!(b'\\', 92u8);
+    assert_eq!(b'\'', 39u8);
+    assert_eq!(b'\"', 34u8);
+    assert_eq!(b'\0', 0u8);
+    assert_eq!(b'\xF0', 240u8);
+    assert_eq!(FOO, 240u8);
+
+    // FIXME: Do we want this to be valid?
+    assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]);
+
+    match 42 {
+        b'*' => {},
+        _ => fail!()
+    }
+
+    match 100 {
+        b'a' .. b'z' => {},
+        _ => fail!()
+    }
+}

From d7e01b5809cd600a30bab29da698acb3d1b52409 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Sat, 7 Jun 2014 15:32:01 +0100
Subject: [PATCH 2/7] Add a b"xx" byte string literal of type &'static [u8].

---
 src/libcore/str.rs                            |   4 +
 src/libregex_macros/lib.rs                    |   2 +-
 src/librustc/middle/const_eval.rs             |   1 +
 src/librustc/middle/trans/_match.rs           |  17 +-
 src/librustdoc/html/highlight.rs              |   3 +-
 src/libsyntax/parse/lexer/mod.rs              | 157 ++++++++++++------
 src/libsyntax/parse/parser.rs                 |   3 +-
 src/libsyntax/parse/token.rs                  |  16 +-
 src/libsyntax/print/pprust.rs                 |  16 +-
 src/test/compile-fail/byte-string-literals.rs |  23 +++
 src/test/compile-fail/concat.rs               |   1 +
 src/test/run-pass/byte-literals.rs            |  12 ++
 12 files changed, 185 insertions(+), 70 deletions(-)
 create mode 100644 src/test/compile-fail/byte-string-literals.rs

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index c01997f1c42..84ffb7fb20e 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -560,6 +560,8 @@ Section: Comparing strings
 
 // share the implementation of the lang-item vs. non-lang-item
 // eq_slice.
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 #[inline]
 fn eq_slice_(a: &str, b: &str) -> bool {
     #[allow(ctypes)]
@@ -572,6 +574,8 @@ fn eq_slice_(a: &str, b: &str) -> bool {
 }
 
 /// Bytewise slice equality
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 #[cfg(not(test))]
 #[lang="str_eq"]
 #[inline]
diff --git a/src/libregex_macros/lib.rs b/src/libregex_macros/lib.rs
index 8641936cc34..ff5cada05ea 100644
--- a/src/libregex_macros/lib.rs
+++ b/src/libregex_macros/lib.rs
@@ -182,7 +182,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         #[allow(unused_variable)]
         fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
             let mut matched = false;
-            let prefix_bytes: &[u8] = &$prefix_bytes;
+            let prefix_bytes: &[u8] = $prefix_bytes;
             let mut clist = &mut Threads::new(self.which);
             let mut nlist = &mut Threads::new(self.which);
 
diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs
index 3c5b0664f03..72def2c10da 100644
--- a/src/librustc/middle/const_eval.rs
+++ b/src/librustc/middle/const_eval.rs
@@ -529,6 +529,7 @@ pub fn compare_const_vals(a: &const_val, b: &const_val) -> Option<int> {
         (&const_float(a), &const_float(b)) => compare_vals(a, b),
         (&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),
         (&const_bool(a), &const_bool(b)) => compare_vals(a, b),
+        (&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),
         _ => None
     }
 }
diff --git a/src/librustc/middle/trans/_match.rs b/src/librustc/middle/trans/_match.rs
index 9361d64250c..808d894be43 100644
--- a/src/librustc/middle/trans/_match.rs
+++ b/src/librustc/middle/trans/_match.rs
@@ -1273,13 +1273,24 @@ fn compare_values<'a>(
                     val: bool_to_i1(result.bcx, result.val)
                 }
             }
-            _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+            _ => cx.sess().bug("only strings supported in compare_values"),
         },
         ty::ty_rptr(_, mt) => match ty::get(mt.ty).sty {
             ty::ty_str => compare_str(cx, lhs, rhs, rhs_t),
-            _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+            ty::ty_vec(mt, _) => match ty::get(mt.ty).sty {
+                ty::ty_uint(ast::TyU8) => {
+                    // NOTE: cast &[u8] to &str and abuse the str_eq lang item,
+                    // which calls memcmp().
+                    let t = ty::mk_str_slice(cx.tcx(), ty::ReStatic, ast::MutImmutable);
+                    let lhs = BitCast(cx, lhs, type_of::type_of(cx.ccx(), t).ptr_to());
+                    let rhs = BitCast(cx, rhs, type_of::type_of(cx.ccx(), t).ptr_to());
+                    compare_str(cx, lhs, rhs, rhs_t)
+                },
+                _ => cx.sess().bug("only byte strings supported in compare_values"),
+            },
+            _ => cx.sess().bug("only strings and byte strings supported in compare_values"),
         },
-        _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+        _ => cx.sess().bug("only scalars, byte strings, and strings supported in compare_values"),
     }
 }
 
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 8a63b55afed..172a1be7b4e 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,8 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
+            t::LIT_BYTE(..) | t::LIT_BINARY(..) |
+                t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
             t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 7e4cb195cea..59bcf059fcd 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+           ('r', Some('"')) | ('r', Some('#')) |
+           ('b', Some('"')) | ('b', Some('\'')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
           }
           'b' => {
             self.bump();
-            assert!(self.curr_is('\''), "Should have been a token::IDENT");
-            self.bump();
-            let start = self.last_pos;
+            return match self.curr {
+                Some('\'') => parse_byte(self),
+                Some('"') => parse_byte_string(self),
+                _ => unreachable!()  // Should have been a token::IDENT above.
+            };
 
-            // the eof will be picked up by the final `'` check below
-            let mut c2 = self.curr.unwrap_or('\x00');
-            self.bump();
+            fn parse_byte(self_: &mut StringReader) -> token::Token {
+                self_.bump();
+                let start = self_.last_pos;
 
-            match c2 {
-                '\\' => {
-                    // '\X' for some X must be a character constant:
-                    let escaped = self.curr;
-                    let escaped_pos = self.last_pos;
-                    self.bump();
-                    match escaped {
-                        None => {}
-                        Some(e) => {
-                            c2 = match e {
-                                'n' => '\n',
-                                'r' => '\r',
-                                't' => '\t',
-                                '\\' => '\\',
-                                '\'' => '\'',
-                                '"' => '"',
-                                '0' => '\x00',
-                                'x' => self.scan_numeric_escape(2u, '\''),
-                                c2 => {
-                                    self.err_span_char(escaped_pos, self.last_pos,
-                                                       "unknown byte escape", c2);
-                                    c2
+                // the eof will be picked up by the final `'` check below
+                let mut c2 = self_.curr.unwrap_or('\x00');
+                self_.bump();
+
+                match c2 {
+                    '\\' => {
+                        // b'\X' for some X is an escaped byte constant:
+                        let escaped = self_.curr;
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                            None => {}
+                            Some(e) => {
+                                c2 = match e {
+                                    'n' => '\n',
+                                    'r' => '\r',
+                                    't' => '\t',
+                                    '\\' => '\\',
+                                    '\'' => '\'',
+                                    '"' => '"',
+                                    '0' => '\x00',
+                                    'x' => self_.scan_numeric_escape(2u, '\''),
+                                    c2 => {
+                                        self_.err_span_char(
+                                            escaped_pos, self_.last_pos,
+                                            "unknown byte escape", c2);
+                                        c2
+                                    }
                                 }
                             }
                         }
                     }
+                    '\t' | '\n' | '\r' | '\'' => {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be escaped", c2);
+                    }
+                    _ => if c2 > '\x7F' {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be ASCII. \
+                             Use a \\xHH escape for a non-ASCII byte", c2);
+                    }
                 }
-                '\t' | '\n' | '\r' | '\'' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be escaped", c2);
+                if !self_.curr_is('\'') {
+                    // Byte offsetting here is okay because the two
+                    // characters before position `start` are an
+                    // ASCII 'b' and an ASCII single quote.
+                    self_.fatal_span_verbose(
+                        start - BytePos(2), self_.last_pos,
+                        "unterminated byte constant".to_string());
                 }
-                _ if c2 > '\x7F' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be ASCII. \
-                         Use a \\xHH escape for a non-ASCII byte", c2);
-                }
-                _ => {}
+                self_.bump(); // advance curr past token
+                return token::LIT_BYTE(c2 as u8);
             }
-            if !self.curr_is('\'') {
-                self.fatal_span_verbose(
-                                   // Byte offsetting here is okay because the
-                                   // character before position `start` are an
-                                   // ascii single quote and ascii 'b'.
-                                   start - BytePos(2), self.last_pos,
-                                   "unterminated byte constant".to_string());
+
+            fn parse_byte_string(self_: &mut StringReader) -> token::Token {
+                self_.bump();  // step past the opening '"'
+                let start = self_.last_pos;  // span start for error reports
+                let mut value = Vec::new();  // bytes decoded so far
+                while !self_.curr_is('"') {  // loop until the closing quote
+                    if self_.is_eof() {
+                        self_.fatal_span(start, self_.last_pos,
+                                         "unterminated double quote byte string");
+                    }
+
+                    let ch = self_.curr.unwrap();
+                    self_.bump();
+                    match ch {
+                      '\\' => {  // escape sequence
+                        if self_.is_eof() {
+                            self_.fatal_span(start, self_.last_pos,
+                                             "unterminated double quote byte string");
+                        }
+
+                        let escaped = self_.curr.unwrap();
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                          'n' => value.push('\n' as u8),
+                          'r' => value.push('\r' as u8),
+                          't' => value.push('\t' as u8),
+                          '\\' => value.push('\\' as u8),
+                          '\'' => value.push('\'' as u8),
+                          '"' => value.push('"' as u8),
+                          '\n' => self_.consume_whitespace(),  // backslash-newline: no byte pushed
+                          '0' => value.push(0),
+                          'x' => {  // \x numeric escape
+                            value.push(self_.scan_numeric_escape(2u, '"') as u8);
+                          }
+                          c2 => {  // any other escape is reported; no byte is pushed
+                            self_.err_span_char(escaped_pos, self_.last_pos,
+                                                "unknown byte string escape", c2);
+                          }
+                        }
+                      }
+                      _ => {
+                        if ch <= '\x7F' {  // unescaped ASCII is taken literally
+                            value.push(ch as u8)
+                        } else {
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                "byte string must be ASCII. \
+                                 Use a \\xHH escape for a non-ASCII byte", ch);
+                        }
+                      }
+                    }
+                }
+                self_.bump();
+                return token::LIT_BINARY(Rc::new(value));
+            }
-            self.bump(); // advance curr past token
-            return token::LIT_BYTE(c2 as u8);
           }
           '"' => {
             let mut accum_str = String::new();
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 0bd47ede214..826d28ef3ff 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
 use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
 use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
 use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
-use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
+use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary};
 use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
 use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
 use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
             token::LIT_STR_RAW(s, n) => {
                 LitStr(self.id_to_interned_str(s), ast::RawStr(n))
             }
+            token::LIT_BINARY(ref v) => LitBinary(v.clone()),
             token::LPAREN => { self.expect(&token::RPAREN); LitNil },
             _ => { self.unexpected_last(tok); }
         }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index b8f13624a32..b76dcaf0b94 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -87,6 +87,7 @@ pub enum Token {
     LIT_FLOAT_UNSUFFIXED(ast::Ident),
     LIT_STR(ast::Ident),
     LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
+    LIT_BINARY(Rc<Vec<u8>>),
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -231,17 +232,22 @@ pub fn to_str(t: &Token) -> String {
         body
       }
       LIT_STR(s) => {
-          (format!("\"{}\"", get_ident(s).get().escape_default())).to_string()
+          format!("\"{}\"", get_ident(s).get().escape_default())
       }
       LIT_STR_RAW(s, n) => {
-          (format!("r{delim}\"{string}\"{delim}",
-                  delim="#".repeat(n), string=get_ident(s))).to_string()
+        format!("r{delim}\"{string}\"{delim}",
+                 delim="#".repeat(n), string=get_ident(s))
+      }
+      LIT_BINARY(ref v) => {
+          format!(
+            "b\"{}\"",
+            v.iter().map(|&b| b as char).collect::<String>().escape_default())
       }
 
       /* Name components */
       IDENT(s, _) => get_ident(s).get().to_string(),
       LIFETIME(s) => {
-          (format!("{}", get_ident(s))).to_string()
+          format!("{}", get_ident(s))
       }
       UNDERSCORE => "_".to_string(),
 
@@ -291,6 +297,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
+      LIT_BINARY(_) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -330,6 +337,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
+      LIT_BINARY(_) => true,
       _ => false
     }
 }
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
index 6ea2eed293e..fafebd3c5dc 100644
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@@ -2342,19 +2342,9 @@ impl<'a> State<'a> {
             ast::LitBool(val) => {
                 if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }
             }
-            ast::LitBinary(ref arr) => {
-                try!(self.ibox(indent_unit));
-                try!(word(&mut self.s, "["));
-                try!(self.commasep_cmnt(Inconsistent,
-                                        arr.as_slice(),
-                                        |s, u| {
-                                            word(&mut s.s,
-                                                 format!("{}",
-                                                         *u).as_slice())
-                                        },
-                                        |_| lit.span));
-                try!(word(&mut self.s, "]"));
-                self.end()
+            ast::LitBinary(ref v) => {
+                let escaped: String = v.iter().map(|&b| b as char).collect();
+                word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())
             }
         }
     }
diff --git a/src/test/compile-fail/byte-string-literals.rs b/src/test/compile-fail/byte-string-literals.rs
new file mode 100644
index 00000000000..ec67cdd77e1
--- /dev/null
+++ b/src/test/compile-fail/byte-string-literals.rs
@@ -0,0 +1,23 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// ignore-tidy-tab
+
+static FOO: &'static [u8] = b"\f";  //~ ERROR unknown byte escape
+
+pub fn main() {
+    b"\f";  //~ ERROR unknown byte escape
+    b"\x0Z";  //~ ERROR illegal character in numeric character escape: Z
+    b"é";  //~ ERROR byte constant must be ASCII
+    b"a  //~ ERROR unterminated double quote byte string
+}
+
+
diff --git a/src/test/compile-fail/concat.rs b/src/test/compile-fail/concat.rs
index a3dc1174424..dc31126e6d6 100644
--- a/src/test/compile-fail/concat.rs
+++ b/src/test/compile-fail/concat.rs
@@ -10,6 +10,7 @@
 
 fn main() {
     concat!(b'f');  //~ ERROR: cannot concatenate a binary literal
+    concat!(b"foo");  //~ ERROR: cannot concatenate a binary literal
     concat!(foo);   //~ ERROR: expected a literal
     concat!(foo()); //~ ERROR: expected a literal
 }
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
index 560b2f0337a..58df7dc8efd 100644
--- a/src/test/run-pass/byte-literals.rs
+++ b/src/test/run-pass/byte-literals.rs
@@ -10,6 +10,7 @@
 
 
 static FOO: u8 = b'\xF0';
+static BAR: &'static [u8] = b"a\xF0\t";
 
 pub fn main() {
     assert_eq!(b'a', 97u8);
@@ -35,4 +36,15 @@ pub fn main() {
         b'a' .. b'z' => {},
         _ => fail!()
     }
+
+    assert_eq!(b"a\n\r\t\\\'\"\0\xF0",
+               &[97u8, 10u8, 13u8, 9u8, 92u8, 39u8, 34u8, 0u8, 240u8]);
+    assert_eq!(b"a\
+                 b", &[97u8, 98u8]);
+    assert_eq!(BAR, &[97u8, 240u8, 9u8]);
+
+    match &[97u8, 10u8] {
+        b"a\n" => {},
+        _ => fail!(),
+    }
 }

From b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 13 Jun 2014 18:56:24 +0100
Subject: [PATCH 3/7] Add br##"xx"## raw byte string literals.

---
 src/librustdoc/html/highlight.rs              |  2 +-
 src/libsyntax/parse/lexer/mod.rs              | 56 ++++++++++++++++++-
 src/libsyntax/parse/parser.rs                 |  1 +
 src/libsyntax/parse/token.rs                  |  7 +++
 src/test/compile-fail/raw-byte-string-eof.rs  | 16 ++++++
 .../compile-fail/raw-byte-string-literals.rs  | 17 ++++++
 src/test/run-pass/byte-literals.rs            |  8 ++-
 7 files changed, 102 insertions(+), 5 deletions(-)
 create mode 100644 src/test/compile-fail/raw-byte-string-eof.rs
 create mode 100644 src/test/compile-fail/raw-byte-string-literals.rs

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 172a1be7b4e..daa9ee3da84 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_BYTE(..) | t::LIT_BINARY(..) |
+            t::LIT_BYTE(..) | t::LIT_BINARY(..) | t::LIT_BINARY_RAW(..) |
                 t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 59bcf059fcd..31f15fd7495 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -650,12 +650,13 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> token::Token {
         let c = self.curr;
-        if ident_start(c) && match (c.unwrap(), self.nextch()) {
+        if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) |
-           ('b', Some('"')) | ('b', Some('\'')) => false,
+           ('r', Some('"'), _) | ('r', Some('#'), _) |
+           ('b', Some('"'), _) | ('b', Some('\''), _) |
+           ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -863,6 +864,7 @@ impl<'a> StringReader<'a> {
             return match self.curr {
                 Some('\'') => parse_byte(self),
                 Some('"') => parse_byte_string(self),
+                Some('r') => parse_raw_byte_string(self),
                 _ => unreachable!()  // Should have been a token::IDENT above.
             };
 
@@ -978,6 +980,54 @@ impl<'a> StringReader<'a> {
                 self_.bump();
                 return token::LIT_BINARY(Rc::new(value));
             }
+
+            fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
+                let start_bpos = self_.last_pos;  // span start for the fatal errors below
+                self_.bump();  // step past the `r` that selected this function
+                let mut hash_count = 0u;  // number of `#` in the opening delimiter
+                while self_.curr_is('#') {
+                    self_.bump();
+                    hash_count += 1;
+                }
+
+                if self_.is_eof() {
+                    self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string");
+                } else if !self_.curr_is('"') {  // after the `#`s only `"` may follow
+                    self_.fatal_span_char(start_bpos, self_.last_pos,
+                                    "only `#` is allowed in raw string delimitation; \
+                                     found illegal character",
+                                    self_.curr.unwrap());
+                }
+                self_.bump();  // consume the opening `"`
+                let content_start_bpos = self_.last_pos;  // position just after the opening `"`
+                let mut content_end_bpos;
+                'outer: loop {
+                    match self_.curr {
+                        None => self_.fatal_span(start_bpos, self_.last_pos,
+                                                 "unterminated raw string"),
+                        Some('"') => {  // candidate closing quote
+                            content_end_bpos = self_.last_pos;  // content would end before it
+                            for _ in range(0, hash_count) {  // needs hash_count `#`s after it
+                                self_.bump();
+                                if !self_.curr_is('#') {
+                                    continue 'outer;  // not the end: re-match curr, no bump
+                                }
+                            }
+                            break;  // full closing delimiter seen
+                        },
+                        Some(c) => if c > '\x7F' {  // non-ASCII: report, keep scanning
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                                "raw byte string must be ASCII", c);
+                        }
+                    }
+                    self_.bump();
+                }
+                self_.bump();  // step past the last char of the closing delimiter
+                let bytes = self_.with_str_from_to(content_start_bpos,  // source text, as-is
+                                                   content_end_bpos,
+                                                   |s| s.as_bytes().to_owned());
+                return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
+            }
           }
           '"' => {
             let mut accum_str = String::new();
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 826d28ef3ff..ae2ec216bee 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
             token::LIT_STR_RAW(s, n) => {
                 LitStr(self.id_to_interned_str(s), ast::RawStr(n))
             }
+            token::LIT_BINARY_RAW(ref v, _) |
             token::LIT_BINARY(ref v) => LitBinary(v.clone()),
             token::LPAREN => { self.expect(&token::RPAREN); LitNil },
             _ => { self.unexpected_last(tok); }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index b76dcaf0b94..a2af417ed79 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -88,6 +88,7 @@ pub enum Token {
     LIT_STR(ast::Ident),
     LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
     LIT_BINARY(Rc<Vec<u8>>),
+    LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -243,6 +244,10 @@ pub fn to_str(t: &Token) -> String {
             "b\"{}\"",
             v.iter().map(|&b| b as char).collect::<String>().escape_default())
       }
+      LIT_BINARY_RAW(ref s, n) => {
+        format!("br{delim}\"{string}\"{delim}",
+                 delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
+      }
 
       /* Name components */
       IDENT(s, _) => get_ident(s).get().to_string(),
@@ -298,6 +303,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -338,6 +344,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       _ => false
     }
 }
diff --git a/src/test/compile-fail/raw-byte-string-eof.rs b/src/test/compile-fail/raw-byte-string-eof.rs
new file mode 100644
index 00000000000..83ea9db39b7
--- /dev/null
+++ b/src/test/compile-fail/raw-byte-string-eof.rs
@@ -0,0 +1,16 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+pub fn main() {
+    br##"a"#;  //~ unterminated raw string
+}
+
+
diff --git a/src/test/compile-fail/raw-byte-string-literals.rs b/src/test/compile-fail/raw-byte-string-literals.rs
new file mode 100644
index 00000000000..7a3d1b2318a
--- /dev/null
+++ b/src/test/compile-fail/raw-byte-string-literals.rs
@@ -0,0 +1,17 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+pub fn main() {
+    br"é";  //~ raw byte string must be ASCII
+    br##~"a"~##;  //~ only `#` is allowed in raw string delimitation
+}
+
+
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
index 58df7dc8efd..5317fdc391f 100644
--- a/src/test/run-pass/byte-literals.rs
+++ b/src/test/run-pass/byte-literals.rs
@@ -11,6 +11,7 @@
 
 static FOO: u8 = b'\xF0';
 static BAR: &'static [u8] = b"a\xF0\t";
+static BAZ: &'static [u8] = br"a\n";
 
 pub fn main() {
     assert_eq!(b'a', 97u8);
@@ -24,7 +25,6 @@ pub fn main() {
     assert_eq!(b'\xF0', 240u8);
     assert_eq!(FOO, 240u8);
 
-    // FIXME: Do we want this to be valid?
     assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]);
 
     match 42 {
@@ -47,4 +47,10 @@ pub fn main() {
         b"a\n" => {},
         _ => fail!(),
     }
+
+    assert_eq!(BAZ, &[97u8, 92u8, 110u8]);
+    assert_eq!(br"a\n", &[97u8, 92u8, 110u8]);
+    assert_eq!(br"a\n", b"a\\n");
+    assert_eq!(br###"a"##b"###, &[97u8, 34u8, 35u8, 35u8, 98u8]);
+    assert_eq!(br###"a"##b"###, b"a\"##b");
 }

From 3a52a8a8b8079de795dabdd35985f9f663aa0b5d Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 13 Jun 2014 20:26:37 +0100
Subject: [PATCH 4/7] Document the byte, byte string, and raw byte string
 literals.

---
 src/doc/rust.md | 71 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/doc/rust.md b/src/doc/rust.md
index 8f47e81ba5c..97757a53e4c 100644
--- a/src/doc/rust.md
+++ b/src/doc/rust.md
@@ -234,7 +234,7 @@ rule. A literal is a form of constant expression, so is evaluated (primarily)
 at compile time.
 
 ~~~~ {.ebnf .gram}
-literal : string_lit | char_lit | num_lit ;
+literal : string_lit | char_lit | byte_string_lit | byte_lit | num_lit ;
 ~~~~
 
 #### Character and string literals
@@ -244,17 +244,17 @@ char_lit : '\x27' char_body '\x27' ;
 string_lit : '"' string_body * '"' | 'r' raw_string ;
 
 char_body : non_single_quote
-          | '\x5c' [ '\x27' | common_escape ] ;
+          | '\x5c' [ '\x27' | common_escape | unicode_escape ] ;
 
 string_body : non_double_quote
-            | '\x5c' [ '\x22' | common_escape ] ;
+            | '\x5c' [ '\x22' | common_escape | unicode_escape ] ;
 raw_string : '"' raw_string_body '"' | '#' raw_string '#' ;
 
 common_escape : '\x5c'
               | 'n' | 'r' | 't' | '0'
               | 'x' hex_digit 2
-              | 'u' hex_digit 4
-              | 'U' hex_digit 8 ;
+unicode_escape : 'u' hex_digit 4
+               | 'U' hex_digit 8 ;
 
 hex_digit : 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
           | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'
@@ -294,7 +294,7 @@ the following forms:
     escaped in order to denote *itself*.
 
 Raw string literals do not process any escapes. They start with the character
-`U+0072` (`r`), followed zero or more of the character `U+0023` (`#`) and a
+`U+0072` (`r`), followed by zero or more of the character `U+0023` (`#`) and a
 `U+0022` (double-quote) character. The _raw string body_ is not defined in the
 EBNF grammar above: it can contain any sequence of Unicode characters and is
 terminated only by another `U+0022` (double-quote) character, followed by the
@@ -319,6 +319,65 @@ r##"foo #"# bar"##;                // foo #"# bar
 "\\x52"; r"\x52";                  // \x52
 ~~~~
 
+#### Byte and byte string literals
+
+~~~~ {.ebnf .gram}
+byte_lit : 'b' '\x27' byte_body '\x27' ;
+byte_string_lit : 'b' '"' byte_string_body * '"' | 'b' 'r' raw_byte_string ;
+
+byte_body : ascii_non_single_quote
+          | '\x5c' [ '\x27' | common_escape ] ;
+
+byte_string_body : ascii_non_double_quote
+            | '\x5c' [ '\x22' | common_escape ] ;
+raw_byte_string : '"' raw_byte_string_body '"' | '#' raw_byte_string '#' ;
+
+~~~~
+
+A _byte literal_ is a single ASCII character (in the `U+0000` to `U+007F` range)
+enclosed within two `U+0027` (single-quote) characters,
+with the exception of `U+0027` itself,
+which must be _escaped_ by a preceding `U+005C` character (`\`),
+or a single _escape_.
+It is equivalent to a `u8` unsigned 8-bit integer _number literal_.
+
+A _byte string literal_ is a sequence of ASCII characters and _escapes_
+enclosed within two `U+0022` (double-quote) characters,
+with the exception of `U+0022` itself,
+which must be _escaped_ by a preceding `U+005C` character (`\`),
+or a _raw byte string literal_.
+It is equivalent to a `&'static [u8]` borrowed vector of unsigned 8-bit integers.
+
+Some additional _escapes_ are available in either byte or non-raw byte string
+literals. An escape starts with a `U+005C` (`\`) and continues with one of
+the following forms:
+
+  * A _byte escape_ starts with `U+0078` (`x`) and is
+    followed by exactly two _hex digits_. It denotes the byte
+    equal to the provided hex value.
+  * A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072`
+    (`r`), or `U+0074` (`t`), denoting the byte values `0x0A` (ASCII LF),
+    `0x0D` (ASCII CR) or `0x09` (ASCII HT) respectively.
+  * The _backslash escape_ is the character `U+005C` (`\`) which must be
+    escaped in order to denote its ASCII encoding `0x5C`.
+
+Raw byte string literals do not process any escapes.
+They start with the character `U+0062` (`b`),
+followed by `U+0072` (`r`),
+followed by zero or more of the character `U+0023` (`#`),
+and a `U+0022` (double-quote) character.
+The _raw byte string body_ is not defined in the EBNF grammar above:
+it can contain any sequence of ASCII characters and is
+terminated only by another `U+0022` (double-quote) character, followed by the
+same number of `U+0023` (`#`) characters that preceded the opening `U+0022`
+(double-quote) character.
+A raw byte string literal cannot contain any non-ASCII byte.
+
+All characters contained in the raw byte string body represent their ASCII encoding;
+the characters `U+0022` (double-quote) (except when followed by at least as
+many `U+0023` (`#`) characters as were used to start the raw byte string literal) or
+`U+005C` (`\`) do not have any special meaning.
+
 #### Number literals
 
 ~~~~ {.ebnf .gram}

From 612bbaf7a07fe247e5e2d057cc4f10742918ead0 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Tue, 17 Jun 2014 17:58:13 +0200
Subject: [PATCH 5/7] Refactor backslash-escape parsing to share similar code.

Move into a new syntax::parse::lexer::StringReader method the code
that was almost duplicated for parsing backslash-escapes in
byte, byte string, char, and string literals.
---
 src/libsyntax/parse/lexer/mod.rs | 220 ++++++++++---------------------
 1 file changed, 69 insertions(+), 151 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 31f15fd7495..f67b77d64dd 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -636,6 +636,67 @@ impl<'a> StringReader<'a> {
         }
     }
 
+    /// Scan one (possibly escaped) byte or char of a byte, char, or non-raw (byte) string
+    /// literal. `start` is the position of the already-consumed `first_source_char`. Returns
+    /// `None` only for a backslash-newline line continuation (`delim == '"'`), else `Some`.
+    fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
+                         ascii_only: bool, delim: char) -> Option<char> {
+        match first_source_char {
+            '\\' => {
+                // Backslash escape: the char after `\` selects which escape this is.
+                let escaped = self.curr;
+                let escaped_pos = self.last_pos;  // used in the unknown-escape error span
+                self.bump();
+                match escaped {
+                    None => {},  // EOF here is an error that will be checked later.
+                    Some(e) => {
+                        return Some(match e {
+                            'n' => '\n',
+                            'r' => '\r',
+                            't' => '\t',
+                            '\\' => '\\',
+                            '\'' => '\'',
+                            '"' => '"',
+                            '0' => '\x00',
+                            'x' => self.scan_numeric_escape(2u, delim),
+                            'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
+                            'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
+                            '\n' if delim == '"' => {  // backslash-newline in a string
+                                self.consume_whitespace();
+                                return None  // the continuation yields no char
+                            },
+                            c => {  // any other escape (incl. \u, \U if ascii_only): error
+                                let last_pos = self.last_pos;
+                                self.err_span_char(
+                                    escaped_pos, last_pos,
+                                    if ascii_only { "unknown byte escape" }
+                                    else { "unknown character escape" },
+                                    c);
+                                c  // the bad escape char is still returned after the error
+                            }
+                        })
+                    }
+                }
+            }
+            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {  // raw, inside '...'
+                let last_pos = self.last_pos;
+                self.err_span_char(
+                    start, last_pos,
+                    if ascii_only { "byte constant must be escaped" }
+                    else { "character constant must be escaped" },
+                    first_source_char);
+            }
+            _ => if ascii_only && first_source_char > '\x7F' {  // non-ASCII source char
+                let last_pos = self.last_pos;
+                self.err_span_char(
+                    start, last_pos,
+                    "byte constant must be ASCII. \
+                     Use a \\xHH escape for a non-ASCII byte", first_source_char);
+            }
+        }
+        Some(first_source_char)  // also reached after the errors above and on EOF after `\`
+    }
+
     fn binop(&mut self, op: token::BinOp) -> token::Token {
         self.bump();
         if self.curr_is('=') {
@@ -810,43 +871,7 @@ impl<'a> StringReader<'a> {
             }
 
             // Otherwise it is a character constant:
-            match c2 {
-                '\\' => {
-                    // '\X' for some X must be a character constant:
-                    let escaped = self.curr;
-                    let escaped_pos = self.last_pos;
-                    self.bump();
-                    match escaped {
-                        None => {}
-                        Some(e) => {
-                            c2 = match e {
-                                'n' => '\n',
-                                'r' => '\r',
-                                't' => '\t',
-                                '\\' => '\\',
-                                '\'' => '\'',
-                                '"' => '"',
-                                '0' => '\x00',
-                                'x' => self.scan_numeric_escape(2u, '\''),
-                                'u' => self.scan_numeric_escape(4u, '\''),
-                                'U' => self.scan_numeric_escape(8u, '\''),
-                                c2 => {
-                                    let last_bpos = self.last_pos;
-                                    self.err_span_char(escaped_pos, last_bpos,
-                                                         "unknown character escape", c2);
-                                    c2
-                                }
-                            }
-                        }
-                    }
-                }
-                '\t' | '\n' | '\r' | '\'' => {
-                    let last_bpos = self.last_pos;
-                    self.err_span_char( start, last_bpos,
-                        "character constant must be escaped", c2);
-                }
-                _ => {}
-            }
+            c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
             if !self.curr_is('\'') {
                 let last_bpos = self.last_pos;
                 self.fatal_span_verbose(
@@ -876,44 +901,7 @@ impl<'a> StringReader<'a> {
                 let mut c2 = self_.curr.unwrap_or('\x00');
                 self_.bump();
 
-                match c2 {
-                    '\\' => {
-                        // '\X' for some X must be a character constant:
-                        let escaped = self_.curr;
-                        let escaped_pos = self_.last_pos;
-                        self_.bump();
-                        match escaped {
-                            None => {}
-                            Some(e) => {
-                                c2 = match e {
-                                    'n' => '\n',
-                                    'r' => '\r',
-                                    't' => '\t',
-                                    '\\' => '\\',
-                                    '\'' => '\'',
-                                    '"' => '"',
-                                    '0' => '\x00',
-                                    'x' => self_.scan_numeric_escape(2u, '\''),
-                                    c2 => {
-                                        self_.err_span_char(
-                                            escaped_pos, self_.last_pos,
-                                            "unknown byte escape", c2);
-                                        c2
-                                    }
-                                }
-                            }
-                        }
-                    }
-                    '\t' | '\n' | '\r' | '\'' => {
-                        self_.err_span_char( start, self_.last_pos,
-                            "byte constant must be escaped", c2);
-                    }
-                    _ => if c2 > '\x7F' {
-                        self_.err_span_char( start, self_.last_pos,
-                            "byte constant must be ASCII. \
-                             Use a \\xHH escape for a non-ASCII byte", c2);
-                    }
-                }
+                c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
                 if !self_.curr_is('\'') {
                     // Byte offsetting here is okay because the
                     // character before position `start` are an
@@ -936,46 +924,11 @@ impl<'a> StringReader<'a> {
                                          "unterminated double quote byte string");
                     }
 
+                    let ch_start = self_.last_pos;
                     let ch = self_.curr.unwrap();
                     self_.bump();
-                    match ch {
-                      '\\' => {
-                        if self_.is_eof() {
-                            self_.fatal_span(start, self_.last_pos,
-                                             "unterminated double quote byte string");
-                        }
-
-                        let escaped = self_.curr.unwrap();
-                        let escaped_pos = self_.last_pos;
-                        self_.bump();
-                        match escaped {
-                          'n' => value.push('\n' as u8),
-                          'r' => value.push('\r' as u8),
-                          't' => value.push('\t' as u8),
-                          '\\' => value.push('\\' as u8),
-                          '\'' => value.push('\'' as u8),
-                          '"' => value.push('"' as u8),
-                          '\n' => self_.consume_whitespace(),
-                          '0' => value.push(0),
-                          'x' => {
-                            value.push(self_.scan_numeric_escape(2u, '"') as u8);
-                          }
-                          c2 => {
-                            self_.err_span_char(escaped_pos, self_.last_pos,
-                                                "unknown byte string escape", c2);
-                          }
-                        }
-                      }
-                      _ => {
-                        if ch <= '\x7F' {
-                            value.push(ch as u8)
-                        } else {
-                            self_.err_span_char(self_.last_pos, self_.last_pos,
-                                "byte string must be ASCII. \
-                                 Use a \\xHH escape for a non-ASCII byte", ch);
-                        }
-                      }
-                    }
+                    self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
+                        .map(|ch| value.push(ch as u8));
                 }
                 self_.bump();
                 return token::LIT_BINARY(Rc::new(value));
@@ -1039,46 +992,11 @@ impl<'a> StringReader<'a> {
                     self.fatal_span(start_bpos, last_bpos, "unterminated double quote string");
                 }
 
+                let ch_start = self.last_pos;
                 let ch = self.curr.unwrap();
                 self.bump();
-                match ch {
-                  '\\' => {
-                    if self.is_eof() {
-                        let last_bpos = self.last_pos;
-                        self.fatal_span(start_bpos, last_bpos,
-                               "unterminated double quote string");
-                    }
-
-                    let escaped = self.curr.unwrap();
-                    let escaped_pos = self.last_pos;
-                    self.bump();
-                    match escaped {
-                      'n' => accum_str.push_char('\n'),
-                      'r' => accum_str.push_char('\r'),
-                      't' => accum_str.push_char('\t'),
-                      '\\' => accum_str.push_char('\\'),
-                      '\'' => accum_str.push_char('\''),
-                      '"' => accum_str.push_char('"'),
-                      '\n' => self.consume_whitespace(),
-                      '0' => accum_str.push_char('\x00'),
-                      'x' => {
-                        accum_str.push_char(self.scan_numeric_escape(2u, '"'));
-                      }
-                      'u' => {
-                        accum_str.push_char(self.scan_numeric_escape(4u, '"'));
-                      }
-                      'U' => {
-                        accum_str.push_char(self.scan_numeric_escape(8u, '"'));
-                      }
-                      c2 => {
-                        let last_bpos = self.last_pos;
-                        self.err_span_char(escaped_pos, last_bpos,
-                                        "unknown string escape", c2);
-                      }
-                    }
-                  }
-                  _ => accum_str.push_char(ch)
-                }
+                self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
+                    .map(|ch| accum_str.push_char(ch));
             }
             self.bump();
             return token::LIT_STR(str_to_ident(accum_str.as_slice()));

From 8de2618182f14bb2245e8e89f171aaf9b2f29690 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Wed, 18 Jun 2014 00:06:26 +0200
Subject: [PATCH 6/7] Fix some violations of stronger guarantees for mutable
 borrows.

See 159e27aebb940926ccf1bad0b2b12087d36ad903
---
 src/libsyntax/parse/lexer/mod.rs | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f67b77d64dd..9039f346edb 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -906,8 +906,9 @@ impl<'a> StringReader<'a> {
                     // Byte offsetting here is okay because the
                     // character before position `start` are an
                     // ascii single quote and ascii 'b'.
+                    let last_pos = self_.last_pos;
                     self_.fatal_span_verbose(
-                        start - BytePos(2), self_.last_pos,
+                        start - BytePos(2), last_pos,
                         "unterminated byte constant".to_string());
                 }
                 self_.bump(); // advance curr past token
@@ -920,7 +921,8 @@ impl<'a> StringReader<'a> {
                 let mut value = Vec::new();
                 while !self_.curr_is('"') {
                     if self_.is_eof() {
-                        self_.fatal_span(start, self_.last_pos,
+                        let last_pos = self_.last_pos;
+                        self_.fatal_span(start, last_pos,
                                          "unterminated double quote byte string");
                     }
 
@@ -944,20 +946,25 @@ impl<'a> StringReader<'a> {
                 }
 
                 if self_.is_eof() {
-                    self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string");
+                    let last_pos = self_.last_pos;
+                    self_.fatal_span(start_bpos, last_pos, "unterminated raw string");
                 } else if !self_.curr_is('"') {
-                    self_.fatal_span_char(start_bpos, self_.last_pos,
+                    let last_pos = self_.last_pos;
+                    let ch = self_.curr.unwrap();
+                    self_.fatal_span_char(start_bpos, last_pos,
                                     "only `#` is allowed in raw string delimitation; \
                                      found illegal character",
-                                    self_.curr.unwrap());
+                                    ch);
                 }
                 self_.bump();
                 let content_start_bpos = self_.last_pos;
                 let mut content_end_bpos;
                 'outer: loop {
                     match self_.curr {
-                        None => self_.fatal_span(start_bpos, self_.last_pos,
-                                                 "unterminated raw string"),
+                        None => {
+                            let last_pos = self_.last_pos;
+                            self_.fatal_span(start_bpos, last_pos, "unterminated raw string")
+                        },
                         Some('"') => {
                             content_end_bpos = self_.last_pos;
                             for _ in range(0, hash_count) {
@@ -969,8 +976,9 @@ impl<'a> StringReader<'a> {
                             break;
                         },
                         Some(c) => if c > '\x7F' {
-                            self_.err_span_char(self_.last_pos, self_.last_pos,
-                                                "raw byte string must be ASCII", c);
+                            let last_pos = self_.last_pos;
+                            self_.err_span_char(
+                                last_pos, last_pos, "raw byte string must be ASCII", c);
                         }
                     }
                     self_.bump();

From 3744d828513092d1ed64c4c6f8cd2536f7a5ff0d Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Wed, 18 Jun 2014 00:40:57 +0200
Subject: [PATCH 7/7] Fix expected error message in a test.

The change is a result of the char/string parsing refactor.
---
 src/test/compile-fail/lex-unknown-str-escape.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/compile-fail/lex-unknown-str-escape.rs b/src/test/compile-fail/lex-unknown-str-escape.rs
index f7809b02b0b..9a59c422711 100644
--- a/src/test/compile-fail/lex-unknown-str-escape.rs
+++ b/src/test/compile-fail/lex-unknown-str-escape.rs
@@ -9,5 +9,5 @@
 // except according to those terms.
 
 static s: &'static str =
-    "\●" //~ ERROR: unknown string escape
+    "\●" //~ ERROR: unknown character escape
 ;