Auto merge of #36485 - nnethercote:char_lit-2, r=nagisa

Overhaul char_lit()

This commit does the following.

- Removes parsing support for '\X12', '\u123456' and '\U12345678' char
  literals. These are no longer valid Rust and are rejected by the lexer.
  (This strange-sounding situation occurs because the parser rescans
  char literals to compute their value.)

- Rearranges the function so that all the escaped values are handled in
  a single `match`. The error-handling strategy is based on the one used
  by byte_lit().
This commit is contained in:
bors 2016-09-17 02:51:13 -07:00 committed by GitHub
commit cde61ba7f7

View File

@ -286,52 +286,37 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
pub fn char_lit(lit: &str) -> (char, isize) { pub fn char_lit(lit: &str) -> (char, isize) {
use std::char; use std::char;
let mut chars = lit.chars(); // Handle non-escaped chars first.
match (chars.next(), chars.next()) { if lit.as_bytes()[0] != b'\\' {
(Some(c), None) if c != '\\' => return (c, 1), // If the first byte isn't '\\' it might be part of a multi-byte char, so
(Some('\\'), Some(c)) => match c { // get the char with chars().
'"' => return ('"', 2), let c = lit.chars().next().unwrap();
'n' => return ('\n', 2), return (c, 1);
'r' => return ('\r', 2),
't' => return ('\t', 2),
'\\' => return ('\\', 2),
'\'' => return ('\'', 2),
'0' => return ('\0', 2),
_ => {}
},
_ => panic!("lexer accepted invalid char escape `{}`", lit)
};
fn esc(len: usize, lit: &str) -> Option<(char, isize)> {
u32::from_str_radix(&lit[2..len], 16).ok()
.and_then(char::from_u32)
.map(|x| (x, len as isize))
} }
let unicode_escape = || -> Option<(char, isize)> { // Handle escaped chars.
if lit.as_bytes()[2] == b'{' { match lit.as_bytes()[1] as char {
let idx = lit.find('}').unwrap_or_else(|| { '"' => ('"', 2),
panic!("lexer should have rejected a bad character escape {}", lit) 'n' => ('\n', 2),
}); 'r' => ('\r', 2),
't' => ('\t', 2),
let subslice = &lit[3..idx]; '\\' => ('\\', 2),
u32::from_str_radix(subslice, 16).ok() '\'' => ('\'', 2),
.and_then(char::from_u32) '0' => ('\0', 2),
.map(|x| (x, subslice.chars().count() as isize + 4)) 'x' => {
} else { let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
esc(6, lit) let c = char::from_u32(v).unwrap();
(c, 4)
} }
}; 'u' => {
assert!(lit.as_bytes()[2] == b'{');
// Unicode escapes let idx = lit.find('}').unwrap();
return match lit.as_bytes()[1] as char { let v = u32::from_str_radix(&lit[3..idx], 16).unwrap();
'x' | 'X' => esc(4, lit), let c = char::from_u32(v).unwrap();
'u' => unicode_escape(), (c, (idx + 1) as isize)
'U' => esc(10, lit), }
_ => None, _ => panic!("lexer should have rejected a bad character escape {}", lit)
}.unwrap_or_else(|| { }
panic!("lexer should have rejected a bad character escape {}", lit)
})
} }
/// Parse a string representing a string literal into its final form. Does /// Parse a string representing a string literal into its final form. Does