Auto merge of #36485 - nnethercote:char_lit-2, r=nagisa
Overhaul char_lit()

This commit does the following.

- Removes parsing support for '\X12', '\u123456' and '\U12345678' char literals. These are no longer valid Rust and are rejected by the lexer. (This strange-sounding situation occurs because the parser rescans char literals to compute their value.)

- Rearranges the function so that all the escaped values are handled in a single `match`. The error-handling strategy is based on the one used by byte_lit().
This commit is contained in:
commit
cde61ba7f7
@ -286,52 +286,37 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
|
|||||||
pub fn char_lit(lit: &str) -> (char, isize) {
|
pub fn char_lit(lit: &str) -> (char, isize) {
|
||||||
use std::char;
|
use std::char;
|
||||||
|
|
||||||
let mut chars = lit.chars();
|
// Handle non-escaped chars first.
|
||||||
match (chars.next(), chars.next()) {
|
if lit.as_bytes()[0] != b'\\' {
|
||||||
(Some(c), None) if c != '\\' => return (c, 1),
|
// If the first byte isn't '\\' it might be part of a multi-byte char, so
|
||||||
(Some('\\'), Some(c)) => match c {
|
// get the char with chars().
|
||||||
'"' => return ('"', 2),
|
let c = lit.chars().next().unwrap();
|
||||||
'n' => return ('\n', 2),
|
return (c, 1);
|
||||||
'r' => return ('\r', 2),
|
|
||||||
't' => return ('\t', 2),
|
|
||||||
'\\' => return ('\\', 2),
|
|
||||||
'\'' => return ('\'', 2),
|
|
||||||
'0' => return ('\0', 2),
|
|
||||||
_ => {}
|
|
||||||
},
|
|
||||||
_ => panic!("lexer accepted invalid char escape `{}`", lit)
|
|
||||||
};
|
|
||||||
|
|
||||||
fn esc(len: usize, lit: &str) -> Option<(char, isize)> {
|
|
||||||
u32::from_str_radix(&lit[2..len], 16).ok()
|
|
||||||
.and_then(char::from_u32)
|
|
||||||
.map(|x| (x, len as isize))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let unicode_escape = || -> Option<(char, isize)> {
|
// Handle escaped chars.
|
||||||
if lit.as_bytes()[2] == b'{' {
|
match lit.as_bytes()[1] as char {
|
||||||
let idx = lit.find('}').unwrap_or_else(|| {
|
'"' => ('"', 2),
|
||||||
panic!("lexer should have rejected a bad character escape {}", lit)
|
'n' => ('\n', 2),
|
||||||
});
|
'r' => ('\r', 2),
|
||||||
|
't' => ('\t', 2),
|
||||||
let subslice = &lit[3..idx];
|
'\\' => ('\\', 2),
|
||||||
u32::from_str_radix(subslice, 16).ok()
|
'\'' => ('\'', 2),
|
||||||
.and_then(char::from_u32)
|
'0' => ('\0', 2),
|
||||||
.map(|x| (x, subslice.chars().count() as isize + 4))
|
'x' => {
|
||||||
} else {
|
let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
|
||||||
esc(6, lit)
|
let c = char::from_u32(v).unwrap();
|
||||||
|
(c, 4)
|
||||||
}
|
}
|
||||||
};
|
'u' => {
|
||||||
|
assert!(lit.as_bytes()[2] == b'{');
|
||||||
// Unicode escapes
|
let idx = lit.find('}').unwrap();
|
||||||
return match lit.as_bytes()[1] as char {
|
let v = u32::from_str_radix(&lit[3..idx], 16).unwrap();
|
||||||
'x' | 'X' => esc(4, lit),
|
let c = char::from_u32(v).unwrap();
|
||||||
'u' => unicode_escape(),
|
(c, (idx + 1) as isize)
|
||||||
'U' => esc(10, lit),
|
}
|
||||||
_ => None,
|
_ => panic!("lexer should have rejected a bad character escape {}", lit)
|
||||||
}.unwrap_or_else(|| {
|
}
|
||||||
panic!("lexer should have rejected a bad character escape {}", lit)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a string representing a string literal into its final form. Does
|
/// Parse a string representing a string literal into its final form. Does
|
||||||
|
Loading…
x
Reference in New Issue
Block a user