Refactor backslash-escape parsing to share similar code.
Move into a new syntax::parse::lexer::StringReader method the code that was almost duplicated for parsing backslash-escapes in byte, byte string, char, and string literals.
This commit is contained in:
parent
3a52a8a8b8
commit
612bbaf7a0
@ -636,6 +636,67 @@ fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan for a single (possibly escaped) byte or char
|
||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
ascii_only: bool, delim: char) -> Option<char> {
|
||||
match first_source_char {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self.curr;
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {}, // EOF here is an error that will be checked later.
|
||||
Some(e) => {
|
||||
return Some(match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => self.scan_numeric_escape(2u, delim),
|
||||
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
|
||||
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
return None
|
||||
},
|
||||
c => {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
escaped_pos, last_pos,
|
||||
if ascii_only { "unknown byte escape" }
|
||||
else { "unknown character escape" },
|
||||
c);
|
||||
c
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
if ascii_only { "byte constant must be escaped" }
|
||||
else { "character constant must be escaped" },
|
||||
first_source_char);
|
||||
}
|
||||
_ => if ascii_only && first_source_char > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
"byte constant must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte", first_source_char);
|
||||
}
|
||||
}
|
||||
Some(first_source_char)
|
||||
}
|
||||
|
||||
fn binop(&mut self, op: token::BinOp) -> token::Token {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
@ -810,43 +871,7 @@ fn next_token_inner(&mut self) -> token::Token {
|
||||
}
|
||||
|
||||
// Otherwise it is a character constant:
|
||||
match c2 {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self.curr;
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {}
|
||||
Some(e) => {
|
||||
c2 = match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => self.scan_numeric_escape(2u, '\''),
|
||||
'u' => self.scan_numeric_escape(4u, '\''),
|
||||
'U' => self.scan_numeric_escape(8u, '\''),
|
||||
c2 => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char(escaped_pos, last_bpos,
|
||||
"unknown character escape", c2);
|
||||
c2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char( start, last_bpos,
|
||||
"character constant must be escaped", c2);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
|
||||
if !self.curr_is('\'') {
|
||||
let last_bpos = self.last_pos;
|
||||
self.fatal_span_verbose(
|
||||
@ -876,44 +901,7 @@ fn parse_byte(self_: &mut StringReader) -> token::Token {
|
||||
let mut c2 = self_.curr.unwrap_or('\x00');
|
||||
self_.bump();
|
||||
|
||||
match c2 {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self_.curr;
|
||||
let escaped_pos = self_.last_pos;
|
||||
self_.bump();
|
||||
match escaped {
|
||||
None => {}
|
||||
Some(e) => {
|
||||
c2 = match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => self_.scan_numeric_escape(2u, '\''),
|
||||
c2 => {
|
||||
self_.err_span_char(
|
||||
escaped_pos, self_.last_pos,
|
||||
"unknown byte escape", c2);
|
||||
c2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' => {
|
||||
self_.err_span_char( start, self_.last_pos,
|
||||
"byte constant must be escaped", c2);
|
||||
}
|
||||
_ => if c2 > '\x7F' {
|
||||
self_.err_span_char( start, self_.last_pos,
|
||||
"byte constant must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte", c2);
|
||||
}
|
||||
}
|
||||
c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
|
||||
if !self_.curr_is('\'') {
|
||||
// Byte offsetting here is okay because the
|
||||
// character before position `start` are an
|
||||
@ -936,46 +924,11 @@ fn parse_byte_string(self_: &mut StringReader) -> token::Token {
|
||||
"unterminated double quote byte string");
|
||||
}
|
||||
|
||||
let ch_start = self_.last_pos;
|
||||
let ch = self_.curr.unwrap();
|
||||
self_.bump();
|
||||
match ch {
|
||||
'\\' => {
|
||||
if self_.is_eof() {
|
||||
self_.fatal_span(start, self_.last_pos,
|
||||
"unterminated double quote byte string");
|
||||
}
|
||||
|
||||
let escaped = self_.curr.unwrap();
|
||||
let escaped_pos = self_.last_pos;
|
||||
self_.bump();
|
||||
match escaped {
|
||||
'n' => value.push('\n' as u8),
|
||||
'r' => value.push('\r' as u8),
|
||||
't' => value.push('\t' as u8),
|
||||
'\\' => value.push('\\' as u8),
|
||||
'\'' => value.push('\'' as u8),
|
||||
'"' => value.push('"' as u8),
|
||||
'\n' => self_.consume_whitespace(),
|
||||
'0' => value.push(0),
|
||||
'x' => {
|
||||
value.push(self_.scan_numeric_escape(2u, '"') as u8);
|
||||
}
|
||||
c2 => {
|
||||
self_.err_span_char(escaped_pos, self_.last_pos,
|
||||
"unknown byte string escape", c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if ch <= '\x7F' {
|
||||
value.push(ch as u8)
|
||||
} else {
|
||||
self_.err_span_char(self_.last_pos, self_.last_pos,
|
||||
"byte string must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte", ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
|
||||
.map(|ch| value.push(ch as u8));
|
||||
}
|
||||
self_.bump();
|
||||
return token::LIT_BINARY(Rc::new(value));
|
||||
@ -1039,46 +992,11 @@ fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
|
||||
self.fatal_span(start_bpos, last_bpos, "unterminated double quote string");
|
||||
}
|
||||
|
||||
let ch_start = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
self.bump();
|
||||
match ch {
|
||||
'\\' => {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
self.fatal_span(start_bpos, last_bpos,
|
||||
"unterminated double quote string");
|
||||
}
|
||||
|
||||
let escaped = self.curr.unwrap();
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
'n' => accum_str.push_char('\n'),
|
||||
'r' => accum_str.push_char('\r'),
|
||||
't' => accum_str.push_char('\t'),
|
||||
'\\' => accum_str.push_char('\\'),
|
||||
'\'' => accum_str.push_char('\''),
|
||||
'"' => accum_str.push_char('"'),
|
||||
'\n' => self.consume_whitespace(),
|
||||
'0' => accum_str.push_char('\x00'),
|
||||
'x' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(2u, '"'));
|
||||
}
|
||||
'u' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(4u, '"'));
|
||||
}
|
||||
'U' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(8u, '"'));
|
||||
}
|
||||
c2 => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char(escaped_pos, last_bpos,
|
||||
"unknown string escape", c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => accum_str.push_char(ch)
|
||||
}
|
||||
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
|
||||
.map(|ch| accum_str.push_char(ch));
|
||||
}
|
||||
self.bump();
|
||||
return token::LIT_STR(str_to_ident(accum_str.as_slice()));
|
||||
|
Loading…
Reference in New Issue
Block a user