1220 lines
45 KiB
Rust
Raw Normal View History

// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
2012-11-28 16:20:41 -08:00
//! The main parser interface
use ast;
use codemap::{Span, CodeMap, FileMap};
use diagnostic::{SpanHandler, mk_span_handler, default_handler, Auto};
use parse::attr::ParserAttr;
use parse::parser::Parser;
2014-09-13 19:06:01 +03:00
use ptr::P;
2014-09-17 11:58:11 +12:00
use std::cell::{Cell, RefCell};
2013-11-10 22:46:32 -08:00
use std::io::File;
2014-03-16 20:56:24 +02:00
use std::rc::Rc;
use std::num::Int;
use std::str;
use std::iter;
#[cfg_attr(stage0, macro_escape)]
#[cfg_attr(not(stage0), macro_use)]
pub mod parser;
pub mod lexer;
pub mod token;
pub mod attr;
2012-11-18 17:56:50 -08:00
pub mod common;
pub mod classify;
pub mod obsolete;
2012-11-28 16:20:41 -08:00
2014-06-09 13:12:30 -07:00
/// Info about a parsing session.
pub struct ParseSess {
pub span_diagnostic: SpanHandler, // better be the same as the one in the reader!
/// Used to determine and report recursive mod inclusions
2014-03-16 20:56:24 +02:00
included_mod_stack: RefCell<Vec<Path>>,
2014-07-25 14:44:24 +12:00
pub node_id: Cell<ast::NodeId>,
}
2012-11-28 16:20:41 -08:00
2014-03-09 16:54:34 +02:00
pub fn new_parse_sess() -> ParseSess {
ParseSess {
span_diagnostic: mk_span_handler(default_handler(Auto, None), CodeMap::new()),
included_mod_stack: RefCell::new(Vec::new()),
2014-07-25 14:44:24 +12:00
node_id: Cell::new(1),
}
2012-11-28 16:20:41 -08:00
}
2014-03-16 20:56:24 +02:00
pub fn new_parse_sess_special_handler(sh: SpanHandler) -> ParseSess {
2014-03-09 16:54:34 +02:00
ParseSess {
span_diagnostic: sh,
included_mod_stack: RefCell::new(Vec::new()),
2014-07-25 14:44:24 +12:00
node_id: Cell::new(1),
}
}
impl ParseSess {
    /// Hand out a single fresh node id.
    pub fn next_node_id(&self) -> ast::NodeId {
        self.reserve_node_ids(1)
    }

    /// Reserve `count` consecutive node ids and return the first of them.
    ///
    /// Panics if advancing the counter by `count` would overflow.
    pub fn reserve_node_ids(&self, count: ast::NodeId) -> ast::NodeId {
        let first = self.node_id.get();
        let following = match first.checked_add(count) {
            Some(next) => next,
            None => panic!("Input too large, ran out of node ids!")
        };
        self.node_id.set(following);
        first
    }
}
2013-02-11 13:36:24 -08:00
// a bunch of utility functions of the form parse_<thing>_from_<source>
// where <thing> includes crate, expr, item, stmt, tts, and one that
// uses a HOF to parse anything, and <source> includes file and
// source_str.
pub fn parse_crate_from_file(
input: &Path,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess
2013-09-27 19:46:09 -07:00
) -> ast::Crate {
2014-02-06 02:16:44 +09:00
new_parser_from_file(sess, cfg, input).parse_crate_mod()
2013-02-11 13:36:24 -08:00
// why is there no p.abort_if_errors here?
2012-11-28 16:20:41 -08:00
}
pub fn parse_crate_attrs_from_file(
input: &Path,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess
) -> Vec<ast::Attribute> {
2013-12-30 14:04:00 -08:00
let mut parser = new_parser_from_file(sess, cfg, input);
let (inner, _) = parser.parse_inner_attrs_and_next();
2014-03-09 16:54:34 +02:00
inner
}
pub fn parse_crate_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
-> ast::Crate {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(sess,
2014-02-06 02:16:44 +09:00
cfg,
2013-12-30 14:04:00 -08:00
name,
source);
2013-04-23 10:57:41 -07:00
maybe_aborted(p.parse_crate_mod(),p)
2012-11-28 16:20:41 -08:00
}
pub fn parse_crate_attrs_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
-> Vec<ast::Attribute> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(sess,
2014-02-06 02:16:44 +09:00
cfg,
2013-12-30 14:04:00 -08:00
name,
source);
let (inner, _) = maybe_aborted(p.parse_inner_attrs_and_next(),p);
2014-03-09 16:54:34 +02:00
inner
}
pub fn parse_expr_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
2014-09-13 19:06:01 +03:00
-> P<ast::Expr> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(sess, cfg, name, source);
maybe_aborted(p.parse_expr(), p)
2012-11-28 16:20:41 -08:00
}
pub fn parse_item_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
2014-09-13 19:06:01 +03:00
-> Option<P<ast::Item>> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(sess, cfg, name, source);
maybe_aborted(p.parse_item_with_outer_attributes(),p)
2012-11-28 16:20:41 -08:00
}
pub fn parse_meta_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
2014-09-13 19:06:01 +03:00
-> P<ast::MetaItem> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(sess, cfg, name, source);
maybe_aborted(p.parse_meta_item(),p)
}
pub fn parse_stmt_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
attrs: Vec<ast::Attribute> ,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
2014-09-13 19:06:01 +03:00
-> P<ast::Stmt> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(
sess,
cfg,
name,
source
);
maybe_aborted(p.parse_stmt(attrs),p)
2012-11-28 16:20:41 -08:00
}
// Note: keep in sync with `with_hygiene::parse_tts_from_source_str`
// until #16472 is resolved.
//
// Warning: This parses with quote_depth > 0, which is not the default.
pub fn parse_tts_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
2014-03-09 16:54:34 +02:00
sess: &ParseSess)
-> Vec<ast::TokenTree> {
2013-12-30 14:04:00 -08:00
let mut p = new_parser_from_source_str(
sess,
cfg,
name,
source
);
p.quote_depth += 1u;
2013-04-23 10:57:41 -07:00
// right now this is re-creating the token trees from ... token trees.
maybe_aborted(p.parse_all_token_trees(),p)
2012-11-28 16:20:41 -08:00
}
// Note: keep in sync with `with_hygiene::new_parser_from_source_str`
// until #16472 is resolved.
2013-04-23 10:57:41 -07:00
// Create a new parser from a source string
2014-03-09 16:54:34 +02:00
pub fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
2014-03-16 20:56:24 +02:00
cfg: ast::CrateConfig,
name: String,
source: String)
2014-03-16 20:56:24 +02:00
-> Parser<'a> {
filemap_to_parser(sess, string_to_filemap(sess, source, name), cfg)
2012-11-28 16:20:41 -08:00
}
/// Create a new parser, handling errors as appropriate
2012-11-28 16:20:41 -08:00
/// if the file doesn't exist
2014-03-16 20:56:24 +02:00
pub fn new_parser_from_file<'a>(sess: &'a ParseSess,
cfg: ast::CrateConfig,
path: &Path) -> Parser<'a> {
filemap_to_parser(sess, file_to_filemap(sess, path, None), cfg)
2012-11-28 16:20:41 -08:00
}
2013-04-23 10:57:41 -07:00
/// Given a session, a crate config, a path, and a span, add
/// the file at the given path to the codemap, and return a parser.
/// On an error, use the given span as the source of the problem.
2014-03-16 20:56:24 +02:00
pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess,
cfg: ast::CrateConfig,
path: &Path,
owns_directory: bool,
module_name: Option<String>,
2014-03-16 20:56:24 +02:00
sp: Span) -> Parser<'a> {
let mut p = filemap_to_parser(sess, file_to_filemap(sess, path, Some(sp)), cfg);
p.owns_directory = owns_directory;
p.root_module_name = module_name;
p
2013-04-23 10:57:41 -07:00
}
// Note: keep this in sync with `with_hygiene::filemap_to_parser` until
// #16472 is resolved.
2013-04-23 10:57:41 -07:00
/// Given a filemap and config, return a parser
2014-03-09 16:54:34 +02:00
pub fn filemap_to_parser<'a>(sess: &'a ParseSess,
2014-03-16 20:56:24 +02:00
filemap: Rc<FileMap>,
2014-03-09 16:54:34 +02:00
cfg: ast::CrateConfig) -> Parser<'a> {
2014-03-16 20:56:24 +02:00
tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg)
2013-04-23 10:57:41 -07:00
}
// must preserve old name for now, because quote! from the *existing*
// compiler expands into it
2014-03-09 16:54:34 +02:00
pub fn new_parser_from_tts<'a>(sess: &'a ParseSess,
cfg: ast::CrateConfig,
tts: Vec<ast::TokenTree>) -> Parser<'a> {
2014-03-16 20:56:24 +02:00
tts_to_parser(sess, tts, cfg)
2013-04-23 10:57:41 -07:00
}
// base abstractions
/// Given a session and a path and an optional span (for error reporting),
/// add the path to the session's codemap and return the new filemap.
2014-03-09 16:54:34 +02:00
pub fn file_to_filemap(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
2014-03-16 20:56:24 +02:00
-> Rc<FileMap> {
let err = |&: msg: &str| {
match spanopt {
Some(sp) => sess.span_diagnostic.span_fatal(sp, msg),
None => sess.span_diagnostic.handler().fatal(msg),
}
};
2014-01-29 17:39:21 -08:00
let bytes = match File::open(path).read_to_end() {
Ok(bytes) => bytes,
Err(e) => {
err(format!("couldn't read {}: {}",
path.display(),
2014-12-10 19:46:38 -08:00
e)[]);
unreachable!()
}
};
2014-12-10 19:46:38 -08:00
match str::from_utf8(bytes[]).ok() {
Some(s) => {
return string_to_filemap(sess, s.to_string(),
path.as_str().unwrap().to_string())
2012-11-28 16:20:41 -08:00
}
None => {
2014-12-10 19:46:38 -08:00
err(format!("{} is not UTF-8 encoded", path.display())[])
}
2012-11-28 16:20:41 -08:00
}
unreachable!()
2012-11-28 16:20:41 -08:00
}
2014-06-09 13:12:30 -07:00
/// Given a session and a string, add the string to
/// the session's codemap and return the new filemap
pub fn string_to_filemap(sess: &ParseSess, source: String, path: String)
2014-03-16 20:56:24 +02:00
-> Rc<FileMap> {
sess.span_diagnostic.cm.new_filemap(path, source)
2013-04-23 10:57:41 -07:00
}
// Note: keep this in sync with `with_hygiene::filemap_to_tts` (apart
// from the StringReader constructor), until #16472 is resolved.
2014-06-09 13:12:30 -07:00
/// Given a filemap, produce a sequence of token-trees
2014-03-16 20:56:24 +02:00
pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
-> Vec<ast::TokenTree> {
2013-04-23 10:57:41 -07:00
// it appears to me that the cfg doesn't matter here... indeed,
// parsing tt's probably shouldn't require a parser at all.
let cfg = Vec::new();
2014-05-21 16:57:31 -07:00
let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
let mut p1 = Parser::new(sess, cfg, box srdr);
2013-04-23 10:57:41 -07:00
p1.parse_all_token_trees()
}
2014-06-09 13:12:30 -07:00
/// Given tts and cfg, produce a parser
2014-03-09 16:54:34 +02:00
pub fn tts_to_parser<'a>(sess: &'a ParseSess,
tts: Vec<ast::TokenTree>,
cfg: ast::CrateConfig) -> Parser<'a> {
2014-09-15 18:27:28 -07:00
let trdr = lexer::new_tt_reader(&sess.span_diagnostic, None, None, tts);
Parser::new(sess, cfg, box trdr)
2012-11-28 16:20:41 -08:00
}
2013-01-30 09:56:33 -08:00
// FIXME (Issue #16472): The `with_hygiene` mod should go away after
// ToToken impls are revised to go directly to token-trees.
pub mod with_hygiene {
    use ast;
    use codemap::FileMap;
    use parse::parser::Parser;
    use std::rc::Rc;
    use super::ParseSess;
    use super::{maybe_aborted, string_to_filemap, tts_to_parser};

    // Note: keep this in sync with `super::parse_tts_from_source_str` until
    // #16472 is resolved.
    //
    // Warning: This parses with quote_depth > 0, which is not the default.
    /// Parse `source` into token trees using the reader built by
    /// `make_reader_with_embedded_idents`, aborting if the parser recorded
    /// errors.
    pub fn parse_tts_from_source_str(name: String,
                                     source: String,
                                     cfg: ast::CrateConfig,
                                     sess: &ParseSess) -> Vec<ast::TokenTree> {
        let mut parser = new_parser_from_source_str(sess, cfg, name, source);
        parser.quote_depth += 1u;
        // right now this is re-creating the token trees from ... token trees.
        let trees = parser.parse_all_token_trees();
        maybe_aborted(trees, parser)
    }

    // Note: keep this in sync with `super::new_parser_from_source_str` until
    // #16472 is resolved.
    /// Create a new parser from a source string
    fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
                                      cfg: ast::CrateConfig,
                                      name: String,
                                      source: String) -> Parser<'a> {
        let filemap = string_to_filemap(sess, source, name);
        filemap_to_parser(sess, filemap, cfg)
    }

    // Note: keep this in sync with `super::filemap_to_parser` until
    // #16472 is resolved.
    /// Given a filemap and config, return a parser
    fn filemap_to_parser<'a>(sess: &'a ParseSess,
                             filemap: Rc<FileMap>,
                             cfg: ast::CrateConfig) -> Parser<'a> {
        let tts = filemap_to_tts(sess, filemap);
        tts_to_parser(sess, tts, cfg)
    }

    // Note: keep this in sync with `super::filemap_to_tts` until
    // #16472 is resolved.
    /// Given a filemap, produce a sequence of token-trees
    fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
                      -> Vec<ast::TokenTree> {
        use super::lexer::make_reader_with_embedded_idents as make_reader;
        let reader = make_reader(&sess.span_diagnostic, filemap);
        // it appears to me that the cfg doesn't matter here... indeed,
        // parsing tt's probably shouldn't require a parser at all.
        let empty_cfg = Vec::new();
        let mut tt_parser = Parser::new(sess, empty_cfg, box reader);
        tt_parser.parse_all_token_trees()
    }
}
2014-06-09 13:12:30 -07:00
/// Abort if necessary
2013-12-30 14:04:00 -08:00
pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T {
p.abort_if_errors();
result
}
/// Parse a string representing a character literal into its final form.
/// Rather than just accepting/rejecting a given literal, unescapes it as
/// well. Can take any slice prefixed by a character escape. Returns the
/// character and the number of characters consumed.
pub fn char_lit(lit: &str) -> (char, int) {
use std::{num, char};
let mut chars = lit.chars();
let c = match (chars.next(), chars.next()) {
(Some(c), None) if c != '\\' => return (c, 1),
(Some('\\'), Some(c)) => match c {
'"' => Some('"'),
'n' => Some('\n'),
'r' => Some('\r'),
't' => Some('\t'),
'\\' => Some('\\'),
'\'' => Some('\''),
'0' => Some('\0'),
_ => { None }
},
_ => panic!("lexer accepted invalid char escape `{}`", lit)
};
match c {
Some(x) => return (x, 2),
None => { }
}
let msg = format!("lexer should have rejected a bad character escape {}", lit);
2014-12-10 19:46:38 -08:00
let msg2 = msg[];
fn esc(len: uint, lit: &str) -> Option<(char, int)> {
2014-12-10 19:46:38 -08:00
num::from_str_radix(lit[2..len], 16)
.and_then(char::from_u32)
.map(|x| (x, len as int))
}
let unicode_escape = |&: | -> Option<(char, int)>
if lit.as_bytes()[2] == b'{' {
let idx = lit.find('}').expect(msg2);
2014-12-10 19:46:38 -08:00
let subslice = lit[3..idx];
num::from_str_radix(subslice, 16)
.and_then(char::from_u32)
2014-12-10 19:46:38 -08:00
.map(|x| (x, subslice.chars().count() as int + 4))
} else {
esc(6, lit)
};
// Unicode escapes
return match lit.as_bytes()[1] as char {
'x' | 'X' => esc(4, lit),
'u' => unicode_escape(),
'U' => esc(10, lit),
_ => None,
}.expect(msg2);
}
/// Parse a string representing a string literal into its final form. Does
/// unescaping.
///
/// A backslash followed by a newline (or CRLF) is dropped together with the
/// whitespace after it, a CRLF pair becomes a single `\n`, and every other
/// backslash escape is decoded by `char_lit`. Panics on input the lexer
/// should have rejected.
pub fn str_lit(lit: &str) -> String {
    debug!("parse_str_lit: given {}", lit.escape_default());
    let mut res = String::with_capacity(lit.len());

    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
    let error = |&: i| format!("lexer should have rejected {} at {}", lit, i);

    /// Eat everything up to a non-whitespace
    fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharIndices<'a>>) {
        loop {
            let at_whitespace = match it.peek().map(|x| x.1) {
                Some(' ') | Some('\n') | Some('\r') | Some('\t') => true,
                _ => false
            };
            if !at_whitespace {
                break;
            }
            it.next();
        }
    }

    let mut chars = lit.char_indices().peekable();
    loop {
        let (i, c) = match chars.next() {
            Some(pair) => pair,
            None => break
        };
        match c {
            '\\' => {
                let ch = chars.peek().unwrap_or_else(|| {
                    panic!("{}", error(i).as_slice())
                }).1;
                match ch {
                    // line continuation: skip the newline and what follows
                    '\n' => eat(&mut chars),
                    '\r' => {
                        chars.next();
                        let after_cr = chars.peek().unwrap_or_else(|| {
                            panic!("{}", error(i).as_slice())
                        }).1;
                        if after_cr != '\n' {
                            panic!("lexer accepted bare CR");
                        }
                        eat(&mut chars);
                    }
                    _ => {
                        // otherwise, a normal escape
                        let (unescaped, consumed) = char_lit(lit[i..]);
                        // we don't need to move past the first \
                        for _ in range(0, consumed - 1) {
                            chars.next();
                        }
                        res.push(unescaped);
                    }
                }
            },
            '\r' => {
                let ch = chars.peek().unwrap_or_else(|| {
                    panic!("{}", error(i).as_slice())
                }).1;
                if ch != '\n' {
                    panic!("lexer accepted bare CR");
                }
                chars.next();
                res.push('\n');
            }
            other => res.push(other),
        }
    }

    res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
    debug!("parse_str_lit: returning {}", res);
    res
}
/// Parse a string representing a raw string literal into its final form. The
/// only operation this does is convert embedded CRLF into a single LF.
///
/// Panics with "lexer accepted bare CR" if a CR is not immediately followed
/// by LF, including a CR that is the last character of `lit`.
pub fn raw_str_lit(lit: &str) -> String {
    debug!("raw_str_lit: given {}", lit.escape_default());
    let mut res = String::with_capacity(lit.len());

    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
    let mut chars = lit.chars().peekable();
    loop {
        match chars.next() {
            Some('\r') => {
                // Matching on the peeked value (rather than unwrapping it)
                // gives a trailing CR the same diagnostic as any other bare CR.
                match chars.peek() {
                    Some(&'\n') => {}
                    _ => panic!("lexer accepted bare CR")
                }
                chars.next();
                res.push('\n');
            }
            Some(c) => res.push(c),
            None => break
        }
    }

    res.shrink_to_fit();
    res
}
// check if `s` looks like i32 or u1234 etc.
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
s.len() > 1 &&
first_chars.contains(&s.char_at(0)) &&
2014-12-10 19:46:38 -08:00
s[1..].chars().all(|c| '0' <= c && c <= '9')
}
/// Build a float literal from already-filtered digits `data` and an optional
/// suffix. `f32`/`f64` give a typed literal; any other suffix is reported at
/// `sp` and the literal is returned unsuffixed, as it is with no suffix.
fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>,
                      sd: &SpanHandler, sp: Span) -> ast::Lit_ {
    debug!("filtered_float_lit: {}, {}", data, suffix);
    match suffix {
        None => {}
        Some("f32") => return ast::LitFloat(data, ast::TyF32),
        Some("f64") => return ast::LitFloat(data, ast::TyF64),
        Some(suf) => {
            let msg = if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
                // if it looks like a width, lets try to be helpful.
                format!("illegal width `{}` for float literal, \
                         valid widths are 32 and 64", suf[1..])
            } else {
                format!("illegal suffix `{}` for float literal, \
                         valid suffixes are `f32` and `f64`", suf)
            };
            sd.span_err(sp, &*msg);
        }
    }
    ast::LitFloatUnsuffixed(data)
}
/// Turn the text of a float literal into an AST literal: `_` separators are
/// stripped, the digits are interned, and the suffix is checked by
/// `filtered_float_lit`.
pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
    debug!("float_lit: {}, {}", s, suffix);
    // FIXME #2252: bounds checking float literals is deferred until trans
    let digits: String = s.chars().filter(|&c| c != '_').collect();
    let interned = token::intern_and_get_ident(&*digits);
    filtered_float_lit(interned, suffix, sd, sp)
}
/// Parse a string representing a byte literal into its final form. Similar to `char_lit`
///
/// Returns the byte and the number of bytes of `lit` that were consumed.
pub fn byte_lit(lit: &str) -> (u8, uint) {
    let err = |&: i| format!("lexer accepted invalid byte literal {} step {}", lit, i);

    // A single byte stands for itself.
    if lit.len() == 1 {
        return (lit.as_bytes()[0], 1);
    }

    assert!(lit.as_bytes()[0] == b'\\', err(0i));
    let simple = match lit.as_bytes()[1] {
        b'"' => Some(b'"'),
        b'n' => Some(b'\n'),
        b'r' => Some(b'\r'),
        b't' => Some(b'\t'),
        b'\\' => Some(b'\\'),
        b'\'' => Some(b'\''),
        b'0' => Some(b'\0'),
        _ => None
    };
    match simple {
        Some(b) => (b, 2),
        // Anything else: read the two characters after the marker as hex.
        None => match ::std::num::from_str_radix::<u64>(lit[2..4], 16) {
            Some(c) if c > 0xFF => panic!(err(2)),
            Some(c) => (c as u8, 4),
            None => panic!(err(3))
        }
    }
}
/// Parse the text of a binary (byte string) literal into its bytes.
///
/// A backslash followed by a newline (or CRLF) is dropped together with the
/// whitespace after it, a CRLF pair becomes a single `\n` byte, and every
/// other backslash escape is decoded by `byte_lit`. Panics on input the
/// lexer should have rejected.
pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
    let mut res = Vec::with_capacity(lit.len());

    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
    let error = |&: i| format!("lexer should have rejected {} at {}", lit, i);

    /// Eat everything up to a non-whitespace
    fn eat<'a, I: Iterator<Item=(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) {
        loop {
            let at_whitespace = match it.peek().map(|x| x.1) {
                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => true,
                _ => false
            };
            if !at_whitespace {
                break;
            }
            it.next();
        }
    }

    // binary literals *must* be ASCII, but the escapes don't have to be
    let mut chars = lit.bytes().enumerate().peekable();
    loop {
        let (i, byte) = match chars.next() {
            Some(pair) => pair,
            None => break
        };
        match byte {
            b'\\' => {
                let em = error(i);
                match chars.peek().expect(em.as_slice()).1 {
                    // line continuation: skip the newline and what follows
                    b'\n' => eat(&mut chars),
                    b'\r' => {
                        chars.next();
                        if chars.peek().expect(em.as_slice()).1 != b'\n' {
                            panic!("lexer accepted bare CR");
                        }
                        eat(&mut chars);
                    }
                    _ => {
                        // otherwise, a normal escape
                        let (unescaped, consumed) = byte_lit(lit[i..]);
                        // we don't need to move past the first \
                        for _ in range(0, consumed - 1) {
                            chars.next();
                        }
                        res.push(unescaped);
                    }
                }
            },
            b'\r' => {
                let em = error(i);
                if chars.peek().expect(em.as_slice()).1 != b'\n' {
                    panic!("lexer accepted bare CR");
                }
                chars.next();
                res.push(b'\n');
            }
            other => res.push(other),
        }
    }

    Rc::new(res)
}
2013-02-04 13:15:17 -08:00
pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
// s can only be ascii, byte indexing is fine
let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
2014-12-10 19:46:38 -08:00
let mut s = s2[];
debug!("integer_lit: {}, {}", s, suffix);
let mut base = 10;
let orig = s;
let mut ty = ast::UnsuffixedIntLit(ast::Plus);
if s.char_at(0) == '0' && s.len() > 1 {
match s.char_at(1) {
'x' => base = 16,
'o' => base = 8,
'b' => base = 2,
_ => { }
}
}
// 1f64 and 2f32 etc. are valid float literals.
match suffix {
Some(suf) if looks_like_width_suffix(&['f'], suf) => {
match base {
16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
8u => sd.span_err(sp, "octal float literal is not supported"),
2u => sd.span_err(sp, "binary float literal is not supported"),
_ => ()
}
let ident = token::intern_and_get_ident(&*s);
return filtered_float_lit(ident, suffix, sd, sp)
}
_ => {}
}
if base != 10 {
2014-12-10 19:46:38 -08:00
s = s[2..];
}
if let Some(suf) = suffix {
if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")}
ty = match suf {
"i" => ast::SignedIntLit(ast::TyI, ast::Plus),
"i8" => ast::SignedIntLit(ast::TyI8, ast::Plus),
"i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
"i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
"i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
"u" => ast::UnsignedIntLit(ast::TyU),
"u8" => ast::UnsignedIntLit(ast::TyU8),
"u16" => ast::UnsignedIntLit(ast::TyU16),
"u32" => ast::UnsignedIntLit(ast::TyU32),
"u64" => ast::UnsignedIntLit(ast::TyU64),
_ => {
// i<digits> and u<digits> look like widths, so lets
// give an error message along those lines
if looks_like_width_suffix(&['i', 'u'], suf) {
sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
valid widths are 8, 16, 32 and 64",
2014-12-10 19:46:38 -08:00
suf[1..]));
} else {
sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
}
ty
}
}
}
debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
string was {}, the original suffix was {}", ty, base, s, orig, suffix);
let res: u64 = match ::std::num::from_str_radix(s, base) {
Some(r) => r,
None => { sd.span_err(sp, "int literal is too large"); 0 }
};
// adjust the sign
let sign = ast::Sign::new(res);
match ty {
ast::SignedIntLit(t, _) => ast::LitInt(res, ast::SignedIntLit(t, sign)),
ast::UnsuffixedIntLit(_) => ast::LitInt(res, ast::UnsuffixedIntLit(sign)),
us@ast::UnsignedIntLit(_) => ast::LitInt(res, us)
}
}
2013-02-04 13:15:17 -08:00
#[cfg(test)]
mod test {
use super::*;
2014-07-04 22:36:27 +02:00
use serialize::json;
use codemap::{Span, BytePos, Pos, Spanned, NO_EXPANSION};
use owned_slice::OwnedSlice;
use ast;
2013-04-23 10:57:41 -07:00
use abi;
use attr::{first_attr_value_str_by_name, AttrMetaMethods};
2013-04-23 10:57:41 -07:00
use parse::parser::Parser;
2013-06-09 02:21:11 +10:00
use parse::token::{str_to_ident};
use print::pprust::view_item_to_string;
use ptr::P;
2013-09-24 12:31:24 -07:00
use util::parser_testing::{string_to_tts, string_to_parser};
use util::parser_testing::{string_to_expr, string_to_item};
use util::parser_testing::{string_to_stmt, string_to_view_item};
2013-04-23 10:57:41 -07:00
// produce a codemap::span
2013-11-21 01:32:29 +09:00
fn sp(a: u32, b: u32) -> Span {
Span {lo: BytePos(a), hi: BytePos(b), expn_id: NO_EXPANSION}
2013-04-23 10:57:41 -07:00
}
// a single identifier parses to a one-segment, non-global path expression
#[test]
fn path_exprs_1() {
    let parsed = string_to_expr("a".to_string());
    let expected = P(ast::Expr {
        id: ast::DUMMY_NODE_ID,
        node: ast::ExprPath(ast::Path {
            span: sp(0, 1),
            global: false,
            segments: vec![
                ast::PathSegment {
                    identifier: str_to_ident("a"),
                    parameters: ast::PathParameters::none(),
                }
            ],
        }),
        span: sp(0, 1)
    });
    assert!(parsed == expected);
}
// a leading `::` yields a global path with one segment per component
#[test]
fn path_exprs_2() {
    let parsed = string_to_expr("::a::b".to_string());
    let expected = P(ast::Expr {
        id: ast::DUMMY_NODE_ID,
        node: ast::ExprPath(ast::Path {
            span: sp(0, 6),
            global: true,
            segments: vec![
                ast::PathSegment {
                    identifier: str_to_ident("a"),
                    parameters: ast::PathParameters::none(),
                },
                ast::PathSegment {
                    identifier: str_to_ident("b"),
                    parameters: ast::PathParameters::none(),
                }
            ]
        }),
        span: sp(0, 6)
    });
    assert!(parsed == expected);
}
#[should_fail]
2013-04-23 10:57:41 -07:00
#[test] fn bad_path_expr_1() {
string_to_expr("::abc::def::return".to_string());
}
2013-04-23 10:57:41 -07:00
// check the token-tree-ization of macros
#[test]
fn string_to_tts_macro () {
let tts = string_to_tts("macro_rules! zip (($a)=>($a))".to_string());
2014-12-10 19:46:38 -08:00
let tts: &[ast::TokenTree] = tts[];
match tts {
[ast::TtToken(_, token::Ident(name_macro_rules, token::Plain)),
2014-10-27 19:22:52 +11:00
ast::TtToken(_, token::Not),
ast::TtToken(_, token::Ident(name_zip, token::Plain)),
ast::TtDelimited(_, ref macro_delimed)]
if name_macro_rules.as_str() == "macro_rules"
&& name_zip.as_str() == "zip" => {
2014-12-10 19:46:38 -08:00
match macro_delimed.tts[] {
[ast::TtDelimited(_, ref first_delimed),
ast::TtToken(_, token::FatArrow),
ast::TtDelimited(_, ref second_delimed)]
if macro_delimed.delim == token::Paren => {
2014-12-10 19:46:38 -08:00
match first_delimed.tts[] {
[ast::TtToken(_, token::Dollar),
ast::TtToken(_, token::Ident(name, token::Plain))]
if first_delimed.delim == token::Paren
&& name.as_str() == "a" => {},
_ => panic!("value 3: {}", **first_delimed),
}
2014-12-10 19:46:38 -08:00
match second_delimed.tts[] {
[ast::TtToken(_, token::Dollar),
ast::TtToken(_, token::Ident(name, token::Plain))]
if second_delimed.delim == token::Paren
&& name.as_str() == "a" => {},
_ => panic!("value 4: {}", **second_delimed),
}
},
_ => panic!("value 2: {}", **macro_delimed),
}
},
_ => panic!("value: {}",tts),
}
}
// check the JSON encoding of the token trees of a small function; the
// expected text is laid out one token tree per source line
#[test]
fn string_to_tts_1() {
    let tts = string_to_tts("fn a (b : int) { b; }".to_string());
    let expected = "[\
        {\"variant\":\"TtToken\",\"fields\":[null,{\"variant\":\"Ident\",\"fields\":[\"fn\",\"Plain\"]}]},\
        {\"variant\":\"TtToken\",\"fields\":[null,{\"variant\":\"Ident\",\"fields\":[\"a\",\"Plain\"]}]},\
        {\"variant\":\"TtDelimited\",\"fields\":[null,{\
            \"delim\":\"Paren\",\
            \"open_span\":null,\
            \"tts\":[\
                {\"variant\":\"TtToken\",\"fields\":[null,{\"variant\":\"Ident\",\"fields\":[\"b\",\"Plain\"]}]},\
                {\"variant\":\"TtToken\",\"fields\":[null,\"Colon\"]},\
                {\"variant\":\"TtToken\",\"fields\":[null,{\"variant\":\"Ident\",\"fields\":[\"int\",\"Plain\"]}]}\
            ],\
            \"close_span\":null\
        }]},\
        {\"variant\":\"TtDelimited\",\"fields\":[null,{\
            \"delim\":\"Brace\",\
            \"open_span\":null,\
            \"tts\":[\
                {\"variant\":\"TtToken\",\"fields\":[null,{\"variant\":\"Ident\",\"fields\":[\"b\",\"Plain\"]}]},\
                {\"variant\":\"TtToken\",\"fields\":[null,\"Semi\"]}\
            ],\
            \"close_span\":null\
        }]}\
    ]";
    assert_eq!(json::encode(&tts), expected);
}
// `return d` parses to a return expression wrapping a path expression
#[test]
fn ret_expr() {
    let parsed = string_to_expr("return d".to_string());
    let expected = P(ast::Expr {
        id: ast::DUMMY_NODE_ID,
        node: ast::ExprRet(Some(P(ast::Expr {
            id: ast::DUMMY_NODE_ID,
            node: ast::ExprPath(ast::Path {
                span: sp(7, 8),
                global: false,
                segments: vec![
                    ast::PathSegment {
                        identifier: str_to_ident("d"),
                        parameters: ast::PathParameters::none(),
                    }
                ],
            }),
            span: sp(7, 8)
        }))),
        span: sp(0, 8)
    });
    assert!(parsed == expected);
}
// `b;` parses to an expression statement; every span covers only `b`
#[test]
fn parse_stmt_1() {
    let parsed = string_to_stmt("b;".to_string());
    let expected = P(Spanned {
        node: ast::StmtExpr(
            P(ast::Expr {
                id: ast::DUMMY_NODE_ID,
                node: ast::ExprPath(ast::Path {
                    span: sp(0, 1),
                    global: false,
                    segments: vec![
                        ast::PathSegment {
                            identifier: str_to_ident("b"),
                            parameters: ast::PathParameters::none(),
                        }
                    ],
                }),
                span: sp(0, 1)
            }),
            ast::DUMMY_NODE_ID),
        span: sp(0, 1)
    });
    assert!(parsed == expected);
}
fn parser_done(p: Parser){
2014-10-27 19:22:52 +11:00
assert_eq!(p.token.clone(), token::Eof);
}
2013-04-23 10:57:41 -07:00
// `b` parses to a by-value immutable identifier pattern, and the parser is
// then at end of input
#[test]
fn parse_ident_pat() {
    let sess = new_parse_sess();
    let mut parser = string_to_parser(&sess, "b".to_string());
    let parsed = parser.parse_pat();
    let expected = P(ast::Pat {
        id: ast::DUMMY_NODE_ID,
        node: ast::PatIdent(
            ast::BindByValue(ast::MutImmutable),
            Spanned {
                span: sp(0, 1),
                node: str_to_ident("b")
            },
            None),
        span: sp(0, 1)
    });
    assert!(parsed == expected);
    parser_done(parser);
}
// check the contents of the tt manually:
#[test]
fn parse_fundecl() {
    // this test depends on the intern order of "fn" and "int"
    let parsed = string_to_item("fn a (b : int) { b; }".to_string());
    let expected = Some(P(ast::Item {
        ident: str_to_ident("a"),
        attrs: Vec::new(),
        id: ast::DUMMY_NODE_ID,
        node: ast::ItemFn(
            P(ast::FnDecl {
                inputs: vec![ast::Arg {
                    ty: P(ast::Ty {
                        id: ast::DUMMY_NODE_ID,
                        node: ast::TyPath(
                            ast::Path {
                                span: sp(10, 13),
                                global: false,
                                segments: vec![
                                    ast::PathSegment {
                                        identifier: str_to_ident("int"),
                                        parameters: ast::PathParameters::none(),
                                    }
                                ],
                            },
                            ast::DUMMY_NODE_ID),
                        span: sp(10, 13)
                    }),
                    pat: P(ast::Pat {
                        id: ast::DUMMY_NODE_ID,
                        node: ast::PatIdent(
                            ast::BindByValue(ast::MutImmutable),
                            Spanned {
                                span: sp(6, 7),
                                node: str_to_ident("b")
                            },
                            None
                        ),
                        span: sp(6, 7)
                    }),
                    id: ast::DUMMY_NODE_ID
                }],
                output: ast::Return(P(ast::Ty {
                    id: ast::DUMMY_NODE_ID,
                    node: ast::TyTup(vec![]),
                    span: sp(15, 15) // not sure
                })),
                variadic: false
            }),
            ast::Unsafety::Normal,
            abi::Rust,
            ast::Generics { // no idea on either of these:
                lifetimes: Vec::new(),
                ty_params: OwnedSlice::empty(),
                where_clause: ast::WhereClause {
                    id: ast::DUMMY_NODE_ID,
                    predicates: Vec::new(),
                }
            },
            P(ast::Block {
                view_items: Vec::new(),
                stmts: vec![P(Spanned {
                    node: ast::StmtSemi(
                        P(ast::Expr {
                            id: ast::DUMMY_NODE_ID,
                            node: ast::ExprPath(ast::Path {
                                span: sp(17, 18),
                                global: false,
                                segments: vec![
                                    ast::PathSegment {
                                        identifier: str_to_ident("b"),
                                        parameters: ast::PathParameters::none(),
                                    }
                                ],
                            }),
                            span: sp(17, 18)
                        }),
                        ast::DUMMY_NODE_ID),
                    span: sp(17, 19)
                })],
                expr: None,
                id: ast::DUMMY_NODE_ID,
                rules: ast::DefaultBlock, // no idea
                span: sp(15, 21),
            })),
        vis: ast::Inherited,
        span: sp(0, 21)
    }));
    assert!(parsed == expected);
}
// `use` view items must pretty-print back to exactly their source text
#[test]
fn parse_use() {
    let sources = ["use foo::bar::baz;",
                   "use foo::bar as baz;"];
    for &use_s in sources.iter() {
        let vitem = string_to_view_item(use_s.to_string());
        let vitem_s = view_item_to_string(&vitem);
        assert_eq!(vitem_s[], use_s);
    }
}
// Round-trip check for `extern crate` view items, in both the plain form and
// the quoted-name-with-rename form: the text produced by
// `view_item_to_string` must equal the text given to `string_to_view_item`.
#[test] fn parse_extern_crate() {
    let inputs = ["extern crate foo;",
                  "extern crate \"foo\" as bar;"];
    for &expected in inputs.iter() {
        let parsed = string_to_view_item(expected.to_string());
        let printed = view_item_to_string(&parsed);
        assert_eq!(printed[], expected);
    }
}
// Converts `src` to an item via `string_to_item` (unwrapping the result),
// walks it, and returns the identifier span of every `PatIdent` pattern that
// the walk reaches, in visit order.
fn get_spans_of_pat_idents(src: &str) -> Vec<Span> {
    // Visitor that records identifier spans as it goes.
    struct IdentSpanCollector {
        spans: Vec<Span>
    }

    impl<'v> ::visit::Visitor<'v> for IdentSpanCollector {
        fn visit_pat(&mut self, pat: &'v ast::Pat) {
            match pat.node {
                // Record the identifier's span; this arm does not walk any
                // further into the pattern.
                ast::PatIdent(_, ref ident, _) => {
                    self.spans.push(ident.span.clone());
                }
                // Every other kind of pattern is walked so nested patterns
                // are visited too.
                _ => {
                    ::visit::walk_pat(self, pat);
                }
            }
        }
    }

    let parsed = string_to_item(src.to_string()).unwrap();
    let mut collector = IdentSpanCollector { spans: Vec::new() };
    ::visit::walk_item(&mut collector, &*parsed);
    collector.spans
}
// For each method declaration below, the first span returned by
// `get_spans_of_pat_idents` must cover exactly the text `self` in the source.
#[test] fn span_of_self_arg_pat_idents_are_correct() {
    let sources = ["impl z { fn a (&self, &myarg: int) {} }",
                   "impl z { fn a (&mut self, &myarg: int) {} }",
                   "impl z { fn a (&'a self, &myarg: int) {} }",
                   "impl z { fn a (self, &myarg: int) {} }",
                   "impl z { fn a (self: Foo, &myarg: int) {} }",
                   ];
    for &source in sources.iter() {
        let pat_spans = get_spans_of_pat_idents(source);
        // Offsets of the first reported span, used to slice the source text.
        let start = pat_spans[0].lo.to_uint();
        let end = pat_spans[0].hi.to_uint();
        let found = source[start..end];
        assert!("self" == found,
                "\"{}\" != \"self\". src=\"{}\"",
                found, source);
    }
}
2013-04-23 10:57:41 -07:00
// Smoke test: feeds a couple of expressions to `string_to_expr` and discards
// the results, so nothing beyond the calls themselves is checked.
#[test] fn parse_exprs() {
    let exprs = ["3 + 4",
                 "a::z.froob(b,&(987+3))"];
    for &text in exprs.iter() {
        string_to_expr(text.to_string());
    }
}
// Passes `string_to_item` a function whose body contains two
// `#[cfg(...)]`-attributed nested fns followed by a `let` statement. The
// result is discarded, so nothing beyond the call itself is checked.
// NOTE(review): the bare date lines inside this block (one of them inside the
// string literal) look like blame-view residue rather than source text --
// confirm against the upstream file before relying on the literal's contents.
#[test] fn attrs_fix_bug () {
2014-04-15 18:17:48 -07:00
string_to_item("pub fn mk_file_writer(path: &Path, flags: &[FileFlag])
2014-09-13 19:06:01 +03:00
-> Result<Box<Writer>, String> {
#[cfg(windows)]
fn wb() -> c_int {
(O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int
}
#[cfg(unix)]
fn wb() -> c_int { O_WRONLY as c_int }
let mut fflags: c_int = wb();
}".to_string());
}
// Doc comments in CRLF-terminated source: the `doc` attribute values read
// back from the parsed item must not contain the `\r` characters. Three
// inputs are checked: a single `///` line, two consecutive `///` lines, and
// a `/** ... */` block comment that spans a CRLF line break.
#[test] fn crlf_doc_comments() {
    let sess = new_parse_sess();
    let name = "<source>".to_string();

    // Single line comment: the trailing "\r\n" is not part of the value.
    let single_src = "/// doc comment\r\nfn foo() {}".to_string();
    let single_item =
        parse_item_from_source_str(name.clone(), single_src, Vec::new(), &sess).unwrap();
    let single_doc =
        first_attr_value_str_by_name(single_item.attrs.as_slice(), "doc").unwrap();
    assert_eq!(single_doc.get(), "/// doc comment");

    // Two line comments: one `doc` attribute value per line, in source order.
    let pair_src = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string();
    let pair_item =
        parse_item_from_source_str(name.clone(), pair_src, Vec::new(), &sess).unwrap();
    let doc_attrs = pair_item.attrs.iter().filter(|a| a.name().get() == "doc");
    let docs: Vec<_> = doc_attrs.map(|a| a.value_str().unwrap().get().to_string()).collect();
    let expected: &[_] = &["/// doc comment".to_string(), "/// line 2".to_string()];
    assert_eq!(docs[], expected);

    // Block comment: the interior "\r\n" is expected to come back as "\n".
    let block_src = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string();
    let block_item =
        parse_item_from_source_str(name, block_src, Vec::new(), &sess).unwrap();
    let block_doc =
        first_attr_value_str_by_name(block_item.attrs.as_slice(), "doc").unwrap();
    assert_eq!(block_doc.get(), "/** doc comment\n * with CRLF */");
}
2013-02-04 13:15:17 -08:00
}