syntax: methodify the lexer
This commit is contained in:
parent
5343eb7e0c
commit
46d1af28b5
@ -34,7 +34,7 @@ pub fn highlight(src: &str, class: Option<&str>) -> String {
|
||||
|
||||
let mut out = io::MemWriter::new();
|
||||
doit(&sess,
|
||||
lexer::new_string_reader(&sess.span_diagnostic, fm),
|
||||
lexer::StringReader::new(&sess.span_diagnostic, fm),
|
||||
class,
|
||||
&mut out).unwrap();
|
||||
str::from_utf8_lossy(out.unwrap().as_slice()).to_string()
|
||||
|
@ -15,7 +15,7 @@ use ast::{AttrId, Attribute, Attribute_, MetaItem, MetaWord, MetaNameValue, Meta
|
||||
use codemap::{Span, Spanned, spanned, dummy_spanned};
|
||||
use codemap::BytePos;
|
||||
use diagnostic::SpanHandler;
|
||||
use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
|
||||
use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
|
||||
use parse::token::InternedString;
|
||||
use parse::token;
|
||||
use crateid::CrateId;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,8 +11,8 @@
|
||||
use ast;
|
||||
use codemap::{BytePos, CharPos, CodeMap, Pos};
|
||||
use diagnostic;
|
||||
use parse::lexer::{is_whitespace, with_str_from, Reader};
|
||||
use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
|
||||
use parse::lexer::{is_whitespace, Reader};
|
||||
use parse::lexer::{StringReader, TokenAndSpan};
|
||||
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
|
||||
use parse::lexer;
|
||||
use parse::token;
|
||||
@ -141,31 +141,6 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
|
||||
fail!("not a doc-comment: {}", comment);
|
||||
}
|
||||
|
||||
fn read_to_eol(rdr: &mut StringReader) -> String {
|
||||
let mut val = String::new();
|
||||
while !rdr.curr_is('\n') && !is_eof(rdr) {
|
||||
val.push_char(rdr.curr.unwrap());
|
||||
bump(rdr);
|
||||
}
|
||||
if rdr.curr_is('\n') { bump(rdr); }
|
||||
return val
|
||||
}
|
||||
|
||||
fn read_one_line_comment(rdr: &mut StringReader) -> String {
|
||||
let val = read_to_eol(rdr);
|
||||
assert!((val.as_slice()[0] == '/' as u8 &&
|
||||
val.as_slice()[1] == '/' as u8) ||
|
||||
(val.as_slice()[0] == '#' as u8 &&
|
||||
val.as_slice()[1] == '!' as u8));
|
||||
return val;
|
||||
}
|
||||
|
||||
fn consume_non_eol_whitespace(rdr: &mut StringReader) {
|
||||
while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
|
||||
bump(rdr);
|
||||
}
|
||||
}
|
||||
|
||||
fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
|
||||
debug!(">>> blank-line comment");
|
||||
comments.push(Comment {
|
||||
@ -177,11 +152,11 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
|
||||
|
||||
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
|
||||
comments: &mut Vec<Comment>) {
|
||||
while is_whitespace(rdr.curr) && !is_eof(rdr) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() {
|
||||
if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
|
||||
push_blank_line_comment(rdr, &mut *comments);
|
||||
}
|
||||
bump(rdr);
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,7 +168,7 @@ fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
debug!("<<< shebang comment");
|
||||
comments.push(Comment {
|
||||
style: if code_to_the_left { Trailing } else { Isolated },
|
||||
lines: vec!(read_one_line_comment(rdr)),
|
||||
lines: vec!(rdr.read_one_line_comment()),
|
||||
pos: p
|
||||
});
|
||||
}
|
||||
@ -203,15 +178,15 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
debug!(">>> line comments");
|
||||
let p = rdr.last_pos;
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
while rdr.curr_is('/') && nextch_is(rdr, '/') {
|
||||
let line = read_one_line_comment(rdr);
|
||||
while rdr.curr_is('/') && rdr.nextch_is('/') {
|
||||
let line = rdr.read_one_line_comment();
|
||||
debug!("{}", line);
|
||||
// Doc comments are not put in comments.
|
||||
if is_doc_comment(line.as_slice()) {
|
||||
break;
|
||||
}
|
||||
lines.push(line);
|
||||
consume_non_eol_whitespace(rdr);
|
||||
rdr.consume_non_eol_whitespace();
|
||||
}
|
||||
debug!("<<< line comments");
|
||||
if !lines.is_empty() {
|
||||
@ -265,21 +240,21 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
let p = rdr.last_pos;
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
let col = rdr.col;
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
|
||||
let mut curr_line = String::from_str("/*");
|
||||
|
||||
// doc-comments are not really comments, they are attributes
|
||||
if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
|
||||
while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
|
||||
if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
|
||||
while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
|
||||
curr_line.push_char(rdr.curr.unwrap());
|
||||
bump(rdr);
|
||||
rdr.bump();
|
||||
}
|
||||
if !is_eof(rdr) {
|
||||
if !rdr.is_eof() {
|
||||
curr_line.push_str("*/");
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
}
|
||||
if !is_block_non_doc_comment(curr_line.as_slice()) {
|
||||
return
|
||||
@ -290,7 +265,7 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
let mut level: int = 1;
|
||||
while level > 0 {
|
||||
debug!("=== block comment level {}", level);
|
||||
if is_eof(rdr) {
|
||||
if rdr.is_eof() {
|
||||
rdr.fatal("unterminated block comment");
|
||||
}
|
||||
if rdr.curr_is('\n') {
|
||||
@ -298,21 +273,21 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
curr_line,
|
||||
col);
|
||||
curr_line = String::new();
|
||||
bump(rdr);
|
||||
rdr.bump();
|
||||
} else {
|
||||
curr_line.push_char(rdr.curr.unwrap());
|
||||
if rdr.curr_is('/') && nextch_is(rdr, '*') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
if rdr.curr_is('/') && rdr.nextch_is('*') {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line.push_char('*');
|
||||
level += 1;
|
||||
} else {
|
||||
if rdr.curr_is('*') && nextch_is(rdr, '/') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
if rdr.curr_is('*') && rdr.nextch_is('/') {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line.push_char('/');
|
||||
level -= 1;
|
||||
} else { bump(rdr); }
|
||||
} else { rdr.bump(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -324,31 +299,24 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
}
|
||||
|
||||
let mut style = if code_to_the_left { Trailing } else { Isolated };
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
|
||||
rdr.consume_non_eol_whitespace();
|
||||
if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
|
||||
style = Mixed;
|
||||
}
|
||||
debug!("<<< block comment");
|
||||
comments.push(Comment {style: style, lines: lines, pos: p});
|
||||
}
|
||||
|
||||
fn peeking_at_comment(rdr: &StringReader) -> bool {
|
||||
return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
|
||||
(rdr.curr_is('/') && nextch_is(rdr, '*')) ||
|
||||
// consider shebangs comments, but not inner attributes
|
||||
(rdr.curr_is('#') && nextch_is(rdr, '!') &&
|
||||
!lexer::nextnextch_is(rdr, '['));
|
||||
}
|
||||
|
||||
fn consume_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment> ) {
|
||||
debug!(">>> consume comment");
|
||||
if rdr.curr_is('/') && nextch_is(rdr, '/') {
|
||||
if rdr.curr_is('/') && rdr.nextch_is('/') {
|
||||
read_line_comments(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr_is('/') && nextch_is(rdr, '*') {
|
||||
} else if rdr.curr_is('/') && rdr.nextch_is('*') {
|
||||
read_block_comment(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr_is('#') && nextch_is(rdr, '!') {
|
||||
} else if rdr.curr_is('#') && rdr.nextch_is('!') {
|
||||
read_shebang_comment(rdr, code_to_the_left, comments);
|
||||
} else { fail!(); }
|
||||
debug!("<<< consume comment");
|
||||
@ -362,8 +330,7 @@ pub struct Literal {
|
||||
|
||||
// it appears this function is called only from pprust... that's
|
||||
// probably not a good thing.
|
||||
pub fn gather_comments_and_literals(span_diagnostic:
|
||||
&diagnostic::SpanHandler,
|
||||
pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
|
||||
path: String,
|
||||
srdr: &mut io::Reader)
|
||||
-> (Vec<Comment>, Vec<Literal>) {
|
||||
@ -371,20 +338,20 @@ pub fn gather_comments_and_literals(span_diagnostic:
|
||||
let src = str::from_utf8(src.as_slice()).unwrap().to_string();
|
||||
let cm = CodeMap::new();
|
||||
let filemap = cm.new_filemap(path, src);
|
||||
let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
|
||||
let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
|
||||
|
||||
let mut comments: Vec<Comment> = Vec::new();
|
||||
let mut literals: Vec<Literal> = Vec::new();
|
||||
let mut first_read: bool = true;
|
||||
while !is_eof(&rdr) {
|
||||
while !rdr.is_eof() {
|
||||
loop {
|
||||
let mut code_to_the_left = !first_read;
|
||||
consume_non_eol_whitespace(&mut rdr);
|
||||
rdr.consume_non_eol_whitespace();
|
||||
if rdr.curr_is('\n') {
|
||||
code_to_the_left = false;
|
||||
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
|
||||
}
|
||||
while peeking_at_comment(&rdr) {
|
||||
while rdr.peeking_at_comment() {
|
||||
consume_comment(&mut rdr, code_to_the_left, &mut comments);
|
||||
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
|
||||
}
|
||||
@ -397,7 +364,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
|
||||
//discard, and look ahead; we're working with internal state
|
||||
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
|
||||
if token::is_lit(&tok) {
|
||||
with_str_from(&rdr, bstart, |s| {
|
||||
rdr.with_str_from(bstart, |s| {
|
||||
debug!("tok lit: {}", s);
|
||||
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
|
||||
})
|
1153
src/libsyntax/parse/lexer/mod.rs
Normal file
1153
src/libsyntax/parse/lexer/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -25,7 +25,6 @@ use std::str;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
pub mod token;
|
||||
pub mod comments;
|
||||
pub mod attr;
|
||||
|
||||
pub mod common;
|
||||
@ -255,7 +254,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
|
||||
// it appears to me that the cfg doesn't matter here... indeed,
|
||||
// parsing tt's probably shouldn't require a parser at all.
|
||||
let cfg = Vec::new();
|
||||
let srdr = lexer::new_string_reader(&sess.span_diagnostic, filemap);
|
||||
let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
|
||||
let mut p1 = Parser::new(sess, cfg, box srdr);
|
||||
p1.parse_all_token_trees()
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ pub fn to_str(t: &Token) -> String {
|
||||
ANDAND => "&&".to_string(),
|
||||
BINOP(op) => binop_to_str(op).to_string(),
|
||||
BINOPEQ(op) => {
|
||||
let mut s = binop_to_str(op).to_strbuf();
|
||||
let mut s = binop_to_str(op).to_string();
|
||||
s.push_str("=");
|
||||
s
|
||||
}
|
||||
|
@ -20,7 +20,8 @@ use codemap;
|
||||
use diagnostic;
|
||||
use parse::classify::expr_is_simple_block;
|
||||
use parse::token::IdentInterner;
|
||||
use parse::{comments, token};
|
||||
use parse::token;
|
||||
use parse::lexer::comments;
|
||||
use parse;
|
||||
use print::pp::{break_offset, word, space, zerobreak, hardbreak};
|
||||
use print::pp::{Breaks, Consistent, Inconsistent, eof};
|
||||
|
Loading…
x
Reference in New Issue
Block a user