2012-04-15 03:27:24 -07:00
|
|
|
import io::reader_util;
|
2012-05-22 17:49:16 -07:00
|
|
|
import io::println;//XXXXXXXXxxx
|
2012-04-15 03:27:24 -07:00
|
|
|
import util::interner;
|
2012-06-15 09:32:17 -07:00
|
|
|
import lexer::{ string_reader, bump, is_eof, nextch,
|
2012-05-30 11:36:30 -07:00
|
|
|
is_whitespace, get_str_from, string_reader_as_reader };
|
2012-04-15 03:27:24 -07:00
|
|
|
|
2012-04-17 19:34:44 -07:00
|
|
|
export cmnt;
|
|
|
|
export lit;
|
|
|
|
export cmnt_style;
|
|
|
|
export gather_comments_and_literals;
|
|
|
|
|
2012-04-15 03:27:24 -07:00
|
|
|
// Placement of a gathered comment relative to the code around it.
enum cmnt_style {
    // No code on either side of any line of the comment.
    isolated,
    // There is code to the left of the comment.
    trailing,
    // Code both before and after the comment, as in: code /* foo */ code.
    mixed,
    // Not a real comment: a manual blank line ("\n\n") kept for layout.
    blank_line,
}
|
|
|
|
|
|
|
|
// One gathered comment (or blank-line marker).
type cmnt = {
    // Placement of the comment relative to surrounding code.
    style: cmnt_style,
    // Text of the comment, one entry per source line (empty for blank_line).
    lines: [str],
    // The reader's `chpos` recorded when the comment was encountered.
    pos: uint
};
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn read_to_eol(rdr: string_reader) -> str {
|
2012-04-15 03:27:24 -07:00
|
|
|
let mut val = "";
|
2012-05-30 11:36:30 -07:00
|
|
|
while rdr.curr != '\n' && !is_eof(rdr) {
|
2012-04-15 03:27:24 -07:00
|
|
|
str::push_char(val, rdr.curr);
|
2012-05-30 11:36:30 -07:00
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
2012-05-30 11:36:30 -07:00
|
|
|
if rdr.curr == '\n' { bump(rdr); }
|
2012-04-15 03:27:24 -07:00
|
|
|
ret val;
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn read_one_line_comment(rdr: string_reader) -> str {
|
2012-04-15 03:27:24 -07:00
|
|
|
let val = read_to_eol(rdr);
|
2012-05-22 17:49:16 -07:00
|
|
|
assert ((val[0] == '/' as u8 && val[1] == '/' as u8) ||
|
|
|
|
(val[0] == '#' as u8 && val[1] == '!' as u8));
|
2012-04-15 03:27:24 -07:00
|
|
|
ret val;
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn consume_non_eol_whitespace(rdr: string_reader) {
|
|
|
|
while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
|
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn push_blank_line_comment(rdr: string_reader, &comments: [cmnt]) {
|
2012-04-15 03:27:24 -07:00
|
|
|
#debug(">>> blank-line comment");
|
|
|
|
let v: [str] = [];
|
|
|
|
comments += [{style: blank_line, lines: v, pos: rdr.chpos}];
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn consume_whitespace_counting_blank_lines(rdr: string_reader,
|
|
|
|
&comments: [cmnt]) {
|
|
|
|
while is_whitespace(rdr.curr) && !is_eof(rdr) {
|
2012-04-15 03:27:24 -07:00
|
|
|
if rdr.col == 0u && rdr.curr == '\n' {
|
|
|
|
push_blank_line_comment(rdr, comments);
|
|
|
|
}
|
2012-05-30 11:36:30 -07:00
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
|
2012-05-22 17:49:16 -07:00
|
|
|
#debug(">>> shebang comment");
|
|
|
|
let p = rdr.chpos;
|
|
|
|
#debug("<<< shebang comment");
|
|
|
|
ret {style: if code_to_the_left { trailing } else { isolated },
|
|
|
|
lines: [read_one_line_comment(rdr)],
|
|
|
|
pos: p};
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt {
|
2012-04-15 03:27:24 -07:00
|
|
|
#debug(">>> line comments");
|
|
|
|
let p = rdr.chpos;
|
|
|
|
let mut lines: [str] = [];
|
2012-05-30 11:36:30 -07:00
|
|
|
while rdr.curr == '/' && nextch(rdr) == '/' {
|
2012-04-15 03:27:24 -07:00
|
|
|
let line = read_one_line_comment(rdr);
|
|
|
|
log(debug, line);
|
|
|
|
lines += [line];
|
|
|
|
consume_non_eol_whitespace(rdr);
|
|
|
|
}
|
|
|
|
#debug("<<< line comments");
|
|
|
|
ret {style: if code_to_the_left { trailing } else { isolated },
|
|
|
|
lines: lines,
|
|
|
|
pos: p};
|
|
|
|
}
|
|
|
|
|
|
|
|
fn all_whitespace(s: str, begin: uint, end: uint) -> bool {
|
|
|
|
let mut i: uint = begin;
|
|
|
|
while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; }
|
|
|
|
ret true;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn trim_whitespace_prefix_and_push_line(&lines: [str],
|
|
|
|
s: str, col: uint) unsafe {
|
|
|
|
let mut s1;
|
2012-04-18 17:02:00 -07:00
|
|
|
let len = str::len(s);
|
|
|
|
if all_whitespace(s, 0u, uint::min(len, col)) {
|
|
|
|
if col < len {
|
|
|
|
s1 = str::slice(s, col, len);
|
2012-04-15 03:27:24 -07:00
|
|
|
} else { s1 = ""; }
|
|
|
|
} else { s1 = s; }
|
|
|
|
log(debug, "pushing line: " + s1);
|
|
|
|
lines += [s1];
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
|
2012-04-15 03:27:24 -07:00
|
|
|
#debug(">>> block comment");
|
|
|
|
let p = rdr.chpos;
|
|
|
|
let mut lines: [str] = [];
|
|
|
|
let mut col: uint = rdr.col;
|
2012-05-30 11:36:30 -07:00
|
|
|
bump(rdr);
|
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
let mut curr_line = "/*";
|
|
|
|
let mut level: int = 1;
|
|
|
|
while level > 0 {
|
|
|
|
#debug("=== block comment level %d", level);
|
2012-05-30 11:36:30 -07:00
|
|
|
if is_eof(rdr) {(rdr as reader).fatal("unterminated block comment");}
|
2012-04-15 03:27:24 -07:00
|
|
|
if rdr.curr == '\n' {
|
|
|
|
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
|
|
|
curr_line = "";
|
2012-05-30 11:36:30 -07:00
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
} else {
|
|
|
|
str::push_char(curr_line, rdr.curr);
|
2012-05-30 11:36:30 -07:00
|
|
|
if rdr.curr == '/' && nextch(rdr) == '*' {
|
|
|
|
bump(rdr);
|
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
curr_line += "*";
|
|
|
|
level += 1;
|
|
|
|
} else {
|
2012-05-30 11:36:30 -07:00
|
|
|
if rdr.curr == '*' && nextch(rdr) == '/' {
|
|
|
|
bump(rdr);
|
|
|
|
bump(rdr);
|
2012-04-15 03:27:24 -07:00
|
|
|
curr_line += "/";
|
|
|
|
level -= 1;
|
2012-05-30 11:36:30 -07:00
|
|
|
} else { bump(rdr); }
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if str::len(curr_line) != 0u {
|
|
|
|
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
|
|
|
}
|
|
|
|
let mut style = if code_to_the_left { trailing } else { isolated };
|
|
|
|
consume_non_eol_whitespace(rdr);
|
2012-05-30 11:36:30 -07:00
|
|
|
if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u {
|
2012-04-15 03:27:24 -07:00
|
|
|
style = mixed;
|
|
|
|
}
|
|
|
|
#debug("<<< block comment");
|
|
|
|
ret {style: style, lines: lines, pos: p};
|
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn peeking_at_comment(rdr: string_reader) -> bool {
|
|
|
|
ret ((rdr.curr == '/' && nextch(rdr) == '/') ||
|
|
|
|
(rdr.curr == '/' && nextch(rdr) == '*')) ||
|
|
|
|
(rdr.curr == '#' && nextch(rdr) == '!');
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
|
|
|
|
2012-05-30 11:36:30 -07:00
|
|
|
fn consume_comment(rdr: string_reader, code_to_the_left: bool,
|
|
|
|
&comments: [cmnt]) {
|
2012-04-15 03:27:24 -07:00
|
|
|
#debug(">>> consume comment");
|
2012-05-30 11:36:30 -07:00
|
|
|
if rdr.curr == '/' && nextch(rdr) == '/' {
|
2012-04-15 03:27:24 -07:00
|
|
|
comments += [read_line_comments(rdr, code_to_the_left)];
|
2012-05-30 11:36:30 -07:00
|
|
|
} else if rdr.curr == '/' && nextch(rdr) == '*' {
|
2012-04-15 03:27:24 -07:00
|
|
|
comments += [read_block_comment(rdr, code_to_the_left)];
|
2012-05-30 11:36:30 -07:00
|
|
|
} else if rdr.curr == '#' && nextch(rdr) == '!' {
|
2012-05-22 17:49:16 -07:00
|
|
|
comments += [read_shebang_comment(rdr, code_to_the_left)];
|
2012-04-15 03:27:24 -07:00
|
|
|
} else { fail; }
|
|
|
|
#debug("<<< consume comment");
|
|
|
|
}
|
|
|
|
|
|
|
|
// One literal token found in the source.
type lit = {
    // Source text of the literal.
    lit: str,
    // Position of the literal (the low end of its token span).
    pos: uint
};
|
|
|
|
|
2012-04-15 03:57:24 -07:00
|
|
|
fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
|
2012-04-15 03:27:24 -07:00
|
|
|
path: str,
|
|
|
|
srdr: io::reader) ->
|
|
|
|
{cmnts: [cmnt], lits: [lit]} {
|
2012-04-30 11:52:07 -07:00
|
|
|
let src = @str::from_bytes(srdr.read_whole_stream());
|
2012-06-09 00:53:34 -07:00
|
|
|
let itr = @interner::mk::<@str>(
|
|
|
|
{|x|str::hash(*x)},
|
|
|
|
{|x,y|str::eq(*x, *y)}
|
|
|
|
);
|
2012-06-15 09:32:17 -07:00
|
|
|
let rdr = lexer::new_low_level_string_reader
|
|
|
|
(span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr);
|
|
|
|
|
2012-04-15 03:27:24 -07:00
|
|
|
let mut comments: [cmnt] = [];
|
|
|
|
let mut literals: [lit] = [];
|
|
|
|
let mut first_read: bool = true;
|
2012-05-30 11:36:30 -07:00
|
|
|
while !is_eof(rdr) {
|
2012-04-15 03:27:24 -07:00
|
|
|
loop {
|
|
|
|
let mut code_to_the_left = !first_read;
|
|
|
|
consume_non_eol_whitespace(rdr);
|
|
|
|
if rdr.curr == '\n' {
|
|
|
|
code_to_the_left = false;
|
|
|
|
consume_whitespace_counting_blank_lines(rdr, comments);
|
|
|
|
}
|
|
|
|
while peeking_at_comment(rdr) {
|
|
|
|
consume_comment(rdr, code_to_the_left, comments);
|
|
|
|
consume_whitespace_counting_blank_lines(rdr, comments);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2012-06-15 09:32:17 -07:00
|
|
|
|
|
|
|
|
|
|
|
let bstart = rdr.pos;
|
|
|
|
//discard, and look ahead; we're working with internal state
|
|
|
|
let {tok: tok, sp: sp} = rdr.next_token();
|
|
|
|
if token::is_lit(tok) {
|
|
|
|
let s = get_str_from(rdr, bstart);
|
2012-06-14 22:02:50 -07:00
|
|
|
vec::push(literals, {lit: s, pos: sp.lo});
|
2012-04-15 03:27:24 -07:00
|
|
|
log(debug, "tok lit: " + s);
|
|
|
|
} else {
|
2012-06-15 09:32:17 -07:00
|
|
|
log(debug, "tok: " + token::to_str(*rdr.interner, tok));
|
2012-04-15 03:27:24 -07:00
|
|
|
}
|
|
|
|
first_read = false;
|
|
|
|
}
|
|
|
|
ret {cmnts: comments, lits: literals};
|
|
|
|
}
|