rust/src/libsyntax/parse/comments.rs

320 lines
9.9 KiB
Rust
Raw Normal View History

import io::println;//XXXXXXXXxxx
import util::interner;
2012-07-09 16:01:07 -07:00
import lexer::{string_reader, bump, is_eof, nextch,
2012-08-07 18:10:06 -07:00
is_whitespace, get_str_from, reader};
export cmnt;
export lit;
export cmnt_style;
export gather_comments_and_literals;
export is_doc_comment, doc_comment_style, strip_doc_comment_decoration;
export isolated, trailing, mixed, blank_line;
// How a gathered comment sits relative to the code around it. The readers
// in this file pick `trailing` when code precedes the comment on its line
// and `isolated` otherwise; a one-line block comment followed by more text
// on the same line becomes `mixed`; `blank_line` entries carry no text.
enum cmnt_style {
isolated, // No code on either side of each line of the comment
trailing, // Code exists to the left of the comment
mixed, // Code before /* foo */ and after the comment
blank_line, // Just a manual blank line "\n\n", for layout
}
// One gathered comment: its placement style, its text split into lines
// (empty for `blank_line` entries), and the reader's `chpos` recorded when
// the comment was first encountered.
type cmnt = {style: cmnt_style, lines: ~[~str], pos: uint};
/// Reports whether `s` opens with one of the four doc-comment markers:
/// `///`, `//!`, `/**` or `/*!`.
fn is_doc_comment(s: ~str) -> bool {
    // Line-style markers first, then block-style ones.
    let line_form = s.starts_with(~"///") || s.starts_with(~"//!");
    let block_form = s.starts_with(~"/**") || s.starts_with(~"/*!");
    line_form || block_form
}
/// Classifies a doc comment as an inner or an outer attribute.
///
/// Comments opening with `//!` or `/*!` give `ast::attr_inner`; every other
/// doc comment gives `ast::attr_outer`. The leading `assert` fails the task
/// when `comment` is not a doc comment at all.
fn doc_comment_style(comment: ~str) -> ast::attr_style {
    assert is_doc_comment(comment);
    let inner = comment.starts_with(~"//!") || comment.starts_with(~"/*!");
    if inner {
        return ast::attr_inner;
    }
    return ast::attr_outer;
}
fn strip_doc_comment_decoration(comment: ~str) -> ~str {
/// remove whitespace-only lines from the start/end of lines
fn vertical_trim(lines: ~[~str]) -> ~[~str] {
let mut i = 0u, j = lines.len();
while i < j && lines[i].trim().is_empty() {
i += 1u;
}
while j > i && lines[j - 1u].trim().is_empty() {
j -= 1u;
}
2012-08-01 17:30:05 -07:00
return lines.slice(i, j);
}
// drop leftmost columns that contain only values in chars
fn block_trim(lines: ~[~str], chars: ~str, max: option<uint>) -> ~[~str] {
let mut i = max.get_default(uint::max_value);
Merge remote-tracking branch 'Dretch/prettydocs' Conflicts: src/compiletest/errors.rs src/libsyntax/parse/attr.rs src/libsyntax/parse/comments.rs src/test/compile-fail/ambig_impl_unify.rs src/test/compile-fail/assign-super.rs src/test/compile-fail/bad-for-loop.rs src/test/compile-fail/bad-var-env-capture-in-block-arg.rs src/test/compile-fail/block-arg-as-stmt-with-value.rs src/test/compile-fail/borrowck-assign-comp-idx.rs src/test/compile-fail/borrowck-lend-flow.rs src/test/compile-fail/borrowck-loan-blocks-move-cc.rs src/test/compile-fail/borrowck-loan-blocks-mut-uniq.rs src/test/compile-fail/borrowck-loan-rcvr.rs src/test/compile-fail/borrowck-loan-vec-content.rs src/test/compile-fail/borrowck-mut-vec-as-imm-slice-bad.rs src/test/compile-fail/cap-clause-with-stack-closure.rs src/test/compile-fail/do1.rs src/test/compile-fail/do2.rs src/test/compile-fail/empty-vec-trailing-comma.rs src/test/compile-fail/evec-subtyping.rs src/test/compile-fail/issue-1896.rs src/test/compile-fail/issue-2149.rs src/test/compile-fail/issue-2150.rs src/test/compile-fail/issue-2487-b.rs src/test/compile-fail/kindck-implicit-close-over-mut-var.rs src/test/compile-fail/liveness-issue-2163.rs src/test/compile-fail/liveness-use-in-index-lvalue.rs src/test/compile-fail/no-reuse-move-arc.rs src/test/compile-fail/no-send-res-ports.rs src/test/compile-fail/non-const.rs src/test/compile-fail/pure-higher-order.rs src/test/compile-fail/pure-loop-body.rs src/test/compile-fail/regions-addr-of-upvar-self.rs src/test/compile-fail/regions-escape-loop-via-vec.rs src/test/compile-fail/regions-scoping.rs src/test/compile-fail/seq-args.rs src/test/compile-fail/tstate-unsat-in-called-fn-expr.rs src/test/compile-fail/tstate-unsat-in-fn-expr.rs src/test/compile-fail/vec-add.rs src/test/compile-fail/vec-concat-bug.rs src/test/compile-fail/vector-no-ann.rs
2012-07-02 14:44:31 -07:00
for lines.each |line| {
if line.trim().is_empty() {
again;
}
Merge remote-tracking branch 'Dretch/prettydocs' Conflicts: src/compiletest/errors.rs src/libsyntax/parse/attr.rs src/libsyntax/parse/comments.rs src/test/compile-fail/ambig_impl_unify.rs src/test/compile-fail/assign-super.rs src/test/compile-fail/bad-for-loop.rs src/test/compile-fail/bad-var-env-capture-in-block-arg.rs src/test/compile-fail/block-arg-as-stmt-with-value.rs src/test/compile-fail/borrowck-assign-comp-idx.rs src/test/compile-fail/borrowck-lend-flow.rs src/test/compile-fail/borrowck-loan-blocks-move-cc.rs src/test/compile-fail/borrowck-loan-blocks-mut-uniq.rs src/test/compile-fail/borrowck-loan-rcvr.rs src/test/compile-fail/borrowck-loan-vec-content.rs src/test/compile-fail/borrowck-mut-vec-as-imm-slice-bad.rs src/test/compile-fail/cap-clause-with-stack-closure.rs src/test/compile-fail/do1.rs src/test/compile-fail/do2.rs src/test/compile-fail/empty-vec-trailing-comma.rs src/test/compile-fail/evec-subtyping.rs src/test/compile-fail/issue-1896.rs src/test/compile-fail/issue-2149.rs src/test/compile-fail/issue-2150.rs src/test/compile-fail/issue-2487-b.rs src/test/compile-fail/kindck-implicit-close-over-mut-var.rs src/test/compile-fail/liveness-issue-2163.rs src/test/compile-fail/liveness-use-in-index-lvalue.rs src/test/compile-fail/no-reuse-move-arc.rs src/test/compile-fail/no-send-res-ports.rs src/test/compile-fail/non-const.rs src/test/compile-fail/pure-higher-order.rs src/test/compile-fail/pure-loop-body.rs src/test/compile-fail/regions-addr-of-upvar-self.rs src/test/compile-fail/regions-escape-loop-via-vec.rs src/test/compile-fail/regions-scoping.rs src/test/compile-fail/seq-args.rs src/test/compile-fail/tstate-unsat-in-called-fn-expr.rs src/test/compile-fail/tstate-unsat-in-fn-expr.rs src/test/compile-fail/vec-add.rs src/test/compile-fail/vec-concat-bug.rs src/test/compile-fail/vector-no-ann.rs
2012-07-02 14:44:31 -07:00
for line.each_chari |j, c| {
if j >= i {
break;
}
if !chars.contains_char(c) {
i = j;
break;
}
}
}
2012-08-01 17:30:05 -07:00
return do lines.map |line| {
let chars = str::chars(line);
if i > chars.len() {
~""
} else {
str::from_chars(chars.slice(i, chars.len()))
}
};
}
if comment.starts_with(~"//") {
2012-08-01 17:30:05 -07:00
return comment.slice(3u, comment.len()).trim();
}
if comment.starts_with(~"/*") {
let lines = str::lines_any(comment.slice(3u, comment.len() - 2u));
let lines = vertical_trim(lines);
let lines = block_trim(lines, ~"\t ", none);
let lines = block_trim(lines, ~"*", some(1u));
let lines = block_trim(lines, ~"\t ", none);
2012-08-01 17:30:05 -07:00
return str::connect(lines, ~"\n");
}
fail ~"not a doc-comment: " + comment;
}
/// Collects characters from `rdr` up to, but not including, the next
/// newline or end of input. A terminating newline is consumed but is not
/// part of the returned string.
fn read_to_eol(rdr: string_reader) -> ~str {
    let mut text = ~"";
    loop {
        if rdr.curr == '\n' || is_eof(rdr) {
            break;
        }
        str::push_char(text, rdr.curr);
        bump(rdr);
    }
    // Step over the newline so the reader is left at the start of the
    // following line.
    if rdr.curr == '\n' {
        bump(rdr);
    }
    return text;
}
/// Reads the rest of the current line and returns it, asserting that it
/// begins with `//` or with `#!`.
fn read_one_line_comment(rdr: string_reader) -> ~str {
    let line = read_to_eol(rdr);
    let lead = line[0];
    assert ((lead == '/' as u8 && line[1] == '/' as u8) ||
            (lead == '#' as u8 && line[1] == '!' as u8));
    return line;
}
2012-05-30 11:36:30 -07:00
/// Advances `rdr` over whitespace on the current line, stopping at the
/// first non-whitespace character, at a newline, or at end of input.
fn consume_non_eol_whitespace(rdr: string_reader) {
    loop {
        if !is_whitespace(rdr.curr) || rdr.curr == '\n' || is_eof(rdr) {
            break;
        }
        bump(rdr);
    }
}
/// Appends a `blank_line` entry with no text to `comments`, positioned at
/// the reader's current `chpos`.
fn push_blank_line_comment(rdr: string_reader, &comments: ~[cmnt]) {
    debug!{">>> blank-line comment"};
    let no_text: ~[~str] = ~[];
    let marker = {style: blank_line, lines: no_text, pos: rdr.chpos};
    vec::push(comments, marker);
}
2012-05-30 11:36:30 -07:00
fn consume_whitespace_counting_blank_lines(rdr: string_reader,
&comments: ~[cmnt]) {
2012-05-30 11:36:30 -07:00
while is_whitespace(rdr.curr) && !is_eof(rdr) {
if rdr.col == 0u && rdr.curr == '\n' {
push_blank_line_comment(rdr, comments);
}
2012-05-30 11:36:30 -07:00
bump(rdr);
}
}
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
&comments: ~[cmnt]) {
debug!{">>> shebang comment"};
let p = rdr.chpos;
debug!{"<<< shebang comment"};
vec::push(comments, {
style: if code_to_the_left { trailing } else { isolated },
Merge remote-tracking branch 'Dretch/prettydocs' Conflicts: src/compiletest/errors.rs src/libsyntax/parse/attr.rs src/libsyntax/parse/comments.rs src/test/compile-fail/ambig_impl_unify.rs src/test/compile-fail/assign-super.rs src/test/compile-fail/bad-for-loop.rs src/test/compile-fail/bad-var-env-capture-in-block-arg.rs src/test/compile-fail/block-arg-as-stmt-with-value.rs src/test/compile-fail/borrowck-assign-comp-idx.rs src/test/compile-fail/borrowck-lend-flow.rs src/test/compile-fail/borrowck-loan-blocks-move-cc.rs src/test/compile-fail/borrowck-loan-blocks-mut-uniq.rs src/test/compile-fail/borrowck-loan-rcvr.rs src/test/compile-fail/borrowck-loan-vec-content.rs src/test/compile-fail/borrowck-mut-vec-as-imm-slice-bad.rs src/test/compile-fail/cap-clause-with-stack-closure.rs src/test/compile-fail/do1.rs src/test/compile-fail/do2.rs src/test/compile-fail/empty-vec-trailing-comma.rs src/test/compile-fail/evec-subtyping.rs src/test/compile-fail/issue-1896.rs src/test/compile-fail/issue-2149.rs src/test/compile-fail/issue-2150.rs src/test/compile-fail/issue-2487-b.rs src/test/compile-fail/kindck-implicit-close-over-mut-var.rs src/test/compile-fail/liveness-issue-2163.rs src/test/compile-fail/liveness-use-in-index-lvalue.rs src/test/compile-fail/no-reuse-move-arc.rs src/test/compile-fail/no-send-res-ports.rs src/test/compile-fail/non-const.rs src/test/compile-fail/pure-higher-order.rs src/test/compile-fail/pure-loop-body.rs src/test/compile-fail/regions-addr-of-upvar-self.rs src/test/compile-fail/regions-escape-loop-via-vec.rs src/test/compile-fail/regions-scoping.rs src/test/compile-fail/seq-args.rs src/test/compile-fail/tstate-unsat-in-called-fn-expr.rs src/test/compile-fail/tstate-unsat-in-fn-expr.rs src/test/compile-fail/vec-add.rs src/test/compile-fail/vec-concat-bug.rs src/test/compile-fail/vector-no-ann.rs
2012-07-02 14:44:31 -07:00
lines: ~[read_one_line_comment(rdr)],
pos: p
});
}
fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
&comments: ~[cmnt]) {
debug!{">>> line comments"};
let p = rdr.chpos;
let mut lines: ~[~str] = ~[];
2012-05-30 11:36:30 -07:00
while rdr.curr == '/' && nextch(rdr) == '/' {
let line = read_one_line_comment(rdr);
log(debug, line);
if is_doc_comment(line) { // doc-comments are not put in comments
break;
}
vec::push(lines, line);
consume_non_eol_whitespace(rdr);
}
debug!{"<<< line comments"};
if !lines.is_empty() {
vec::push(comments, {
style: if code_to_the_left { trailing } else { isolated },
lines: lines,
pos: p
});
}
}
fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool {
let mut i: uint = begin;
2012-08-01 17:30:05 -07:00
while i != end {
if !is_whitespace(s[i] as char) { return false; } i += 1u;
}
return true;
}
/// Appends `s` to `lines`, first removing its leading `col` bytes when
/// those bytes are all whitespace.
///
/// If `s` is no longer than `col` and entirely whitespace, an empty string
/// is pushed. If a non-whitespace byte occurs within the first `col` bytes,
/// `s` is pushed unchanged.
fn trim_whitespace_prefix_and_push_line(&lines: ~[~str],
                                        s: ~str, col: uint) unsafe {
    let len = str::len(s);
    let prefix_end = uint::min(len, col);
    let trimmed = if !all_whitespace(s, 0u, prefix_end) {
        s
    } else if col < len {
        str::slice(s, col, len)
    } else {
        ~""
    };
    log(debug, ~"pushing line: " + trimmed);
    vec::push(lines, trimmed);
}
fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
&comments: ~[cmnt]) {
debug!{">>> block comment"};
let p = rdr.chpos;
let mut lines: ~[~str] = ~[];
let mut col: uint = rdr.col;
2012-05-30 11:36:30 -07:00
bump(rdr);
bump(rdr);
// doc-comments are not really comments, they are attributes
if rdr.curr == '*' || rdr.curr == '!' {
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
bump(rdr);
}
if !is_eof(rdr) {
bump(rdr);
bump(rdr);
}
2012-08-01 17:30:05 -07:00
return;
}
let mut curr_line = ~"/*";
let mut level: int = 1;
while level > 0 {
debug!{"=== block comment level %d", level};
if is_eof(rdr) {(rdr as reader).fatal(~"unterminated block comment");}
if rdr.curr == '\n' {
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
curr_line = ~"";
2012-05-30 11:36:30 -07:00
bump(rdr);
} else {
str::push_char(curr_line, rdr.curr);
2012-05-30 11:36:30 -07:00
if rdr.curr == '/' && nextch(rdr) == '*' {
bump(rdr);
bump(rdr);
curr_line += ~"*";
level += 1;
} else {
2012-05-30 11:36:30 -07:00
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
curr_line += ~"/";
level -= 1;
2012-05-30 11:36:30 -07:00
} else { bump(rdr); }
}
}
}
if str::len(curr_line) != 0u {
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
}
let mut style = if code_to_the_left { trailing } else { isolated };
consume_non_eol_whitespace(rdr);
2012-05-30 11:36:30 -07:00
if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u {
style = mixed;
}
debug!{"<<< block comment"};
vec::push(comments, {style: style, lines: lines, pos: p});
}
2012-05-30 11:36:30 -07:00
fn peeking_at_comment(rdr: string_reader) -> bool {
2012-08-01 17:30:05 -07:00
return ((rdr.curr == '/' && nextch(rdr) == '/') ||
2012-05-30 11:36:30 -07:00
(rdr.curr == '/' && nextch(rdr) == '*')) ||
(rdr.curr == '#' && nextch(rdr) == '!');
}
2012-05-30 11:36:30 -07:00
fn consume_comment(rdr: string_reader, code_to_the_left: bool,
&comments: ~[cmnt]) {
debug!{">>> consume comment"};
2012-05-30 11:36:30 -07:00
if rdr.curr == '/' && nextch(rdr) == '/' {
read_line_comments(rdr, code_to_the_left, comments);
2012-05-30 11:36:30 -07:00
} else if rdr.curr == '/' && nextch(rdr) == '*' {
read_block_comment(rdr, code_to_the_left, comments);
2012-05-30 11:36:30 -07:00
} else if rdr.curr == '#' && nextch(rdr) == '!' {
read_shebang_comment(rdr, code_to_the_left, comments);
} else { fail; }
debug!{"<<< consume comment"};
}
// One literal token found while scanning: `lit` is the string obtained from
// `get_str_from` for the token, `pos` is the low end of the token's span.
type lit = {lit: ~str, pos: uint};
2012-04-15 03:57:24 -07:00
/// Scans an entire source stream and returns every comment and every
/// literal token found in it.
///
/// `srdr` is read to the end, turned into a string, and lexed with a
/// low-level string reader registered under `path`. Before each token the
/// intervening whitespace and comments are consumed; comments and blank
/// lines are appended to the `cmnts` result. Each token for which
/// `token::is_lit` holds is appended to `lits`. Doc comments are not
/// included in `cmnts` (the comment readers skip them).
fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                path: ~str,
                                srdr: io::reader) ->
   {cmnts: ~[cmnt], lits: ~[lit]} {
    let src = @str::from_bytes(srdr.read_whole_stream());
    let itr = @interner::mk::<@~str>(
        |x| str::hash(*x),
        |x,y| str::eq(*x, *y)
    );
    let filemap = codemap::new_filemap(path, src, 0u, 0u);
    let rdr = lexer::new_low_level_string_reader(span_diagnostic,
                                                 filemap, itr);

    let mut comments: ~[cmnt] = ~[];
    let mut literals: ~[lit] = ~[];
    let mut first_read: bool = true;
    while !is_eof(rdr) {
        // Whitespace and comments ahead of the next token. A comment has
        // code to its left only if a token was already read and no newline
        // separates it from that token.
        consume_non_eol_whitespace(rdr);
        let at_newline = rdr.curr == '\n';
        if at_newline {
            consume_whitespace_counting_blank_lines(rdr, comments);
        }
        let code_to_the_left = !first_read && !at_newline;
        while peeking_at_comment(rdr) {
            consume_comment(rdr, code_to_the_left, comments);
            consume_whitespace_counting_blank_lines(rdr, comments);
        }

        let token_start = rdr.pos;
        rdr.next_token();
        //discard, and look ahead; we're working with internal state
        let {tok: next_tok, sp: next_span} = rdr.peek();
        if token::is_lit(next_tok) {
            let text = get_str_from(rdr, token_start);
            vec::push(literals, {lit: text, pos: next_span.lo});
            log(debug, ~"tok lit: " + text);
        } else {
            log(debug, ~"tok: " + token::to_str(*rdr.interner, next_tok));
        }
        first_read = false;
    }
    return {cmnts: comments, lits: literals};
}