Bugfix: make the parser handle the case where zero repetitions occur, by handling parse results on the basis of what names the matcher expects to bind, not on what names are actually bound.

This commit is contained in:
Paul Stansifer 2012-07-23 15:34:43 -07:00
parent 1c472564e3
commit f785cccf0f
5 changed files with 35 additions and 15 deletions

View File

@ -378,8 +378,9 @@ type matcher = spanned<matcher_>;
/* The payload of a `matcher` (a `matcher` is a `spanned<matcher_>`): one
   element of a matcher sequence. */
enum matcher_ {
/* match one token */
mtc_tok(token::token),
/* match repetitions of a sequence: body, separator, zero ok? : */
mtc_rep(~[matcher], option<token::token>, bool),
/* match repetitions of a sequence: body, separator, zero ok?,
lo, hi position-in-match-array used: */
/* lo and hi delimit the match-array positions claimed by the binders inside
   the body: the parser records its name index before parsing the body (lo)
   and again afterwards (hi), so the range runs from lo up to, but not
   including, hi. Carrying the range on the matcher lets the match engine
   push an empty sequence for every name in the body when the repetition
   matches zero times. */
mtc_rep(~[matcher], option<token::token>, bool, uint, uint),
/* parse a Rust NT: name to bind, name of NT, position in match array : */
mtc_bb(ident, ident, uint)
}

View File

@ -41,6 +41,7 @@ type matcher_pos = ~{
mut idx: uint,
mut up: matcher_pos_up, // mutable for swapping only
matches: ~[dvec<@arb_depth>],
match_lo: uint, match_hi: uint,
sp_lo: uint,
};
@ -55,17 +56,25 @@ fn count_names(ms: &[matcher]) -> uint {
vec::foldl(0u, ms, |ct, m| {
ct + alt m.node {
mtc_tok(_) { 0u }
mtc_rep(more_ms, _, _) { count_names(more_ms) }
mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) }
mtc_bb(_,_,_) { 1u }
}})
}
// Build the starting matcher position for a matcher sequence `ms`; `parse`
// uses it to seed its list of current positions.
//
// ms:  the matchers to walk
// sep: separator token for the sequence, if any
// lo:  stored as `sp_lo` in the returned position
//
// The result starts at element 0, has no parent (`up` is none), owns one
// empty dvec per name counted by `count_names(ms)`, and covers match-array
// positions from 0 up to the bound computed below.
#[warn(no_non_implicitly_copyable_typarams)]
fn new_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
fn initial_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
-> matcher_pos {
// Upper bound (exclusive) of the match-array positions used by `ms`.
// Stays 0 when `ms` contains no binders or repetitions.
let mut match_idx_hi = 0u;
// Only the top-level elements are inspected: a repetition already carries
// the upper bound for everything nested inside it, and a binder at position
// `pos` needs the bound to be at least pos+1. Each assignment overwrites the
// previous one, which relies on positions increasing from left to right.
for ms.each() |elt| {
alt elt.node {
mtc_tok(_) {}
mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic...
mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest
}
}
~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
matches: copy vec::from_fn(count_names(ms), |_i| dvec::dvec()),
sp_lo: lo}
match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo}
}
/* logically, an arb_depth should contain only one kind of nonterminal */
@ -79,7 +88,7 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth])
ret_val: hashmap<ident, @arb_depth>) {
alt m {
{node: mtc_tok(_), span: _} { }
{node: mtc_rep(more_ms, _, _), span: _} {
{node: mtc_rep(more_ms, _, _, _, _), span: _} {
for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
}
{node: mtc_bb(bind_name, _, idx), span: sp} {
@ -104,7 +113,7 @@ enum parse_result {
fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
-> parse_result {
let mut cur_eis = ~[];
vec::push(cur_eis, new_matcher_pos(ms, none, rdr.peek().sp.lo));
vec::push(cur_eis, initial_matcher_pos(ms, none, rdr.peek().sp.lo));
loop {
let mut bb_eis = ~[]; // black-box parsed by parser.rs
@ -141,10 +150,10 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
// I bet this is a perf problem: we're preemptively
// doing a lot of array work that will get thrown away
// most of the time.
for ei.matches.eachi() |idx, elt| {
let sub = elt.get();
// Some subtrees don't contain the name at all
if sub.len() == 0u { again; }
// Only touch the binders we have actually bound
for uint::range(ei.match_lo, ei.match_hi) |idx| {
let sub = ei.matches[idx].get();
new_pos.matches[idx]
.push(@seq(sub, mk_sp(ei.sp_lo,sp.hi)));
}
@ -176,10 +185,15 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
} else {
alt copy ei.elts[idx].node {
/* need to descend into sequence */
mtc_rep(matchers, sep, zero_ok) {
mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){
if zero_ok {
let new_ei = copy ei;
new_ei.idx += 1u;
//we specifically matched zero repeats.
for uint::range(match_idx_lo, match_idx_hi) |idx| {
new_ei.matches[idx].push(@seq(~[], sp));
}
vec::push(cur_eis, new_ei);
}
@ -189,7 +203,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
vec::push(cur_eis, ~{
elts: matchers, sep: sep, mut idx: 0u,
mut up: matcher_pos_up(some(ei_t)),
matches: matches, sp_lo: sp.lo
matches: matches,
match_lo: match_idx_lo, match_hi: match_idx_hi,
sp_lo: sp.lo
});
}
mtc_bb(_,_,_) { vec::push(bb_eis, ei) }

View File

@ -21,7 +21,7 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
ms(mtc_bb(@~"lhs",@~"mtcs", 0u)),
ms(mtc_tok(FAT_ARROW)),
ms(mtc_bb(@~"rhs",@~"tt", 1u)),
], some(SEMI), false))];
], some(SEMI), false, 0u, 2u))];
let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic,
cx.parse_sess().interner, none, arg);

View File

@ -86,6 +86,8 @@ pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth {
seq(ads, _) { ads[idx] }
}
}
unchecked {io::println(#fmt["%? / %?", copy r.repeat_idx,
copy r.repeat_len]);};
vec::foldl(start, r.repeat_idx, red)
}

View File

@ -1194,13 +1194,14 @@ class parser {
let m = if self.token == token::DOLLAR {
self.bump();
if self.token == token::LPAREN {
let name_idx_lo = *name_idx;
let ms = self.parse_matcher_subseq(name_idx, token::LPAREN,
token::RPAREN);
if ms.len() == 0u {
self.fatal(~"repetition body must be nonempty");
}
let (sep, zerok) = self.parse_sep_and_zerok();
mtc_rep(ms, sep, zerok)
mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx)
} else {
let bound_to = self.parse_ident();
self.expect(token::COLON);