Bugfix: make the parser handle a repetition that matches zero times. Parse results are now recorded for every name the matcher expects to bind (its lo..hi range in the match array), not only for the names that were actually bound.

2012-07-23 15:34:43 -07:00 · 2012-07-23 15:34:43 -07:00 · f785cccf0f
commit f785cccf0f
parent 1c472564e3
5 changed files with 35 additions and 15 deletions
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@@ -378,8 +378,9 @@ type matcher = spanned<matcher_>;
 enum matcher_ {
    /* match one token */
    mtc_tok(token::token),
-    /* match repetitions of a sequence: body, separator, zero ok? : */
-    mtc_rep(~[matcher], option<token::token>, bool),
+    /* match repetitions of a sequence: body, separator, zero ok?,
+    lo, hi position-in-match-array used: */
+    mtc_rep(~[matcher], option<token::token>, bool, uint, uint),
    /* parse a Rust NT: name to bind, name of NT, position in match array : */
    mtc_bb(ident, ident, uint)
 }
--- a/src/libsyntax/ext/tt/earley_parser.rs
+++ b/src/libsyntax/ext/tt/earley_parser.rs
@@ -41,6 +41,7 @@ type matcher_pos = ~{
    mut idx: uint,
    mut up: matcher_pos_up, // mutable for swapping only
    matches: ~[dvec<@arb_depth>],
+    match_lo: uint, match_hi: uint,
    sp_lo: uint,
 };

@@ -55,17 +56,25 @@ fn count_names(ms: &[matcher]) -> uint {
    vec::foldl(0u, ms, |ct, m| {
        ct + alt m.node {
          mtc_tok(_) { 0u }
-          mtc_rep(more_ms, _, _) { count_names(more_ms) }
+          mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) }
          mtc_bb(_,_,_) { 1u }
        }})
 }

 #[warn(no_non_implicitly_copyable_typarams)]
-fn new_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
+fn initial_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
    -> matcher_pos {
+    let mut match_idx_hi = 0u;
+    for ms.each() |elt| {
+        alt elt.node {
+          mtc_tok(_) {}
+          mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic...
+          mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest
+        }
+    }
    ~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
      matches: copy vec::from_fn(count_names(ms), |_i| dvec::dvec()),
-      sp_lo: lo}
+      match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo}
 }

 /* logically, an arb_depth should contain only one kind of nonterminal */
@@ -79,7 +88,7 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth])
             ret_val: hashmap<ident, @arb_depth>) {
        alt m {
          {node: mtc_tok(_), span: _} { }
-          {node: mtc_rep(more_ms, _, _), span: _} {
+          {node: mtc_rep(more_ms, _, _, _, _), span: _} {
            for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
          }
          {node: mtc_bb(bind_name, _, idx), span: sp} {
@@ -104,7 +113,7 @@ enum parse_result {
 fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
    -> parse_result {
    let mut cur_eis = ~[];
-    vec::push(cur_eis, new_matcher_pos(ms, none, rdr.peek().sp.lo));
+    vec::push(cur_eis, initial_matcher_pos(ms, none, rdr.peek().sp.lo));

    loop {
        let mut bb_eis = ~[]; // black-box parsed by parser.rs
@@ -141,10 +150,10 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
                        // I bet this is a perf problem: we're preemptively
                        // doing a lot of array work that will get thrown away
                        // most of the time.
-                        for ei.matches.eachi() |idx, elt| {
-                            let sub = elt.get();
-                            // Some subtrees don't contain the name at all
-                            if sub.len() == 0u { again; }
+
+                        // Only touch the binders that belong to this
+                        // repetition (match_lo..match_hi), bound or not
+                        for uint::range(ei.match_lo, ei.match_hi) |idx| {
+                            let sub = ei.matches[idx].get();
                            new_pos.matches[idx]
                                .push(@seq(sub, mk_sp(ei.sp_lo,sp.hi)));
                        }
@@ -176,10 +185,15 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
            } else {
                alt copy ei.elts[idx].node {
                  /* need to descend into sequence */
-                  mtc_rep(matchers, sep, zero_ok) {
+                  mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){
                    if zero_ok {
                        let new_ei = copy ei;
                        new_ei.idx += 1u;
+                        //we specifically matched zero repeats.
+                        for uint::range(match_idx_lo, match_idx_hi) |idx| {
+                            new_ei.matches[idx].push(@seq(~[], sp));
+                        }
+
                        vec::push(cur_eis, new_ei);
                    }

@@ -189,7 +203,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
                    vec::push(cur_eis, ~{
                        elts: matchers, sep: sep, mut idx: 0u,
                        mut up: matcher_pos_up(some(ei_t)),
-                        matches: matches, sp_lo: sp.lo
+                        matches: matches,
+                        match_lo: match_idx_lo, match_hi: match_idx_hi,
+                        sp_lo: sp.lo
                    });
                  }
                  mtc_bb(_,_,_) { vec::push(bb_eis, ei) }
--- a/src/libsyntax/ext/tt/macro_rules.rs
+++ b/src/libsyntax/ext/tt/macro_rules.rs
@@ -21,7 +21,7 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
            ms(mtc_bb(@~"lhs",@~"mtcs", 0u)),
            ms(mtc_tok(FAT_ARROW)),
            ms(mtc_bb(@~"rhs",@~"tt", 1u)),
-        ], some(SEMI), false))];
+        ], some(SEMI), false, 0u, 2u))];

    let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic,
                                   cx.parse_sess().interner, none, arg);
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -86,6 +86,8 @@ pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth {
          seq(ads, _) { ads[idx] }
        }
    }
+    unchecked {io::println(#fmt["%? / %?", copy r.repeat_idx,
+                                copy r.repeat_len]);};
    vec::foldl(start, r.repeat_idx, red)
 }

--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -1194,13 +1194,14 @@ class parser {
        let m = if self.token == token::DOLLAR {
            self.bump();
            if self.token == token::LPAREN {
+                let name_idx_lo = *name_idx;
                let ms = self.parse_matcher_subseq(name_idx, token::LPAREN,
                                                   token::RPAREN);
                if ms.len() == 0u {
                    self.fatal(~"repetition body must be nonempty");
                }
                let (sep, zerok) = self.parse_sep_and_zerok();
-                mtc_rep(ms, sep, zerok)
+                mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx)
            } else {
                let bound_to = self.parse_ident();
                self.expect(token::COLON);