From f785cccf0f9a4ea90b7cb752ec12ba749621a86e Mon Sep 17 00:00:00 2001 From: Paul Stansifer Date: Mon, 23 Jul 2012 15:34:43 -0700 Subject: [PATCH] Bugfix: make the parser handle the case where zero repetitions occur, by handling parse results on the basis of what names the matcher expects to bind, not on what names are actually bound. --- src/libsyntax/ast.rs | 5 ++-- src/libsyntax/ext/tt/earley_parser.rs | 38 +++++++++++++++++++-------- src/libsyntax/ext/tt/macro_rules.rs | 2 +- src/libsyntax/ext/tt/transcribe.rs | 2 ++ src/libsyntax/parse/parser.rs | 3 ++- 5 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 11a1a88bf5f..761bfd906ad 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -378,8 +378,9 @@ type matcher = spanned; enum matcher_ { /* match one token */ mtc_tok(token::token), - /* match repetitions of a sequence: body, separator, zero ok? : */ - mtc_rep(~[matcher], option, bool), + /* match repetitions of a sequence: body, separator, zero ok?, + lo, hi position-in-match-array used: */ + mtc_rep(~[matcher], option, bool, uint, uint), /* parse a Rust NT: name to bind, name of NT, position in match array : */ mtc_bb(ident, ident, uint) } diff --git a/src/libsyntax/ext/tt/earley_parser.rs b/src/libsyntax/ext/tt/earley_parser.rs index 152b92a4b53..5015efa67df 100644 --- a/src/libsyntax/ext/tt/earley_parser.rs +++ b/src/libsyntax/ext/tt/earley_parser.rs @@ -41,6 +41,7 @@ type matcher_pos = ~{ mut idx: uint, mut up: matcher_pos_up, // mutable for swapping only matches: ~[dvec<@arb_depth>], + match_lo: uint, match_hi: uint, sp_lo: uint, }; @@ -55,17 +56,25 @@ fn count_names(ms: &[matcher]) -> uint { vec::foldl(0u, ms, |ct, m| { ct + alt m.node { mtc_tok(_) { 0u } - mtc_rep(more_ms, _, _) { count_names(more_ms) } + mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) } mtc_bb(_,_,_) { 1u } }}) } #[warn(no_non_implicitly_copyable_typarams)] -fn new_matcher_pos(ms: ~[matcher], sep: option, lo: uint) +fn initial_matcher_pos(ms: ~[matcher], sep: option, lo: uint) -> matcher_pos { + let mut match_idx_hi = 0u; + for ms.each() |elt| { + alt elt.node { + mtc_tok(_) {} + mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic... + mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest + } + } ~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none), matches: copy vec::from_fn(count_names(ms), |_i| dvec::dvec()), - sp_lo: lo} + match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo} } /* logically, an arb_depth should contain only one kind of nonterminal */ @@ -79,7 +88,7 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth]) ret_val: hashmap) { alt m { {node: mtc_tok(_), span: _} { } - {node: mtc_rep(more_ms, _, _), span: _} { + {node: mtc_rep(more_ms, _, _, _, _), span: _} { for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) }; } {node: mtc_bb(bind_name, _, idx), span: sp} { @@ -104,7 +113,7 @@ enum parse_result { fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) -> parse_result { let mut cur_eis = ~[]; - vec::push(cur_eis, new_matcher_pos(ms, none, rdr.peek().sp.lo)); + vec::push(cur_eis, initial_matcher_pos(ms, none, rdr.peek().sp.lo)); loop { let mut bb_eis = ~[]; // black-box parsed by parser.rs @@ -141,10 +150,10 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) // I bet this is a perf problem: we're preemptively // doing a lot of array work that will get thrown away // most of the time. - for ei.matches.eachi() |idx, elt| { - let sub = elt.get(); - // Some subtrees don't contain the name at all - if sub.len() == 0u { again; } + + // Only touch the binders we have actually bound + for uint::range(ei.match_lo, ei.match_hi) |idx| { + let sub = ei.matches[idx].get(); new_pos.matches[idx] .push(@seq(sub, mk_sp(ei.sp_lo,sp.hi))); } @@ -176,10 +185,15 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) } else { alt copy ei.elts[idx].node { /* need to descend into sequence */ - mtc_rep(matchers, sep, zero_ok) { + mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){ if zero_ok { let new_ei = copy ei; new_ei.idx += 1u; + //we specifically matched zero repeats. + for uint::range(match_idx_lo, match_idx_hi) |idx| { + new_ei.matches[idx].push(@seq(~[], sp)); + } + vec::push(cur_eis, new_ei); } @@ -189,7 +203,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) vec::push(cur_eis, ~{ elts: matchers, sep: sep, mut idx: 0u, mut up: matcher_pos_up(some(ei_t)), - matches: matches, sp_lo: sp.lo + matches: matches, + match_lo: match_idx_lo, match_hi: match_idx_hi, + sp_lo: sp.lo }); } mtc_bb(_,_,_) { vec::push(bb_eis, ei) } diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 7bed3107f53..a1bb879965f 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -21,7 +21,7 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident, ms(mtc_bb(@~"lhs",@~"mtcs", 0u)), ms(mtc_tok(FAT_ARROW)), ms(mtc_bb(@~"rhs",@~"tt", 1u)), - ], some(SEMI), false))]; + ], some(SEMI), false, 0u, 2u))]; let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic, cx.parse_sess().interner, none, arg); diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index a9bc124b605..b9d4adc2339 100644 --- a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -86,6 +86,8 @@ pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth { seq(ads, _) { ads[idx] } } } + unchecked {io::println(#fmt["%? / %?", copy r.repeat_idx, + copy r.repeat_len]);}; vec::foldl(start, r.repeat_idx, red) } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 656e064d694..b9fb9dab374 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1194,13 +1194,14 @@ class parser { let m = if self.token == token::DOLLAR { self.bump(); if self.token == token::LPAREN { + let name_idx_lo = *name_idx; let ms = self.parse_matcher_subseq(name_idx, token::LPAREN, token::RPAREN); if ms.len() == 0u { self.fatal(~"repetition body must be nonempty"); } let (sep, zerok) = self.parse_sep_and_zerok(); - mtc_rep(ms, sep, zerok) + mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx) } else { let bound_to = self.parse_ident(); self.expect(token::COLON);