2012-06-27 17:29:35 -05:00
|
|
|
import util::interner::interner;
|
|
|
|
import diagnostic::span_handler;
|
2012-07-27 21:14:46 -05:00
|
|
|
import ast::{token_tree, tt_delim, tt_tok, tt_seq, tt_nonterminal,ident};
|
|
|
|
import earley_parser::{named_match, matched_seq, matched_nonterminal};
|
2012-06-27 17:29:35 -05:00
|
|
|
import codemap::span;
|
2012-07-27 21:14:46 -05:00
|
|
|
import parse::token::{EOF, INTERPOLATED, IDENT, token, nt_ident};
|
|
|
|
import std::map::{hashmap, box_str_hash};
|
2012-06-27 17:29:35 -05:00
|
|
|
|
|
|
|
export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token;
|
|
|
|
|
|
|
|
enum tt_frame_up { /* to break a circularity */
|
|
|
|
tt_frame_up(option<tt_frame>)
|
|
|
|
}
|
|
|
|
|
2012-07-12 20:08:55 -05:00
|
|
|
/* FIXME #2811: figure out how to have a uniquely linked stack, and change to
|
|
|
|
`~` */
|
2012-06-27 17:29:35 -05:00
|
|
|
///an unzipping of `token_tree`s
|
|
|
|
type tt_frame = @{
|
2012-06-29 20:26:34 -05:00
|
|
|
readme: ~[ast::token_tree],
|
2012-06-27 17:29:35 -05:00
|
|
|
mut idx: uint,
|
2012-06-29 20:26:34 -05:00
|
|
|
dotdotdoted: bool,
|
2012-07-05 16:30:56 -05:00
|
|
|
sep: option<token>,
|
|
|
|
up: tt_frame_up,
|
2012-06-27 17:29:35 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
/// State of a reader that turns a list of token trees into a stream of
/// tokens, one token per call to `tt_next_token`.
type tt_reader = @{
    // Used to report fatal transcription errors against a span.
    sp_diag: span_handler,
    // Carried alongside the reader; the functions visible here only copy it.
    interner: @interner<@~str>,
    // Innermost frame currently being read.
    mut cur: tt_frame,
    /* for MBE-style macro transcription */
    // What each macro variable was matched against, keyed by variable name.
    interpolations: std::map::hashmap<ident, @named_match>,
    // One entry per enclosing repetition, innermost last: the iteration
    // that is currently being transcribed.
    mut repeat_idx: ~[mut uint],
    // Parallel to `repeat_idx`: the total number of iterations.
    mut repeat_len: ~[uint],
    /* cached: */
    // The token and span that the next `tt_next_token` call will hand back.
    mut cur_tok: token,
    mut cur_span: span
};
|
|
|
|
|
|
|
|
/** This can do Macro-By-Example transcription. On the other hand, if
|
2012-07-27 21:14:46 -05:00
|
|
|
* `src` contains no `tt_seq`s and `tt_nonterminal`s, `interp` can (and
|
2012-06-27 17:29:35 -05:00
|
|
|
* should) be none. */
|
2012-07-14 00:57:48 -05:00
|
|
|
fn new_tt_reader(sp_diag: span_handler, itr: @interner<@~str>,
|
2012-07-27 21:14:46 -05:00
|
|
|
interp: option<std::map::hashmap<ident,@named_match>>,
|
2012-06-29 20:26:34 -05:00
|
|
|
src: ~[ast::token_tree])
|
2012-06-27 17:29:35 -05:00
|
|
|
-> tt_reader {
|
2012-07-06 20:04:28 -05:00
|
|
|
let r = @{sp_diag: sp_diag, interner: itr,
|
2012-06-29 20:26:34 -05:00
|
|
|
mut cur: @{readme: src, mut idx: 0u, dotdotdoted: false,
|
2012-07-05 16:30:56 -05:00
|
|
|
sep: none, up: tt_frame_up(option::none)},
|
2012-06-27 17:29:35 -05:00
|
|
|
interpolations: alt interp { /* just a convienience */
|
2012-07-27 21:14:46 -05:00
|
|
|
none { std::map::box_str_hash::<@named_match>() }
|
2012-06-27 17:29:35 -05:00
|
|
|
some(x) { x }
|
|
|
|
},
|
2012-06-29 20:26:34 -05:00
|
|
|
mut repeat_idx: ~[mut], mut repeat_len: ~[],
|
2012-06-27 17:29:35 -05:00
|
|
|
/* dummy values, never read: */
|
|
|
|
mut cur_tok: EOF,
|
|
|
|
mut cur_span: ast_util::mk_sp(0u,0u)
|
|
|
|
};
|
|
|
|
tt_next_token(r); /* get cur_tok and cur_span set up */
|
|
|
|
ret r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Copy a frame and, recursively, every frame above it, so that the copy's
/// `idx` fields can advance without disturbing the original chain.
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
    @{readme: f.readme, mut idx: f.idx, dotdotdoted: f.dotdotdoted,
      sep: f.sep, up: alt f.up {
        // Duplicate the parent too: it is a shared box with a mutable `idx`.
        tt_frame_up(some(up_frame)) {
          tt_frame_up(some(dup_tt_frame(up_frame)))
        }
        tt_frame_up(none) { tt_frame_up(none) }
      }
     }
}
|
|
|
|
|
|
|
|
/// Produce an independent copy of a reader positioned at the same token.
///
/// The frame stack and the repetition counters are copied. `sp_diag`,
/// `interner` and the `interpolations` map are passed through as-is, so the
/// copy refers to the same values as `r` for those fields.
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
    @{sp_diag: r.sp_diag, interner: r.interner,
      mut cur: dup_tt_frame(r.cur),
      interpolations: r.interpolations,
      mut repeat_idx: copy r.repeat_idx, mut repeat_len: copy r.repeat_len,
      mut cur_tok: r.cur_tok, mut cur_span: r.cur_span}
}
|
|
|
|
|
|
|
|
|
2012-07-27 21:14:46 -05:00
|
|
|
/// Starting from `start`, follow the reader's current repetition indices
/// (outermost first) down through nested `matched_seq`s and return the match
/// that applies to the iteration being transcribed right now.
pure fn lookup_cur_matched_by_matched(r: tt_reader,
                                      start: @named_match) -> @named_match {
    // One descent step: pick element `idx` of a sequence, or stay put once a
    // non-sequence match has been reached.
    pure fn red(&&ad: @named_match, &&idx: uint) -> @named_match {
        alt *ad {
          matched_nonterminal(_) {
            // end of the line; duplicate henceforth
            ad
          }
          matched_seq(ads, _) { ads[idx] }
        }
    }
    vec::foldl(start, r.repeat_idx, red)
}
|
|
|
|
|
2012-07-27 21:14:46 -05:00
|
|
|
/// Look up the macro variable `name` in the reader's interpolation map and
/// resolve it for the current repetition indices.
///
/// NOTE(review): the map is queried with `get`; presumably that fails when
/// `name` is absent -- confirm against `std::map`.
fn lookup_cur_matched(r: tt_reader, name: ident) -> @named_match {
    lookup_cur_matched_by_matched(r, r.interpolations.get(name))
}
|
|
|
|
enum lis {
|
2012-07-14 00:57:48 -05:00
|
|
|
lis_unconstrained, lis_constraint(uint, ident), lis_contradiction(~str)
|
2012-06-29 20:26:34 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn lockstep_iter_size(&&t: token_tree, &&r: tt_reader) -> lis {
|
|
|
|
fn lis_merge(lhs: lis, rhs: lis) -> lis {
|
|
|
|
alt lhs {
|
|
|
|
lis_unconstrained { rhs }
|
|
|
|
lis_contradiction(_) { lhs }
|
|
|
|
lis_constraint(l_len, l_id) {
|
|
|
|
alt rhs {
|
|
|
|
lis_unconstrained { lhs }
|
|
|
|
lis_contradiction(_) { rhs }
|
|
|
|
lis_constraint(r_len, _) if l_len == r_len { lhs }
|
|
|
|
lis_constraint(r_len, r_id) {
|
2012-07-30 18:01:07 -05:00
|
|
|
lis_contradiction(fmt!{"Inconsistent lockstep iteration: \
|
2012-06-29 20:26:34 -05:00
|
|
|
'%s' has %u items, but '%s' has %u",
|
2012-07-30 18:01:07 -05:00
|
|
|
*l_id, l_len, *r_id, r_len})
|
2012-06-29 20:26:34 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
alt t {
|
2012-07-27 21:14:46 -05:00
|
|
|
tt_delim(tts) | tt_seq(_, tts, _, _) {
|
2012-06-29 20:26:34 -05:00
|
|
|
vec::foldl(lis_unconstrained, tts, {|lis, tt|
|
|
|
|
lis_merge(lis, lockstep_iter_size(tt, r)) })
|
|
|
|
}
|
2012-07-27 21:14:46 -05:00
|
|
|
tt_tok(*) { lis_unconstrained }
|
|
|
|
tt_nonterminal(_, name) {
|
|
|
|
alt *lookup_cur_matched(r, name) {
|
|
|
|
matched_nonterminal(_) { lis_unconstrained }
|
|
|
|
matched_seq(ads, _) { lis_constraint(ads.len(), name) }
|
2012-06-29 20:26:34 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-06-27 17:29:35 -05:00
|
|
|
/// Return the token (and span) the reader is currently positioned at, and
/// advance the reader so that `cur_tok`/`cur_span` hold the following one.
///
/// The function works in two phases:
///
/// 1. While the current frame is exhausted, either start the next iteration
///    of a repetition (emitting its separator, if it has one) or pop back to
///    the enclosing frame. Running out of frames caches `EOF`.
/// 2. Otherwise descend into the tree at the cursor until a concrete token
///    can be cached: plain tokens are taken as-is, delimited groups and
///    repetitions push a new frame, and macro variables are substituted.
///
/// Transcription errors are reported through `r.sp_diag.span_fatal`.
/// NOTE(review): the code assumes `span_fatal` does not return -- confirm.
fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} {
    // Snapshot the cached token first; everything below only prepares the
    // token for the *next* call.
    let ret_val = { tok: r.cur_tok, sp: r.cur_span };
    while r.cur.idx >= r.cur.readme.len() {
        /* done with this set; pop or repeat? */
        // `repeat_len.last()` is never 0 here: zero-length repetitions are
        // skipped below without pushing a frame, so the `- 1` cannot wrap.
        if ! r.cur.dotdotdoted
            || r.repeat_idx.last() == r.repeat_len.last() - 1 {

            alt r.cur.up {
              tt_frame_up(none) {
                r.cur_tok = EOF;
                ret ret_val;
              }
              tt_frame_up(some(tt_f)) {
                // Leaving a repetition: drop its counters.
                if r.cur.dotdotdoted {
                    vec::pop(r.repeat_idx); vec::pop(r.repeat_len);
                }

                // Resume the parent just past the tree we descended into.
                r.cur = tt_f;
                r.cur.idx += 1u;
              }
            }

        } else { /* repeat */
            r.cur.idx = 0u;
            r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
            alt r.cur.sep {
              some(tk) {
                r.cur_tok = tk; /* repeat same span, I guess */
                ret ret_val;
              }
              none {}
            }
        }
    }
    loop { /* because it's easiest, this handles `tt_delim` not starting
              with a `tt_tok`, even though it won't happen */
        alt r.cur.readme[r.cur.idx] {
          tt_delim(tts) {
            // Push a frame for the group's contents and go round again.
            r.cur = @{readme: tts, mut idx: 0u, dotdotdoted: false,
                      sep: none, up: tt_frame_up(option::some(r.cur)) };
            // if this could be 0-length, we'd need to potentially recur here
          }
          tt_tok(sp, tok) {
            r.cur_span = sp; r.cur_tok = tok;
            r.cur.idx += 1u;
            ret ret_val;
          }
          tt_seq(sp, tts, sep, zerok) {
            alt lockstep_iter_size(tt_seq(sp, tts, sep, zerok), r) {
              lis_unconstrained {
                r.sp_diag.span_fatal(
                    sp, /* blame macro writer */
                    ~"attempted to repeat an expression containing no syntax \
                     variables matched as repeating at this depth");
              }
              lis_contradiction(msg) { /* FIXME #2887 blame macro invoker
                                          instead*/
                r.sp_diag.span_fatal(sp, msg);
              }
              lis_constraint(len, _) {
                if len == 0 {
                    if !zerok {
                        r.sp_diag.span_fatal(sp, /* FIXME #2887 blame invoker
                                                  */
                                             ~"this must repeat at least \
                                              once");
                    }

                    // Nothing to transcribe: step over the repetition and
                    // start afresh. The cached token has not been touched,
                    // so the recursive call returns the same pair as
                    // `ret_val`.
                    r.cur.idx += 1u;
                    ret tt_next_token(r);
                } else {
                    // Enter the repetition at iteration 0.
                    vec::push(r.repeat_len, len);
                    vec::push(r.repeat_idx, 0u);
                    r.cur = @{readme: tts, mut idx: 0u, dotdotdoted: true,
                              sep: sep, up: tt_frame_up(option::some(r.cur))};
                }
              }
            }
          }
          // FIXME #2887: think about span stuff here
          tt_nonterminal(sp, ident) {
            alt *lookup_cur_matched(r, ident) {
              /* sidestep the interpolation tricks for ident because
                 (a) idents can be in lots of places, so it'd be a pain
                 (b) we actually can, since it's a token. */
              matched_nonterminal(nt_ident(sn,b)) {
                r.cur_span = sp; r.cur_tok = IDENT(sn,b);
                r.cur.idx += 1u;
                ret ret_val;
              }
              // Any other nonterminal travels as a single opaque token.
              matched_nonterminal(other_whole_nt) {
                r.cur_span = sp; r.cur_tok = INTERPOLATED(other_whole_nt);
                r.cur.idx += 1u;
                ret ret_val;
              }
              matched_seq(*) {
                // NOTE(review): this reports against the previously cached
                // span rather than `sp`, unlike the other errors in this
                // function -- confirm that is intended.
                r.sp_diag.span_fatal(
                    copy r.cur_span, /* blame the macro writer */
                    fmt!{"variable '%s' is still repeating at this depth",
                         *ident});
              }
            }
          }
        }
    }

}
|