From 39590d81f01b97da574de3298f664eeabf84d255 Mon Sep 17 00:00:00 2001 From: Paul Stansifer Date: Wed, 27 Jun 2012 15:29:35 -0700 Subject: [PATCH] Some rearranging in preparation for MBE-style TT transcription. --- src/libsyntax/ast.rs | 9 +- src/libsyntax/ext/earley_parser.rs | 40 +++++++-- src/libsyntax/ext/tt/transcribe.rs | 114 +++++++++++++++++++++++++ src/libsyntax/parse.rs | 9 +- src/libsyntax/parse/attr.rs | 3 +- src/libsyntax/parse/lexer.rs | 128 +++++------------------------ src/libsyntax/parse/parser.rs | 15 +++- src/libsyntax/syntax.rc | 3 + 8 files changed, 198 insertions(+), 123 deletions(-) create mode 100644 src/libsyntax/ext/tt/transcribe.rs diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index d28d0819f96..46ca8240f6e 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -374,12 +374,17 @@ enum blk_sort { */ #[auto_serialize] +#[doc="For macro invocations; parsing is delegated to the macro"] enum token_tree { - /* for macro invocations; parsing is the macro's job */ tt_delim(~[token_tree]), - tt_flat(span, token::token) + tt_flat(span, token::token), + /* These only make sense for right-hand-sides of MBE macros*/ + tt_dotdotdot(~[token_tree]), + tt_interpolate(ident) } + + #[auto_serialize] type matcher = spanned; diff --git a/src/libsyntax/ext/earley_parser.rs b/src/libsyntax/ext/earley_parser.rs index a6e47e0941c..89f4fe4d670 100644 --- a/src/libsyntax/ext/earley_parser.rs +++ b/src/libsyntax/ext/earley_parser.rs @@ -1,12 +1,15 @@ // Earley-like parser for macros. import parse::token; import parse::token::{token, EOF, to_str, whole_nt}; -import parse::lexer::{reader, tt_reader, tt_reader_as_reader}; +import parse::lexer::*; //resolve bug? +//import parse::lexer::{reader, tt_reader, tt_reader_as_reader}; import parse::parser::{parser,SOURCE_FILE}; -import parse::common::parser_common; +//import parse::common::parser_common; +import parse::common::*; //resolve bug? 
import parse::parse_sess; import dvec::{dvec, extensions}; -import ast::{matcher, mtc_tok, mtc_rep, mtc_bb}; +import ast::{matcher, mtc_tok, mtc_rep, mtc_bb, ident}; +import std::map::{hashmap, box_str_hash}; /* This is an Earley-like parser, without support for nonterminals. This means that there are no completer or predictor rules, and therefore no need to @@ -66,8 +69,31 @@ enum arb_depth { leaf(whole_nt), seq(~[@arb_depth]) } type earley_item = matcher_pos; +fn nameize(&&p_s: parse_sess, ms: ~[matcher], &&res: ~[@arb_depth]) + -> hashmap { + fn n_rec(&&p_s: parse_sess, &&m: matcher, &&res: ~[@arb_depth], + &&ret_val: hashmap) { + alt m { + {node: mtc_tok(_), span: _} { } + {node: mtc_rep(more_ms, _, _), span: _} { + for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) }; + } + {node: mtc_bb(bind_name, _, idx), span: sp} { + if ret_val.contains_key(bind_name) { + p_s.span_diagnostic.span_fatal(sp, "Duplicated bind name: " + + *bind_name) + } + ret_val.insert(bind_name, res[idx]); + } + } + } + let ret_val = box_str_hash::<@arb_depth>(); + for ms.each() |m| { n_rec(p_s, m, res, ret_val) }; + ret ret_val; +} + fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) - -> ~[@arb_depth] { + -> hashmap { let mut cur_eis = ~[]; vec::push(cur_eis, new_matcher_pos(ms, none)); @@ -164,9 +190,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) /* error messages here could be improved with links to orig. 
rules */ if tok == EOF { - if eof_eis.len() == 1u { - let ret_val = vec::map(eof_eis[0u].matches, |dv| dv.pop()); - ret ret_val; /* success */ + if eof_eis.len() == 1u { /* success */ + ret nameize(sess, ms, + vec::map(eof_eis[0u].matches, |dv| dv.pop())); } else if eof_eis.len() > 1u { rdr.fatal("Ambiguity: multiple successful parses"); } else { diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs new file mode 100644 index 00000000000..246e24e617a --- /dev/null +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -0,0 +1,114 @@ +import util::interner::interner; +import diagnostic::span_handler; +import ast::{tt_delim,tt_flat,tt_dotdotdot,tt_interpolate,ident}; +import ext::earley_parser::arb_depth; +import codemap::span; +import parse::token::{EOF,token}; + +export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token; + +enum tt_frame_up { /* to break a circularity */ + tt_frame_up(option) +} + +/* TODO: figure out how to have a uniquely linked stack, and change to `~` */ +///an unzipping of `token_tree`s +type tt_frame = @{ + readme: [ast::token_tree]/~, + mut idx: uint, + up: tt_frame_up +}; + +type tt_reader = @{ + span_diagnostic: span_handler, + interner: @interner<@str>, + mut cur: tt_frame, + /* for MBE-style macro transcription */ + interpolations: std::map::hashmap, + /* cached: */ + mut cur_tok: token, + mut cur_span: span +}; + +/** This can do Macro-By-Example transcription. On the other hand, if + * `src` contains no `tt_dotdotdot`s and `tt_interpolate`s, `interp` can (and + * should) be none. 
*/ +fn new_tt_reader(span_diagnostic: span_handler, itr: @interner<@str>, + interp: option>, + src: [ast::token_tree]/~) + -> tt_reader { + let r = @{span_diagnostic: span_diagnostic, interner: itr, + mut cur: @{readme: src, mut idx: 0u, + up: tt_frame_up(option::none)}, + interpolations: alt interp { /* just a convenience */ + none { std::map::box_str_hash::<@arb_depth>() } + some(x) { x } + }, + /* dummy values, never read: */ + mut cur_tok: EOF, + mut cur_span: ast_util::mk_sp(0u,0u) + }; + tt_next_token(r); /* get cur_tok and cur_span set up */ + ret r; +} + +pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame { + @{readme: f.readme, mut idx: f.idx, + up: alt f.up { + tt_frame_up(some(up_frame)) { + tt_frame_up(some(dup_tt_frame(up_frame))) + } + tt_frame_up(none) { tt_frame_up(none) } + } + } +} + +pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader { + @{span_diagnostic: r.span_diagnostic, interner: r.interner, + mut cur: dup_tt_frame(r.cur), + interpolations: r.interpolations, + mut cur_tok: r.cur_tok, mut cur_span: r.cur_span} +} + + +fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} { + let ret_val = { tok: r.cur_tok, sp: r.cur_span }; + if r.cur.idx >= vec::len(r.cur.readme) { + /* done with this set; pop */ + alt r.cur.up { + tt_frame_up(none) { + r.cur_tok = EOF; + ret ret_val; + } + tt_frame_up(some(tt_f)) { + r.cur = tt_f; + /* the above `if` would need to be a `while` if we didn't know + that the last thing in a `tt_delim` is always a `tt_flat` */ + r.cur.idx += 1u; + } + } + } + /* if `tt_delim`s could be 0-length, we'd need to be able to switch + between popping and pushing until we got to an actual `tt_flat` */ + loop { /* because it's easiest, this handles `tt_delim` not starting + with a `tt_flat`, even though it won't happen */ + alt copy r.cur.readme[r.cur.idx] { + tt_delim(tts) { + r.cur = @{readme: tts, mut idx: 0u, + up: tt_frame_up(option::some(r.cur)) }; + } + tt_flat(sp, tok) { + r.cur_span = sp; r.cur_tok = tok; + r.cur.idx += 1u; 
+ ret ret_val; + } + tt_dotdotdot(tts) { + fail; + } + tt_interpolate(ident) { + fail; + } + } + } + +} \ No newline at end of file diff --git a/src/libsyntax/parse.rs b/src/libsyntax/parse.rs index 04f533be5bf..9c143257e9e 100644 --- a/src/libsyntax/parse.rs +++ b/src/libsyntax/parse.rs @@ -13,8 +13,10 @@ export parse_expr_from_source_str, parse_item_from_source_str; export parse_from_source_str; import parser::parser; -import attr::parser_attr; -import common::parser_common; +//import attr::parser_attr; +import attr::*; //resolve bug? +//import common::parser_common; +import common::*; //resolve bug? import ast::node_id; import util::interner; // FIXME (#1935): resolve badness @@ -199,6 +201,7 @@ fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str, fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg, tt: ~[ast::token_tree]) -> parser { - let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner, tt); + let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner, + none, tt); ret parser(sess, cfg, trdr as reader, parser::SOURCE_FILE) } diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index d804a927edb..d7ae4995520 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -1,6 +1,7 @@ import either::{either, left, right}; import ast_util::spanned; -import common::{parser_common, seq_sep_trailing_disallowed}; +import common::*; //resolve bug? 
+//import common::{parser_common, seq_sep_trailing_disallowed}; export attr_or_ext; export parser_attr; diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 333ff95c74d..7742e2dc9a2 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -1,11 +1,11 @@ -import util::interner; -import util::interner::intern; -import diagnostic; -import ast::{tt_delim,tt_flat}; +import util::interner::{interner,intern}; +import diagnostic::span_handler; import codemap::span; +import ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader, + tt_next_token}; export reader, string_reader, new_string_reader, is_whitespace; -export tt_reader, new_tt_reader, dup_tt_reader; +export tt_reader, new_tt_reader; export nextch, is_eof, bump, get_str_from, new_low_level_string_reader; export string_reader_as_reader, tt_reader_as_reader; @@ -13,91 +13,38 @@ iface reader { fn is_eof() -> bool; fn next_token() -> {tok: token::token, sp: span}; fn fatal(str) -> !; - fn span_diag() -> diagnostic::span_handler; - fn interner() -> @interner::interner<@str>; + fn span_diag() -> span_handler; + fn interner() -> @interner<@str>; fn peek() -> {tok: token::token, sp: span}; fn dup() -> reader; } -enum tt_frame_up { /* to break a circularity */ - tt_frame_up(option) -} - -/* FIXME (#2811): figure out how to have a uniquely linked stack, - and change to `~` */ -/// an unzipping of `token_tree`s -type tt_frame = @{ - readme: ~[ast::token_tree], - mut idx: uint, - up: tt_frame_up -}; - -type tt_reader = @{ - span_diagnostic: diagnostic::span_handler, - interner: @interner::interner<@str>, - mut cur: tt_frame, - /* cached: */ - mut cur_tok: token::token, - mut cur_span: span -}; - -fn new_tt_reader(span_diagnostic: diagnostic::span_handler, - itr: @interner::interner<@str>, src: ~[ast::token_tree]) - -> tt_reader { - let r = @{span_diagnostic: span_diagnostic, interner: itr, - mut cur: @{readme: src, mut idx: 0u, - up: tt_frame_up(option::none)}, - /* dummy 
values, never read: */ - mut cur_tok: token::EOF, - mut cur_span: ast_util::mk_sp(0u,0u) - }; - tt_next_token(r); /* get cur_tok and cur_span set up */ - ret r; -} - -pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame { - @{readme: f.readme, mut idx: f.idx, - up: alt f.up { - tt_frame_up(some(up_frame)) { - tt_frame_up(some(dup_tt_frame(up_frame))) - } - tt_frame_up(none) { tt_frame_up(none) } - } - } -} - -pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader { - @{span_diagnostic: r.span_diagnostic, interner: r.interner, - mut cur: dup_tt_frame(r.cur), - mut cur_tok: r.cur_tok, mut cur_span: r.cur_span} -} - type string_reader = @{ - span_diagnostic: diagnostic::span_handler, + span_diagnostic: span_handler, src: @str, mut col: uint, mut pos: uint, mut curr: char, mut chpos: uint, filemap: codemap::filemap, - interner: @interner::interner<@str>, + interner: @interner<@str>, /* cached: */ mut peek_tok: token::token, mut peek_span: span }; -fn new_string_reader(span_diagnostic: diagnostic::span_handler, +fn new_string_reader(span_diagnostic: span_handler, filemap: codemap::filemap, - itr: @interner::interner<@str>) -> string_reader { + itr: @interner<@str>) -> string_reader { let r = new_low_level_string_reader(span_diagnostic, filemap, itr); string_advance_token(r); /* fill in peek_* */ ret r; } /* For comments.rs, which hackily pokes into 'pos' and 'curr' */ -fn new_low_level_string_reader(span_diagnostic: diagnostic::span_handler, +fn new_low_level_string_reader(span_diagnostic: span_handler, filemap: codemap::filemap, - itr: @interner::interner<@str>) + itr: @interner<@str>) -> string_reader { let r = @{span_diagnostic: span_diagnostic, src: filemap.src, mut col: 0u, mut pos: 0u, mut curr: -1 as char, @@ -131,8 +78,8 @@ impl string_reader_as_reader of reader for string_reader { fn fatal(m: str) -> ! 
{ self.span_diagnostic.span_fatal(copy self.peek_span, m) } - fn span_diag() -> diagnostic::span_handler { self.span_diagnostic } - fn interner() -> @interner::interner<@str> { self.interner } + fn span_diag() -> span_handler { self.span_diagnostic } + fn interner() -> @interner<@str> { self.interner } fn peek() -> {tok: token::token, sp: span} { {tok: self.peek_tok, sp: self.peek_span} } @@ -153,8 +100,8 @@ impl tt_reader_as_reader of reader for tt_reader { fn fatal(m: str) -> ! { self.span_diagnostic.span_fatal(copy self.cur_span, m); } - fn span_diag() -> diagnostic::span_handler { self.span_diagnostic } - fn interner() -> @interner::interner<@str> { self.interner } + fn span_diag() -> span_handler { self.span_diagnostic } + fn interner() -> @interner<@str> { self.interner } fn peek() -> {tok: token::token, sp: span} { { tok: self.cur_tok, sp: self.cur_span } } @@ -178,42 +125,6 @@ fn string_advance_token(&&r: string_reader) { } -fn tt_next_token(&&r: tt_reader) -> {tok: token::token, sp: span} { - let ret_val = { tok: r.cur_tok, sp: r.cur_span }; - if r.cur.idx >= vec::len(r.cur.readme) { - /* done with this set; pop */ - alt r.cur.up { - tt_frame_up(none) { - r.cur_tok = token::EOF; - ret ret_val; - } - tt_frame_up(some(tt_f)) { - r.cur = tt_f; - /* the above `if` would need to be a `while` if we didn't know - that the last thing in a `tt_delim` is always a `tt_flat` */ - r.cur.idx += 1u; - } - } - } - /* if `tt_delim`s could be 0-length, we'd need to be able to switch - between popping and pushing until we got to an actual `tt_flat` */ - loop { /* because it's easiest, this handles `tt_delim` not starting - with a `tt_flat`, even though it won't happen */ - alt copy r.cur.readme[r.cur.idx] { - tt_delim(tts) { - r.cur = @{readme: tts, mut idx: 0u, - up: tt_frame_up(option::some(r.cur)) }; - } - tt_flat(sp, tok) { - r.cur_span = sp; r.cur_tok = tok; - r.cur.idx += 1u; - ret ret_val; - } - } - } - -} - fn get_str_from(rdr: string_reader, start: uint) -> str 
unsafe { // I'm pretty skeptical about this subtraction. What if there's a // multi-byte character before the mark? @@ -548,7 +459,7 @@ fn next_token_inner(rdr: string_reader) -> token::token { let is_mod_name = c == ':' && nextch(rdr) == ':'; // FIXME: perform NFKC normalization here. (Issue #2253) - ret token::IDENT(interner::intern(*rdr.interner, + ret token::IDENT(intern(*rdr.interner, @accum_str), is_mod_name); } if is_dec_digit(c) { @@ -713,8 +624,7 @@ fn next_token_inner(rdr: string_reader) -> token::token { } } bump(rdr); - ret token::LIT_STR(interner::intern(*rdr.interner, - @accum_str)); + ret token::LIT_STR(intern(*rdr.interner, @accum_str)); } '-' { if nextch(rdr) == '>' { diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 55e75f07f27..90c5b3f3720 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -70,6 +70,7 @@ class parser { let mut buffer_start: int; let mut buffer_end: int; let mut restriction: restriction; + let mut quote_depth: uint; // not (yet) related to the quasiquoter let reader: reader; let keywords: hashmap; let restricted_keywords: hashmap; @@ -94,6 +95,7 @@ class parser { self.buffer_start = 0; self.buffer_end = 0; self.restriction = UNRESTRICTED; + self.quote_depth = 0u; self.keywords = token::keyword_table(); self.restricted_keywords = token::restricted_keyword_table(); } @@ -1067,6 +1069,11 @@ class parser { } fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree { + if p.eat_keyword("many") && p.quote_depth > 0u { + ret tt_dotdotdot( + p.parse_seq(token::LPAREN, token::RPAREN, seq_sep_none(), + |p| p.parse_token_tree()).node); + } alt p.token { token::RPAREN | token::RBRACE | token::RBRACKET if !delim_ok { @@ -1076,6 +1083,11 @@ class parser { token::EOF { p.fatal("file ended in the middle of a macro invocation"); } + /* we ought to allow different depths of unquotation */ + token::DOLLAR if p.quote_depth > 0u { + p.bump(); + ret tt_interpolate(p.parse_ident()); + } _ { 
/* ok */ } } let res = tt_flat(p.span, p.token); @@ -1104,10 +1116,11 @@ class parser { common::seq_sep_none(), |p| p.parse_matcher(@mut 0u)).node; let tt = self.parse_token_tree(); + //let tt_rhs = self.parse_token_tree(); alt tt { tt_delim(tts) { let rdr = lexer::new_tt_reader(self.reader.span_diag(), - self.reader.interner(), tts) + self.reader.interner(), none, tts) as reader; ext::earley_parser::parse(self.sess, self.cfg, rdr, ms); } diff --git a/src/libsyntax/syntax.rc b/src/libsyntax/syntax.rc index bb0f82e89fc..4e61bf09426 100644 --- a/src/libsyntax/syntax.rc +++ b/src/libsyntax/syntax.rc @@ -66,6 +66,9 @@ mod ext { mod build; mod earley_parser; + mod tt { + mod transcribe; + } mod fmt; mod env;