port mbe to soa tokens

This commit is contained in:
Aleksey Kladov 2021-12-12 19:06:40 +03:00
parent 965585748e
commit 1055a6111a
7 changed files with 130 additions and 183 deletions

View File: crates/mbe/src/lib.rs

@@ -10,7 +10,7 @@ mod parser;
mod expander;
mod syntax_bridge;
mod tt_iter;
-mod subtree_source;
+mod to_parser_tokens;
#[cfg(test)]
mod benchmark;

View File: crates/mbe/src/subtree_source.rs

@@ -1,174 +0,0 @@
//! Our parser is generic over the source of tokens it parses.
//!
//! This module defines tokens sourced from declarative macros.
use parser::{Token, TokenSource};
use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
use tt::buffer::TokenBuffer;
#[derive(Debug, Clone, Eq, PartialEq)]
struct TtToken {
tt: Token,
text: SmolStr,
}
pub(crate) struct SubtreeTokenSource {
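// Tokens, eagerly converted from the `TokenBuffer` in `new`.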
cached: Vec<TtToken>,
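// The current token and its index into `cached`.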
curr: (Token, usize),
}
impl SubtreeTokenSource {
pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource {
let mut current = buffer.begin();
let mut cached = Vec::with_capacity(100);
while !current.eof() {
let cursor = current;
let tt = cursor.token_tree();
// Check if it is a lifetime (a `'` punct immediately followed by an ident)
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
if punct.char == '\'' {
let next = cursor.bump();
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) =
next.token_tree()
{
let text = SmolStr::new("'".to_string() + &ident.text);
cached.push(TtToken {
tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false },
text,
});
current = next.bump();
continue;
} else {
panic!("Next token must be ident : {:#?}", next.token_tree());
}
}
}
current = match tt {
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
cached.push(convert_leaf(leaf));
cursor.bump()
}
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
if let Some(d) = subtree.delimiter_kind() {
cached.push(convert_delim(d, false));
}
cursor.subtree().unwrap()
}
None => match cursor.end() {
Some(subtree) => {
if let Some(d) = subtree.delimiter_kind() {
cached.push(convert_delim(d, true));
}
cursor.bump()
}
None => continue,
},
};
}
let mut res = SubtreeTokenSource {
curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
cached,
};
res.curr = (res.token(0), 0);
res
}
fn token(&self, pos: usize) -> Token {
match self.cached.get(pos) {
Some(it) => it.tt,
None => Token { kind: EOF, is_jointed_to_next: false },
}
}
}
impl TokenSource for SubtreeTokenSource {
fn current(&self) -> Token {
self.curr.0
}
/// Looks ahead `n` tokens.
fn lookahead_nth(&self, n: usize) -> Token {
self.token(self.curr.1 + n)
}
/// Bumps the cursor to the next token.
fn bump(&mut self) {
if self.current().kind == EOF {
return;
}
self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1);
}
/// Is the current token a specified keyword?
fn is_keyword(&self, kw: &str) -> bool {
match self.cached.get(self.curr.1) {
Some(t) => t.text == *kw,
None => false,
}
}
}
fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken {
let (kinds, texts) = match d {
tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"),
tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"),
tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"),
};
let idx = closing as usize;
let kind = kinds[idx];
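// `texts` is "()", "{}" or "[]": [0..1] is the opening char, [1..2] the closing.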
let text = &texts[idx..texts.len() - (1 - idx)];
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
}
fn convert_literal(l: &tt::Literal) -> TtToken {
let is_negated = l.text.starts_with('-');
let inner_text = &l.text[if is_negated { 1 } else { 0 }..];
let kind = lex_single_syntax_kind(inner_text)
.map(|(kind, _error)| kind)
.filter(|kind| {
kind.is_literal() && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
})
.unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l));
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() }
}
fn convert_ident(ident: &tt::Ident) -> TtToken {
let kind = match ident.text.as_ref() {
"true" => T![true],
"false" => T![false],
"_" => UNDERSCORE,
i if i.starts_with('\'') => LIFETIME_IDENT,
_ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT),
};
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() }
}
fn convert_punct(p: tt::Punct) -> TtToken {
let kind = match SyntaxKind::from_char(p.char) {
None => panic!("{:#?} is not a valid punct", p),
Some(kind) => kind,
};
let text = {
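// Stack-allocate the encoding: any `char` is at most 4 bytes in UTF-8.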
let mut buf = [0u8; 4];
let s: &str = p.char.encode_utf8(&mut buf);
SmolStr::new(s)
};
TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text }
}
fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
match leaf {
tt::Leaf::Literal(l) => convert_literal(l),
tt::Leaf::Ident(ident) => convert_ident(ident),
tt::Leaf::Punct(punct) => convert_punct(*punct),
}
}
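// For reference, a sketch of the `parser::TokenSource` trait that this file
// implemented, reconstructed from the impl above (the actual definition lives
// in the parser crate and may differ in detail):
//
//     pub trait TokenSource {
//         fn current(&self) -> Token;
//         fn lookahead_nth(&self, n: usize) -> Token;
//         fn bump(&mut self);
//         fn is_keyword(&self, kw: &str) -> bool;
//     }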

View File: crates/mbe/src/syntax_bridge.rs

@@ -12,7 +12,7 @@ use syntax::{
use tt::buffer::{Cursor, TokenBuffer};
use crate::{
-subtree_source::SubtreeTokenSource, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
+to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
};
/// Convert the syntax node to a `TokenTree` (what macro
/// will consume).
@@ -56,9 +56,9 @@ pub fn token_tree_to_syntax_node(
}
_ => TokenBuffer::from_subtree(tt),
};
-let mut token_source = SubtreeTokenSource::new(&buffer);
+let parser_tokens = to_parser_tokens(&buffer);
let mut tree_sink = TtTreeSink::new(buffer.begin());
-parser::parse(&mut token_source, &mut tree_sink, entry_point);
+parser::parse(&parser_tokens, &mut tree_sink, entry_point);
if tree_sink.roots.len() != 1 {
return Err(ExpandError::ConversionError);
}

View File: crates/mbe/src/to_parser_tokens.rs

@@ -0,0 +1,97 @@
//! Convert macro-by-example tokens, which are specific to macro expansion,
//! into a format that works for our parser.
use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
use tt::buffer::TokenBuffer;
pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
let mut res = parser::Tokens::default();
let mut current = buffer.begin();
while !current.eof() {
let cursor = current;
let tt = cursor.token_tree();
// Check if it is a lifetime (a `'` punct immediately followed by an ident)
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
if punct.char == '\'' {
let next = cursor.bump();
match next.token_tree() {
Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => {
res.push(LIFETIME_IDENT);
current = next.bump();
continue;
}
_ => panic!("Next token must be ident : {:#?}", next.token_tree()),
}
}
}
current = match tt {
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
match leaf {
tt::Leaf::Literal(lit) => {
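// Macro expansion can produce a negative literal as a single token
// (e.g. "-42"), while the lexer only knows unsigned literals: strip
// the sign, lex the rest, and accept only numeric kinds when negated.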
let is_negated = lit.text.starts_with('-');
let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
let kind = lex_single_syntax_kind(inner_text)
.map(|(kind, _error)| kind)
.filter(|kind| {
kind.is_literal()
&& (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
})
.unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit));
res.push(kind);
}
tt::Leaf::Ident(ident) => match ident.text.as_ref() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ => match SyntaxKind::from_keyword(&ident.text) {
Some(kind) => res.push(kind),
None => {
let contextual_keyword =
SyntaxKind::from_contextual_keyword(&ident.text)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
}
},
},
tt::Leaf::Punct(punct) => {
let kind = SyntaxKind::from_char(punct.char)
.unwrap_or_else(|| panic!("{:#?} is not a valid punct", punct));
res.push(kind);
res.was_joint(punct.spacing == tt::Spacing::Joint);
}
}
cursor.bump()
}
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
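// Entering a subtree: emit its opening delimiter, then descend into it.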
if let Some(d) = subtree.delimiter_kind() {
res.push(match d {
tt::DelimiterKind::Parenthesis => T!['('],
tt::DelimiterKind::Brace => T!['{'],
tt::DelimiterKind::Bracket => T!['['],
});
}
cursor.subtree().unwrap()
}
None => match cursor.end() {
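// Reached the end of a subtree: emit its closing delimiter and step out.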
Some(subtree) => {
if let Some(d) = subtree.delimiter_kind() {
res.push(match d {
tt::DelimiterKind::Parenthesis => T![')'],
tt::DelimiterKind::Brace => T!['}'],
tt::DelimiterKind::Bracket => T![']'],
})
}
cursor.bump()
}
None => continue,
},
};
}
res
}
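// A hedged usage sketch, mirroring `token_tree_to_syntax_node` in
// syntax_bridge.rs (assumes a subtree `tt` and an `entry_point`, both as in
// this commit):
//
//     let buffer = TokenBuffer::from_subtree(tt);
//     let parser_tokens = to_parser_tokens(&buffer);
//     let mut tree_sink = TtTreeSink::new(buffer.begin());
//     parser::parse(&parser_tokens, &mut tree_sink, entry_point);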

View File: crates/mbe/src/tt_iter.rs

@@ -1,7 +1,7 @@
//! A "Parser" structure for token trees. We use this when parsing a declarative
//! macro definition into a list of patterns and templates.
-use crate::{subtree_source::SubtreeTokenSource, ExpandError, ExpandResult, ParserEntryPoint};
+use crate::{to_parser_tokens::to_parser_tokens, ExpandError, ExpandResult, ParserEntryPoint};
use parser::TreeSink;
use syntax::SyntaxKind;
@@ -116,10 +116,10 @@ impl<'a> TtIter<'a> {
}
let buffer = TokenBuffer::from_tokens(self.inner.as_slice());
-let mut src = SubtreeTokenSource::new(&buffer);
+let parser_tokens = to_parser_tokens(&buffer);
let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false };
-parser::parse(&mut src, &mut sink, entry_point);
+parser::parse(&parser_tokens, &mut sink, entry_point);
let mut err = if !sink.cursor.is_root() || sink.error {
Some(err!("expected {:?}", entry_point))

View File: crates/parser/src/lib.rs

@@ -1,8 +1,11 @@
//! The Rust parser.
//!
+//! NOTE: The crate is undergoing refactors, don't believe everything the docs
+//! say :-)
+//!
//! The parser doesn't know about concrete representation of tokens and syntax
-//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead.
-//! As a consequence, this crate does not contain a lexer.
+//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As
+//! a consequence, this crate does not contain a lexer.
//!
//! The [`Parser`] struct from the [`parser`] module is a cursor into the
//! sequence of tokens. Parsing routines use [`Parser`] to inspect current

View File: crates/parser/src/tokens.rs

@@ -1,3 +1,8 @@
//! Input for the parser -- a sequence of tokens.
+//!
+//! As of now, the parser doesn't have access to the *text* of the tokens, and
+//! makes decisions based solely on their classification.
use crate::SyntaxKind;
#[allow(non_camel_case_types)]
@@ -28,6 +33,22 @@ impl Tokens {
pub fn push(&mut self, kind: SyntaxKind) {
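// `EOF` here is, presumably, a "no contextual keyword" sentinel for
// `push_impl`; compare `push_ident`, which mbe uses to record one.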
self.push_impl(kind, SyntaxKind::EOF)
}
+/// Sets jointness for the last token we've pushed.
+///
+/// This is a separate API rather than an argument to `push`, to make it
+/// convenient both for textual and mbe tokens. With text, you know whether
+/// the *previous* token was joint; with mbe, you know whether the *current*
+/// one is joint. This API allows for both styles of usage:
+///
+/// ```
+/// // In text:
+/// tokens.was_joint(prev_joint);
+/// tokens.push(curr);
+///
+/// // In MBE:
+/// tokens.push(curr);
+/// tokens.was_joint(curr_joint);
+/// ```
+pub fn was_joint(&mut self, yes: bool) {
+let idx = self.len();
+if yes && idx > 0 {
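// For orientation: a minimal sketch of the struct-of-arrays layout that
// `push`/`was_joint` suggest (field names hypothetical; the real `Tokens` may
// well pack jointness into a bitset instead of a `Vec<bool>`):
//
//     #[derive(Default)]
//     pub struct Tokens {
//         kind: Vec<SyntaxKind>,
//         joint: Vec<bool>,
//         contextual_kind: Vec<SyntaxKind>,
//     }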