port mbe to soa tokens

parent 965585748e · commit 1055a6111a
crates/mbe/src/lib.rs
@@ -10,7 +10,7 @@ mod parser;
 mod expander;
 mod syntax_bridge;
 mod tt_iter;
-mod subtree_source;
+mod to_parser_tokens;

 #[cfg(test)]
 mod benchmark;
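Note: the commit replaces the pull-based `SubtreeTokenSource` (deleted below) with an eagerly built `parser::Tokens` buffer. As a rough mental model of "SoA" (struct-of-arrays) token storage, here is a minimal sketch; the field names are invented for illustration, since the real `parser::Tokens` is internal to rust-analyzer:

```rust
// Minimal sketch of struct-of-arrays (SoA) token storage.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SyntaxKind { Ident, IntNumber, Lifetime, Eof /* ... */ }

#[derive(Default)]
pub struct Tokens {
    // One entry per token, in parallel vectors ("struct of arrays")
    // rather than a single Vec of per-token structs ("array of structs").
    kind: Vec<SyntaxKind>,
    joint: Vec<bool>, // is token i glued to token i + 1 (e.g. `>` `>` in `>>`)?
}

impl Tokens {
    pub fn push(&mut self, kind: SyntaxKind) {
        self.kind.push(kind);
        self.joint.push(false);
    }
    // Mark the most recently pushed token as joint with the next one;
    // mirrors the `was_joint` API added to crates/parser/src/tokens.rs below.
    pub fn was_joint(&mut self, yes: bool) {
        if yes {
            if let Some(last) = self.joint.last_mut() {
                *last = true;
            }
        }
    }
}
```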
crates/mbe/src/subtree_source.rs (deleted)
@@ -1,174 +0,0 @@
-//! Our parser is generic over the source of tokens it parses.
-//!
-//! This module defines tokens sourced from declarative macros.
-
-use parser::{Token, TokenSource};
-use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
-use tt::buffer::TokenBuffer;
-
-#[derive(Debug, Clone, Eq, PartialEq)]
-struct TtToken {
-    tt: Token,
-    text: SmolStr,
-}
-
-pub(crate) struct SubtreeTokenSource {
-    cached: Vec<TtToken>,
-    curr: (Token, usize),
-}
-
-impl<'a> SubtreeTokenSource {
-    pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource {
-        let mut current = buffer.begin();
-        let mut cached = Vec::with_capacity(100);
-
-        while !current.eof() {
-            let cursor = current;
-            let tt = cursor.token_tree();
-
-            // Check if it is lifetime
-            if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
-                if punct.char == '\'' {
-                    let next = cursor.bump();
-                    if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) =
-                        next.token_tree()
-                    {
-                        let text = SmolStr::new("'".to_string() + &ident.text);
-                        cached.push(TtToken {
-                            tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false },
-                            text,
-                        });
-                        current = next.bump();
-                        continue;
-                    } else {
-                        panic!("Next token must be ident : {:#?}", next.token_tree());
-                    }
-                }
-            }
-
-            current = match tt {
-                Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
-                    cached.push(convert_leaf(leaf));
-                    cursor.bump()
-                }
-                Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
-                    if let Some(d) = subtree.delimiter_kind() {
-                        cached.push(convert_delim(d, false));
-                    }
-                    cursor.subtree().unwrap()
-                }
-                None => match cursor.end() {
-                    Some(subtree) => {
-                        if let Some(d) = subtree.delimiter_kind() {
-                            cached.push(convert_delim(d, true));
-                        }
-                        cursor.bump()
-                    }
-                    None => continue,
-                },
-            };
-        }
-
-        let mut res = SubtreeTokenSource {
-            curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
-            cached,
-        };
-        res.curr = (res.token(0), 0);
-        res
-    }
-
-    fn token(&self, pos: usize) -> Token {
-        match self.cached.get(pos) {
-            Some(it) => it.tt,
-            None => Token { kind: EOF, is_jointed_to_next: false },
-        }
-    }
-}
-
-impl<'a> TokenSource for SubtreeTokenSource {
-    fn current(&self) -> Token {
-        self.curr.0
-    }
-
-    /// Lookahead n token
-    fn lookahead_nth(&self, n: usize) -> Token {
-        self.token(self.curr.1 + n)
-    }
-
-    /// bump cursor to next token
-    fn bump(&mut self) {
-        if self.current().kind == EOF {
-            return;
-        }
-        self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1);
-    }
-
-    /// Is the current token a specified keyword?
-    fn is_keyword(&self, kw: &str) -> bool {
-        match self.cached.get(self.curr.1) {
-            Some(t) => t.text == *kw,
-            None => false,
-        }
-    }
-}
-
-fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken {
-    let (kinds, texts) = match d {
-        tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"),
-        tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"),
-        tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"),
-    };

-    let idx = closing as usize;
-    let kind = kinds[idx];
-    let text = &texts[idx..texts.len() - (1 - idx)];
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
-}
-
-fn convert_literal(l: &tt::Literal) -> TtToken {
-    let is_negated = l.text.starts_with('-');
-    let inner_text = &l.text[if is_negated { 1 } else { 0 }..];
-
-    let kind = lex_single_syntax_kind(inner_text)
-        .map(|(kind, _error)| kind)
-        .filter(|kind| {
-            kind.is_literal() && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
-        })
-        .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l));
-
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() }
-}
-
-fn convert_ident(ident: &tt::Ident) -> TtToken {
-    let kind = match ident.text.as_ref() {
-        "true" => T![true],
-        "false" => T![false],
-        "_" => UNDERSCORE,
-        i if i.starts_with('\'') => LIFETIME_IDENT,
-        _ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT),
-    };
-
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() }
-}
-
-fn convert_punct(p: tt::Punct) -> TtToken {
-    let kind = match SyntaxKind::from_char(p.char) {
-        None => panic!("{:#?} is not a valid punct", p),
-        Some(kind) => kind,
-    };
-
-    let text = {
-        let mut buf = [0u8; 4];
-        let s: &str = p.char.encode_utf8(&mut buf);
-        SmolStr::new(s)
-    };
-    TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text }
-}
-
-fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
-    match leaf {
-        tt::Leaf::Literal(l) => convert_literal(l),
-        tt::Leaf::Ident(ident) => convert_ident(ident),
-        tt::Leaf::Punct(punct) => convert_punct(*punct),
-    }
-}
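Note: the deleted file implemented the parser's pull interface, so every token crossed a virtual call. A sketch of that consumption pattern for contrast (trait methods as in the removed code; `Token` simplified to a plain struct, and the driver loop is illustrative only):

```rust
// Simplified stand-in for parser::Token; the real one carries a SyntaxKind.
#[derive(Clone, Copy)]
struct Token { kind: u16, is_jointed_to_next: bool }
const EOF: u16 = 0;

// The pull interface implemented by the deleted SubtreeTokenSource.
trait TokenSource {
    fn current(&self) -> Token;
    fn lookahead_nth(&self, n: usize) -> Token;
    fn bump(&mut self);
    fn is_keyword(&self, kw: &str) -> bool;
}

// Illustrative driver: the parser pulls tokens one by one, paying a
// dynamic dispatch per call -- the overhead the eager `Tokens` buffer avoids.
fn drive(src: &mut dyn TokenSource) {
    while src.current().kind != EOF {
        src.bump();
    }
}
```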
crates/mbe/src/syntax_bridge.rs
@@ -12,7 +12,7 @@ use syntax::{
 use tt::buffer::{Cursor, TokenBuffer};

 use crate::{
-    subtree_source::SubtreeTokenSource, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
+    to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
 };

 /// Convert the syntax node to a `TokenTree` (what macro
@@ -56,9 +56,9 @@ pub fn token_tree_to_syntax_node(
         }
         _ => TokenBuffer::from_subtree(tt),
     };
-    let mut token_source = SubtreeTokenSource::new(&buffer);
+    let parser_tokens = to_parser_tokens(&buffer);
     let mut tree_sink = TtTreeSink::new(buffer.begin());
-    parser::parse(&mut token_source, &mut tree_sink, entry_point);
+    parser::parse(&parser_tokens, &mut tree_sink, entry_point);
     if tree_sink.roots.len() != 1 {
         return Err(ExpandError::ConversionError);
     }
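Note the call-site shift above: `parser::parse` now takes the prebuilt token buffer by shared reference instead of a `&mut` token source. A hedged paraphrase of the signature change (stand-in types and simplified signatures, not the exact rust-analyzer API):

```rust
// Minimal stand-in types for illustration.
trait TokenSource { fn bump(&mut self); }
trait TreeSink { /* receives parse events */ }
struct Tokens { /* flat, prebuilt token data */ }

// Before: parsing pulled tokens through a mutable trait object.
fn parse_old(_source: &mut dyn TokenSource, _sink: &mut dyn TreeSink) {}

// After: tokens are materialized up front; a shared borrow suffices
// because the parser no longer mutates its input.
fn parse_new(_tokens: &Tokens, _sink: &mut dyn TreeSink) {}
```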
crates/mbe/src/to_parser_tokens.rs (new file, 97 lines)
@@ -0,0 +1,97 @@
+//! Convert macro-by-example tokens which are specific to macro expansion into a
+//! format that works for our parser.
+
+use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
+use tt::buffer::TokenBuffer;
+
+pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
+    let mut res = parser::Tokens::default();
+
+    let mut current = buffer.begin();
+
+    while !current.eof() {
+        let cursor = current;
+        let tt = cursor.token_tree();
+
+        // Check if it is lifetime
+        if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
+            if punct.char == '\'' {
+                let next = cursor.bump();
+                match next.token_tree() {
+                    Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => {
+                        res.push(LIFETIME_IDENT);
+                        current = next.bump();
+                        continue;
+                    }
+                    _ => panic!("Next token must be ident : {:#?}", next.token_tree()),
+                }
+            }
+        }
+
+        current = match tt {
+            Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
+                match leaf {
+                    tt::Leaf::Literal(lit) => {
+                        let is_negated = lit.text.starts_with('-');
+                        let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
+
+                        let kind = lex_single_syntax_kind(inner_text)
+                            .map(|(kind, _error)| kind)
+                            .filter(|kind| {
+                                kind.is_literal()
+                                    && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
+                            })
+                            .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit));
+
+                        res.push(kind);
+                    }
+                    tt::Leaf::Ident(ident) => match ident.text.as_ref() {
+                        "_" => res.push(T![_]),
+                        i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
+                        _ => match SyntaxKind::from_keyword(&ident.text) {
+                            Some(kind) => res.push(kind),
+                            None => {
+                                let contextual_keyword =
+                                    SyntaxKind::from_contextual_keyword(&ident.text)
+                                        .unwrap_or(SyntaxKind::IDENT);
+                                res.push_ident(contextual_keyword);
+                            }
+                        },
+                    },
+                    tt::Leaf::Punct(punct) => {
+                        let kind = SyntaxKind::from_char(punct.char)
+                            .unwrap_or_else(|| panic!("{:#?} is not a valid punct", punct));
+                        res.push(kind);
+                        res.was_joint(punct.spacing == tt::Spacing::Joint);
+                    }
+                }
+                cursor.bump()
+            }
+            Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
+                if let Some(d) = subtree.delimiter_kind() {
+                    res.push(match d {
+                        tt::DelimiterKind::Parenthesis => T!['('],
+                        tt::DelimiterKind::Brace => T!['{'],
+                        tt::DelimiterKind::Bracket => T!['['],
+                    });
+                }
+                cursor.subtree().unwrap()
+            }
+            None => match cursor.end() {
+                Some(subtree) => {
+                    if let Some(d) = subtree.delimiter_kind() {
+                        res.push(match d {
+                            tt::DelimiterKind::Parenthesis => T![')'],
+                            tt::DelimiterKind::Brace => T!['}'],
+                            tt::DelimiterKind::Bracket => T![']'],
+                        })
+                    }
+                    cursor.bump()
+                }
+                None => continue,
+            },
+        };
+    }
+
+    res
+}
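One subtlety in the new file is worth spelling out: in a `tt` token stream, a lifetime such as `'a` arrives as two leaves (a `'` punct followed by an ident), while the parser expects a single `LIFETIME_IDENT` token. A standalone sketch of that merging step, using a hypothetical simplified leaf type rather than the `tt` crate's cursor API:

```rust
// Hypothetical simplified leaves; the real code walks tt::buffer::Cursor.
#[derive(Clone, Copy)]
enum Leaf {
    Punct(char),
    Ident(&'static str),
}

fn merge_lifetimes(leaves: &[Leaf]) -> Vec<&'static str> {
    let mut kinds = Vec::new();
    let mut i = 0;
    while i < leaves.len() {
        match leaves[i] {
            // `'` immediately followed by an ident becomes one LIFETIME_IDENT.
            Leaf::Punct('\'') if matches!(leaves.get(i + 1), Some(Leaf::Ident(_))) => {
                kinds.push("LIFETIME_IDENT");
                i += 2;
            }
            Leaf::Punct(_) => {
                kinds.push("PUNCT");
                i += 1;
            }
            Leaf::Ident(_) => {
                kinds.push("IDENT");
                i += 1;
            }
        }
    }
    kinds
}

// merge_lifetimes(&[Leaf::Punct('\''), Leaf::Ident("a")]) == ["LIFETIME_IDENT"],
// matching what to_parser_tokens pushes for `'a`.
```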
crates/mbe/src/tt_iter.rs
@@ -1,7 +1,7 @@
 //! A "Parser" structure for token trees. We use this when parsing a declarative
 //! macro definition into a list of patterns and templates.

-use crate::{subtree_source::SubtreeTokenSource, ExpandError, ExpandResult, ParserEntryPoint};
+use crate::{to_parser_tokens::to_parser_tokens, ExpandError, ExpandResult, ParserEntryPoint};

 use parser::TreeSink;
 use syntax::SyntaxKind;
@@ -116,10 +116,10 @@ impl<'a> TtIter<'a> {
         }

         let buffer = TokenBuffer::from_tokens(self.inner.as_slice());
-        let mut src = SubtreeTokenSource::new(&buffer);
+        let parser_tokens = to_parser_tokens(&buffer);
         let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false };

-        parser::parse(&mut src, &mut sink, entry_point);
+        parser::parse(&parser_tokens, &mut sink, entry_point);

         let mut err = if !sink.cursor.is_root() || sink.error {
             Some(err!("expected {:?}", entry_point))
crates/parser/src/lib.rs
@@ -1,8 +1,11 @@
 //! The Rust parser.
 //!
+//! NOTE: The crate is undergoing refactors, don't believe everything the docs
+//! say :-)
+//!
 //! The parser doesn't know about concrete representation of tokens and syntax
-//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead.
-//! As a consequence, this crate does not contain a lexer.
+//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As
+//! a consequence, this crate does not contain a lexer.
 //!
 //! The [`Parser`] struct from the [`parser`] module is a cursor into the
 //! sequence of tokens. Parsing routines use [`Parser`] to inspect current
crates/parser/src/tokens.rs
@@ -1,3 +1,8 @@
 //! Input for the parser -- a sequence of tokens.
+//!
+//! As of now, the parser doesn't have access to the *text* of the tokens, and
+//! makes decisions based solely on their classification.

 use crate::SyntaxKind;

 #[allow(non_camel_case_types)]
@@ -28,6 +33,22 @@ impl Tokens {
     pub fn push(&mut self, kind: SyntaxKind) {
         self.push_impl(kind, SyntaxKind::EOF)
     }
+    /// Sets jointness for the last token we've pushed.
+    ///
+    /// This is a separate API rather than an argument to `push` to make it
+    /// convenient both for textual and mbe tokens. With text, you know whether
+    /// the *previous* token was joint; with mbe, you know whether the *current*
+    /// one is joint. This API allows for both styles of usage:
+    ///
+    /// ```
+    /// // In text:
+    /// tokens.was_joint(prev_joint);
+    /// tokens.push(curr);
+    ///
+    /// // In MBE:
+    /// tokens.push(curr);
+    /// tokens.was_joint(curr_joint);
+    /// ```
+    pub fn was_joint(&mut self, yes: bool) {
+        let idx = self.len();
+        if yes && idx > 0 {
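The diff view cuts off inside `was_joint`. A plausible completion under assumed internals (suppose `Tokens` keeps a `joint: Vec<bool>` parallel to `kind`; neither that field nor a `set_joint` helper is confirmed by this excerpt): jointness is recorded on the *previously* pushed token, index `idx - 1`:

```rust
// Hypothetical completion of the truncated hunk, for illustration only.
pub fn was_joint(&mut self, yes: bool) {
    let idx = self.len();
    if yes && idx > 0 {
        // Mark the previously pushed token (idx - 1) as joint with the
        // token that will be pushed next.
        self.joint[idx - 1] = true;
    }
}
```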