Switch parser to use tokens
This commit is contained in:
parent
d5ad0f3ca0
commit
26bfd6023f
@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
|
||||
T![&] => {
|
||||
m = p.start();
|
||||
p.bump(T![&]);
|
||||
if p.at(IDENT)
|
||||
&& p.at_contextual_kw("raw")
|
||||
&& (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
|
||||
{
|
||||
if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
|
||||
p.bump_remap(T![raw]);
|
||||
p.bump_any();
|
||||
} else {
|
||||
|
@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||
has_mods = true;
|
||||
abi(p);
|
||||
}
|
||||
if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
|
||||
if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
|
||||
p.bump_remap(T![auto]);
|
||||
has_mods = true;
|
||||
}
|
||||
|
||||
// test default_item
|
||||
// default impl T for Foo {}
|
||||
if p.at(IDENT) && p.at_contextual_kw("default") {
|
||||
if p.at_contextual_kw(T![default]) {
|
||||
match p.nth(1) {
|
||||
T![fn] | T![type] | T![const] | T![impl] => {
|
||||
p.bump_remap(T![default]);
|
||||
@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||
|
||||
// test existential_type
|
||||
// existential type Foo: Fn() -> usize;
|
||||
if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
|
||||
if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
|
||||
p.bump_remap(T![existential]);
|
||||
has_mods = true;
|
||||
}
|
||||
@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||
T![type] => type_alias(p, m),
|
||||
T![struct] => adt::strukt(p, m),
|
||||
T![enum] => adt::enum_(p, m),
|
||||
IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
|
||||
IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
|
||||
|
||||
T![macro] => macro_def(p, m),
|
||||
IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
|
||||
IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
|
||||
|
||||
T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
|
||||
T![static] => consts::static_(p, m),
|
||||
@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
|
||||
}
|
||||
|
||||
fn macro_rules(p: &mut Parser, m: Marker) {
|
||||
assert!(p.at_contextual_kw("macro_rules"));
|
||||
assert!(p.at_contextual_kw(T![macro_rules]));
|
||||
p.bump_remap(T![macro_rules]);
|
||||
p.expect(T![!]);
|
||||
|
||||
|
@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
|
||||
// test union_item
|
||||
// struct U { i: i32, f: f32 }
|
||||
pub(super) fn union(p: &mut Parser, m: Marker) {
|
||||
assert!(p.at_contextual_kw("union"));
|
||||
assert!(p.at_contextual_kw(T![union]));
|
||||
p.bump_remap(T![union]);
|
||||
struct_or_union(p, m, false);
|
||||
}
|
||||
|
@ -26,6 +26,8 @@ pub(crate) use token_set::TokenSet;
|
||||
|
||||
pub use syntax_kind::SyntaxKind;
|
||||
|
||||
use crate::tokens::Tokens;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ParseError(pub Box<String>);
|
||||
|
||||
@ -53,6 +55,7 @@ pub struct Token {
|
||||
|
||||
/// Is the current token joined to the next one (`> >` vs `>>`).
|
||||
pub is_jointed_to_next: bool,
|
||||
pub contextual_kw: SyntaxKind,
|
||||
}
|
||||
|
||||
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
||||
@ -93,15 +96,11 @@ pub enum ParserEntryPoint {
|
||||
}
|
||||
|
||||
/// Parse given tokens into the given sink as a rust file.
|
||||
pub fn parse_source_file(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
||||
parse(token_source, tree_sink, ParserEntryPoint::SourceFile);
|
||||
pub fn parse_source_file(tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
|
||||
parse(tokens, tree_sink, ParserEntryPoint::SourceFile);
|
||||
}
|
||||
|
||||
pub fn parse(
|
||||
token_source: &mut dyn TokenSource,
|
||||
tree_sink: &mut dyn TreeSink,
|
||||
entry_point: ParserEntryPoint,
|
||||
) {
|
||||
pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
|
||||
let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
|
||||
ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
|
||||
ParserEntryPoint::Path => grammar::entry_points::path,
|
||||
@ -119,7 +118,7 @@ pub fn parse(
|
||||
ParserEntryPoint::Attr => grammar::entry_points::attr,
|
||||
};
|
||||
|
||||
let mut p = parser::Parser::new(token_source);
|
||||
let mut p = parser::Parser::new(tokens);
|
||||
entry_point(&mut p);
|
||||
let events = p.finish();
|
||||
event::process(tree_sink, events);
|
||||
@ -142,9 +141,9 @@ impl Reparser {
|
||||
///
|
||||
/// Tokens must start with `{`, end with `}` and form a valid brace
|
||||
/// sequence.
|
||||
pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
||||
pub fn parse(self, tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
|
||||
let Reparser(r) = self;
|
||||
let mut p = parser::Parser::new(token_source);
|
||||
let mut p = parser::Parser::new(tokens);
|
||||
r(&mut p);
|
||||
let events = p.finish();
|
||||
event::process(tree_sink, events);
|
||||
|
@ -7,9 +7,10 @@ use limit::Limit;
|
||||
|
||||
use crate::{
|
||||
event::Event,
|
||||
tokens::Tokens,
|
||||
ParseError,
|
||||
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
|
||||
TokenSet, TokenSource, T,
|
||||
TokenSet, T,
|
||||
};
|
||||
|
||||
/// `Parser` struct provides the low-level API for
|
||||
@ -22,7 +23,8 @@ use crate::{
|
||||
/// "start expression, consume number literal,
|
||||
/// finish expression". See `Event` docs for more.
|
||||
pub(crate) struct Parser<'t> {
|
||||
token_source: &'t mut dyn TokenSource,
|
||||
tokens: &'t Tokens,
|
||||
pos: usize,
|
||||
events: Vec<Event>,
|
||||
steps: Cell<u32>,
|
||||
}
|
||||
@ -30,8 +32,8 @@ pub(crate) struct Parser<'t> {
|
||||
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
|
||||
Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
|
||||
pub(super) fn new(tokens: &'t Tokens) -> Parser<'t> {
|
||||
Parser { tokens, pos: 0, events: Vec::new(), steps: Cell::new(0) }
|
||||
}
|
||||
|
||||
pub(crate) fn finish(self) -> Vec<Event> {
|
||||
@ -54,7 +56,7 @@ impl<'t> Parser<'t> {
|
||||
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
|
||||
self.steps.set(steps + 1);
|
||||
|
||||
self.token_source.lookahead_nth(n).kind
|
||||
self.tokens.get(self.pos + n).kind
|
||||
}
|
||||
|
||||
/// Checks if the current token is `kind`.
|
||||
@ -90,7 +92,7 @@ impl<'t> Parser<'t> {
|
||||
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
|
||||
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
|
||||
|
||||
_ => self.token_source.lookahead_nth(n).kind == kind,
|
||||
_ => self.tokens.get(self.pos + n).kind == kind,
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,24 +131,24 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
|
||||
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
|
||||
let t1 = self.token_source.lookahead_nth(n);
|
||||
let t1 = self.tokens.get(self.pos + n);
|
||||
if t1.kind != k1 || !t1.is_jointed_to_next {
|
||||
return false;
|
||||
}
|
||||
let t2 = self.token_source.lookahead_nth(n + 1);
|
||||
let t2 = self.tokens.get(self.pos + n + 1);
|
||||
t2.kind == k2
|
||||
}
|
||||
|
||||
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
|
||||
let t1 = self.token_source.lookahead_nth(n);
|
||||
let t1 = self.tokens.get(self.pos + n);
|
||||
if t1.kind != k1 || !t1.is_jointed_to_next {
|
||||
return false;
|
||||
}
|
||||
let t2 = self.token_source.lookahead_nth(n + 1);
|
||||
let t2 = self.tokens.get(self.pos + n + 1);
|
||||
if t2.kind != k2 || !t2.is_jointed_to_next {
|
||||
return false;
|
||||
}
|
||||
let t3 = self.token_source.lookahead_nth(n + 2);
|
||||
let t3 = self.tokens.get(self.pos + n + 2);
|
||||
t3.kind == k3
|
||||
}
|
||||
|
||||
@ -156,8 +158,8 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
|
||||
/// Checks if the current token is the contextual keyword `kw`.
|
||||
pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
|
||||
self.token_source.is_keyword(kw)
|
||||
pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
|
||||
self.tokens.get(self.pos).contextual_kw == kw
|
||||
}
|
||||
|
||||
/// Starts a new node in the syntax tree. All nodes and tokens
|
||||
@ -243,10 +245,7 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
|
||||
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
||||
for _ in 0..n_raw_tokens {
|
||||
self.token_source.bump();
|
||||
}
|
||||
|
||||
self.pos += n_raw_tokens as usize;
|
||||
self.push_event(Event::Token { kind, n_raw_tokens });
|
||||
}
|
||||
|
||||
|
@ -1,9 +1,8 @@
|
||||
use crate::SyntaxKind;
|
||||
use crate::{SyntaxKind, Token};
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
type bits = u64;
|
||||
|
||||
pub type IdentKind = u8;
|
||||
|
||||
/// Main input to the parser.
|
||||
///
|
||||
/// A sequence of tokens represented internally as a struct of arrays.
|
||||
@ -11,17 +10,17 @@ pub type IdentKind = u8;
|
||||
pub struct Tokens {
|
||||
kind: Vec<SyntaxKind>,
|
||||
joint: Vec<bits>,
|
||||
ident_kind: Vec<IdentKind>,
|
||||
contextual_kw: Vec<SyntaxKind>,
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
|
||||
self.push_impl(was_joint, kind, 0)
|
||||
self.push_impl(was_joint, kind, SyntaxKind::EOF)
|
||||
}
|
||||
pub fn push_ident(&mut self, ident_kind: IdentKind) {
|
||||
self.push_impl(false, SyntaxKind::IDENT, ident_kind)
|
||||
pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
|
||||
self.push_impl(false, SyntaxKind::IDENT, contextual_kw)
|
||||
}
|
||||
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, ctx: IdentKind) {
|
||||
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, contextual_kw: SyntaxKind) {
|
||||
let idx = self.len();
|
||||
if idx % (bits::BITS as usize) == 0 {
|
||||
self.joint.push(0);
|
||||
@ -30,7 +29,7 @@ impl Tokens {
|
||||
self.set_joint(idx - 1);
|
||||
}
|
||||
self.kind.push(kind);
|
||||
self.ident_kind.push(ctx);
|
||||
self.contextual_kw.push(contextual_kw);
|
||||
}
|
||||
fn set_joint(&mut self, n: usize) {
|
||||
let (idx, b_idx) = self.bit_index(n);
|
||||
@ -49,18 +48,18 @@ impl Tokens {
|
||||
pub fn len(&self) -> usize {
|
||||
self.kind.len()
|
||||
}
|
||||
pub(crate) fn get(&self, idx: usize) -> (SyntaxKind, bool, IdentKind) {
|
||||
pub(crate) fn get(&self, idx: usize) -> Token {
|
||||
if idx > self.len() {
|
||||
return self.eof();
|
||||
}
|
||||
let kind = self.kind[idx];
|
||||
let joint = self.get_joint(idx);
|
||||
let ident_kind = self.ident_kind[idx];
|
||||
(kind, joint, ident_kind)
|
||||
let is_jointed_to_next = self.get_joint(idx);
|
||||
let contextual_kw = self.contextual_kw[idx];
|
||||
Token { kind, is_jointed_to_next, contextual_kw }
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn eof(&self) -> (SyntaxKind, bool, IdentKind) {
|
||||
(SyntaxKind::EOF, false, 0)
|
||||
fn eof(&self) -> Token {
|
||||
Token { kind: SyntaxKind::EOF, is_jointed_to_next: false, contextual_kw: SyntaxKind::EOF }
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user