From 26bfd6023ffbc7fbd66bc4857e6c74b35e7fc9b4 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sun, 14 Nov 2021 22:13:44 +0300
Subject: [PATCH] Switch parser to use tokens

---
 crates/parser/src/grammar/expressions.rs |  5 +---
 crates/parser/src/grammar/items.rs       | 12 ++++-----
 crates/parser/src/grammar/items/adt.rs   |  2 +-
 crates/parser/src/lib.rs                 | 19 +++++++-------
 crates/parser/src/parser.rs              | 33 ++++++++++++------------
 crates/parser/src/tokens.rs              | 29 ++++++++++-----------
 6 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs
index 54eb96d84e5..4b9c579a052 100644
--- a/crates/parser/src/grammar/expressions.rs
+++ b/crates/parser/src/grammar/expressions.rs
@@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
         T![&] => {
             m = p.start();
             p.bump(T![&]);
-            if p.at(IDENT)
-                && p.at_contextual_kw("raw")
-                && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
-            {
+            if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
                 p.bump_remap(T![raw]);
                 p.bump_any();
             } else {
diff --git a/crates/parser/src/grammar/items.rs b/crates/parser/src/grammar/items.rs
index 39be0e1a192..896efaf3757 100644
--- a/crates/parser/src/grammar/items.rs
+++ b/crates/parser/src/grammar/items.rs
@@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
         has_mods = true;
         abi(p);
     }
-    if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
+    if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
         p.bump_remap(T![auto]);
         has_mods = true;
     }
 
     // test default_item
     // default impl T for Foo {}
-    if p.at(IDENT) && p.at_contextual_kw("default") {
+    if p.at_contextual_kw(T![default]) {
         match p.nth(1) {
             T![fn] | T![type] | T![const] | T![impl] => {
                 p.bump_remap(T![default]);
@@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
 
     // test existential_type
     // existential type Foo: Fn() -> usize;
-    if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
+    if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
         p.bump_remap(T![existential]);
         has_mods = true;
     }
@@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
         T![type] => type_alias(p, m),
         T![struct] => adt::strukt(p, m),
         T![enum] => adt::enum_(p, m),
-        IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
+        IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
 
         T![macro] => macro_def(p, m),
-        IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
+        IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
 
         T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
         T![static] => consts::static_(p, m),
@@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
 }
 
 fn macro_rules(p: &mut Parser, m: Marker) {
-    assert!(p.at_contextual_kw("macro_rules"));
+    assert!(p.at_contextual_kw(T![macro_rules]));
     p.bump_remap(T![macro_rules]);
     p.expect(T![!]);
 
diff --git a/crates/parser/src/grammar/items/adt.rs b/crates/parser/src/grammar/items/adt.rs
index c5bd5b14bae..83b7ff05786 100644
--- a/crates/parser/src/grammar/items/adt.rs
+++ b/crates/parser/src/grammar/items/adt.rs
@@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
 // test union_item
 // struct U { i: i32, f: f32 }
 pub(super) fn union(p: &mut Parser, m: Marker) {
-    assert!(p.at_contextual_kw("union"));
+    assert!(p.at_contextual_kw(T![union]));
     p.bump_remap(T![union]);
     struct_or_union(p, m, false);
 }
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 720ecf6fb62..fd447194bf9 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -26,6 +26,8 @@ pub(crate) use token_set::TokenSet;
 
 pub use syntax_kind::SyntaxKind;
 
+use crate::tokens::Tokens;
+
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct ParseError(pub Box<String>);
 
@@ -53,6 +55,7 @@ pub struct Token {
 
     /// Is the current token joined to the next one (`> >` vs `>>`).
     pub is_jointed_to_next: bool,
+    pub contextual_kw: SyntaxKind,
 }
 
 /// `TreeSink` abstracts details of a particular syntax tree implementation.
@@ -93,15 +96,11 @@ pub enum ParserEntryPoint {
 }
 
 /// Parse given tokens into the given sink as a rust file.
-pub fn parse_source_file(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
-    parse(token_source, tree_sink, ParserEntryPoint::SourceFile);
+pub fn parse_source_file(tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
+    parse(tokens, tree_sink, ParserEntryPoint::SourceFile);
 }
 
-pub fn parse(
-    token_source: &mut dyn TokenSource,
-    tree_sink: &mut dyn TreeSink,
-    entry_point: ParserEntryPoint,
-) {
+pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
     let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
         ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
         ParserEntryPoint::Path => grammar::entry_points::path,
@@ -119,7 +118,7 @@ pub fn parse(
         ParserEntryPoint::Attr => grammar::entry_points::attr,
     };
 
-    let mut p = parser::Parser::new(token_source);
+    let mut p = parser::Parser::new(tokens);
     entry_point(&mut p);
     let events = p.finish();
     event::process(tree_sink, events);
@@ -142,9 +141,9 @@ impl Reparser {
     ///
     /// Tokens must start with `{`, end with `}` and form a valid brace
     /// sequence.
-    pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
+    pub fn parse(self, tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
         let Reparser(r) = self;
-        let mut p = parser::Parser::new(token_source);
+        let mut p = parser::Parser::new(tokens);
         r(&mut p);
         let events = p.finish();
         event::process(tree_sink, events);
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 44c5f8e12f5..759f87f4966 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -7,9 +7,10 @@ use limit::Limit;
 
 use crate::{
     event::Event,
+    tokens::Tokens,
     ParseError,
     SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
-    TokenSet, TokenSource, T,
+    TokenSet, T,
 };
 
 /// `Parser` struct provides the low-level API for
@@ -22,7 +23,8 @@ use crate::{
 /// "start expression, consume number literal,
 /// finish expression". See `Event` docs for more.
 pub(crate) struct Parser<'t> {
-    token_source: &'t mut dyn TokenSource,
+    tokens: &'t Tokens,
+    pos: usize,
     events: Vec<Event>,
     steps: Cell<u32>,
 }
@@ -30,8 +32,8 @@ pub(crate) struct Parser<'t> {
 static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
 
 impl<'t> Parser<'t> {
-    pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
-        Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
+    pub(super) fn new(tokens: &'t Tokens) -> Parser<'t> {
+        Parser { tokens, pos: 0, events: Vec::new(), steps: Cell::new(0) }
     }
 
     pub(crate) fn finish(self) -> Vec<Event> {
@@ -54,7 +56,7 @@ impl<'t> Parser<'t> {
         assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
         self.steps.set(steps + 1);
 
-        self.token_source.lookahead_nth(n).kind
+        self.tokens.get(self.pos + n).kind
     }
 
     /// Checks if the current token is `kind`.
@@ -90,7 +92,7 @@ impl<'t> Parser<'t> {
             T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
             T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
 
-            _ => self.token_source.lookahead_nth(n).kind == kind,
+            _ => self.tokens.get(self.pos + n).kind == kind,
         }
     }
 
@@ -129,24 +131,24 @@ impl<'t> Parser<'t> {
     }
 
     fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
-        let t1 = self.token_source.lookahead_nth(n);
+        let t1 = self.tokens.get(self.pos + n);
         if t1.kind != k1 || !t1.is_jointed_to_next {
             return false;
         }
-        let t2 = self.token_source.lookahead_nth(n + 1);
+        let t2 = self.tokens.get(self.pos + n + 1);
         t2.kind == k2
     }
 
     fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
-        let t1 = self.token_source.lookahead_nth(n);
+        let t1 = self.tokens.get(self.pos + n);
         if t1.kind != k1 || !t1.is_jointed_to_next {
             return false;
         }
-        let t2 = self.token_source.lookahead_nth(n + 1);
+        let t2 = self.tokens.get(self.pos + n + 1);
         if t2.kind != k2 || !t2.is_jointed_to_next {
             return false;
         }
-        let t3 = self.token_source.lookahead_nth(n + 2);
+        let t3 = self.tokens.get(self.pos + n + 2);
         t3.kind == k3
     }
 
@@ -156,8 +158,8 @@ impl<'t> Parser<'t> {
     }
 
     /// Checks if the current token is contextual keyword with text `t`.
-    pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
-        self.token_source.is_keyword(kw)
+    pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
+        self.tokens.get(self.pos).contextual_kw == kw
    }
 
     /// Starts a new node in the syntax tree. All nodes and tokens
@@ -243,10 +245,7 @@ impl<'t> Parser<'t> {
     }
 
     fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
-        for _ in 0..n_raw_tokens {
-            self.token_source.bump();
-        }
-
+        self.pos += n_raw_tokens as usize;
         self.push_event(Event::Token { kind, n_raw_tokens });
     }
 
diff --git a/crates/parser/src/tokens.rs b/crates/parser/src/tokens.rs
index 2fd807f9b3c..495d9713ea9 100644
--- a/crates/parser/src/tokens.rs
+++ b/crates/parser/src/tokens.rs
@@ -1,9 +1,8 @@
-use crate::SyntaxKind;
+use crate::{SyntaxKind, Token};
 
+#[allow(non_camel_case_types)]
 type bits = u64;
 
-pub type IdentKind = u8;
-
 /// Main input to the parser.
 ///
 /// A sequence of tokens represented internally as a struct of arrays.
@@ -11,17 +10,17 @@ pub type IdentKind = u8;
 pub struct Tokens {
     kind: Vec<SyntaxKind>,
     joint: Vec<bits>,
-    ident_kind: Vec<IdentKind>,
+    contextual_kw: Vec<SyntaxKind>,
 }
 
 impl Tokens {
     pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
-        self.push_impl(was_joint, kind, 0)
+        self.push_impl(was_joint, kind, SyntaxKind::EOF)
     }
-    pub fn push_ident(&mut self, ident_kind: IdentKind) {
-        self.push_impl(false, SyntaxKind::IDENT, ident_kind)
+    pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
+        self.push_impl(false, SyntaxKind::IDENT, contextual_kw)
     }
-    fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, ctx: IdentKind) {
+    fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, contextual_kw: SyntaxKind) {
         let idx = self.len();
         if idx % (bits::BITS as usize) == 0 {
             self.joint.push(0);
@@ -30,7 +29,7 @@ impl Tokens {
             self.set_joint(idx - 1);
         }
         self.kind.push(kind);
-        self.ident_kind.push(ctx);
+        self.contextual_kw.push(contextual_kw);
     }
     fn set_joint(&mut self, n: usize) {
         let (idx, b_idx) = self.bit_index(n);
@@ -49,18 +48,18 @@ impl Tokens {
     pub fn len(&self) -> usize {
         self.kind.len()
     }
-    pub(crate) fn get(&self, idx: usize) -> (SyntaxKind, bool, IdentKind) {
+    pub(crate) fn get(&self, idx: usize) -> Token {
         if idx > self.len() {
             return self.eof();
         }
         let kind = self.kind[idx];
-        let joint = self.get_joint(idx);
-        let ident_kind = self.ident_kind[idx];
-        (kind, joint, ident_kind)
+        let is_jointed_to_next = self.get_joint(idx);
+        let contextual_kw = self.contextual_kw[idx];
+        Token { kind, is_jointed_to_next, contextual_kw }
     }
 
     #[cold]
-    fn eof(&self) -> (SyntaxKind, bool, IdentKind) {
-        (SyntaxKind::EOF, false, 0)
+    fn eof(&self) -> Token {
+        Token { kind: SyntaxKind::EOF, is_jointed_to_next: false, contextual_kw: SyntaxKind::EOF }
     }
 }
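The sketch below is not part of the patch; it is a self-contained illustration using simplified stand-in types (Kind, Tok, a local Tokens) rather than the crate's real SyntaxKind/Token/Tokens. It shows the scheme the parser switches to here: the lexer eagerly records a contextual-keyword kind alongside each identifier in a struct-of-arrays buffer (jointness packed into a bitset), and the parser keeps a plain pos cursor into an immutable &Tokens, so at_contextual_kw becomes a kind comparison at the current position instead of a string comparison through a &mut dyn TokenSource.

// Illustration only: simplified stand-ins, not the crate's real types.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Kind {
    Eof, // also doubles as "no contextual keyword" for non-identifier tokens
    Ident,
    Bang,
    MacroRulesKw,
}

#[derive(Clone, Copy)]
struct Tok {
    kind: Kind,
    joint: bool,
    contextual_kw: Kind,
}

#[derive(Default)]
struct Tokens {
    kind: Vec<Kind>,
    joint: Vec<u64>,          // one bit per token: is it joined to the next one?
    contextual_kw: Vec<Kind>, // Eof unless the lexer resolved a contextual keyword
}

impl Tokens {
    fn push(&mut self, was_joint: bool, kind: Kind) {
        self.push_impl(was_joint, kind, Kind::Eof)
    }
    fn push_ident(&mut self, contextual_kw: Kind) {
        self.push_impl(false, Kind::Ident, contextual_kw)
    }
    fn push_impl(&mut self, was_joint: bool, kind: Kind, contextual_kw: Kind) {
        let idx = self.kind.len();
        if idx % 64 == 0 {
            self.joint.push(0);
        }
        if was_joint && idx > 0 {
            // Mark the *previous* token as joined to the one being pushed.
            self.joint[(idx - 1) / 64] |= 1u64 << ((idx - 1) % 64);
        }
        self.kind.push(kind);
        self.contextual_kw.push(contextual_kw);
    }
    fn get(&self, idx: usize) -> Tok {
        if idx >= self.kind.len() {
            // Lookahead past the end reads as EOF.
            return Tok { kind: Kind::Eof, joint: false, contextual_kw: Kind::Eof };
        }
        Tok {
            kind: self.kind[idx],
            joint: (self.joint[idx / 64] & (1u64 << (idx % 64))) != 0,
            contextual_kw: self.contextual_kw[idx],
        }
    }
}

fn main() {
    // Lexing `macro_rules! m`: `macro_rules` stays an IDENT, but the lexer
    // records its contextual-keyword kind up front.
    let mut tokens = Tokens::default();
    tokens.push_ident(Kind::MacroRulesKw);
    tokens.push(false, Kind::Bang);
    tokens.push_ident(Kind::Eof); // plain identifier `m`

    // What the parser's at_contextual_kw / nth checks boil down to after the
    // patch: kind comparisons by position, no string comparison, no trait object.
    let pos = 0;
    let at_macro_rules = tokens.get(pos).contextual_kw == Kind::MacroRulesKw;
    assert!(at_macro_rules && tokens.get(pos + 1).kind == Kind::Bang);
    assert!(!tokens.get(pos).joint); // identifiers are pushed as non-joint
    assert!(tokens.get(42).kind == Kind::Eof); // out-of-bounds lookahead -> EOF
    println!("contextual `macro_rules` detected: {at_macro_rules}");
}

Indexing by self.pos + n on a shared &Tokens also removes the lookahead_nth/bump bookkeeping a streaming TokenSource had to perform, which is what lets do_bump in parser.rs shrink to a single `self.pos += n_raw_tokens as usize`.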