From 3517c175ac537b47dd3e36cc7fb1edd60b02c039 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 21:08:59 +0300 Subject: [PATCH 01/10] rename Sink -> TreeSink --- crates/ra_syntax/src/parsing/builder.rs | 4 ++-- crates/ra_syntax/src/parsing/parser_impl.rs | 4 ++-- crates/ra_syntax/src/parsing/parser_impl/event.rs | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 9090c60c2ea..118f43b2caf 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,5 +1,5 @@ use crate::{ - parsing::parser_impl::Sink, + parsing::parser_impl::TreeSink, syntax_node::{GreenNode, RaTypes}, SmolStr, SyntaxKind, SyntaxError, }; @@ -17,7 +17,7 @@ impl GreenBuilder { } } -impl Sink for GreenBuilder { +impl TreeSink for GreenBuilder { type Tree = (GreenNode, Vec<SyntaxError>); fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 8cce1ab01ce..02baed76b6c 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -18,7 +18,7 @@ use crate::{ use crate::SyntaxKind::{self, EOF, TOMBSTONE}; -pub(super) trait Sink { +pub(super) trait TreeSink { type Tree; /// Adds new leaf to the current branch. @@ -40,7 +40,7 @@ pub(super) trait Sink { } /// Parse a sequence of tokens into the representative node tree -pub(super) fn parse_with<S: Sink>( +pub(super) fn parse_with<S: TreeSink>( sink: S, text: &str, tokens: &[Token], diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs index 2ddbdd34d22..9663fba3590 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs @@ -3,7 +3,7 @@ //! parser, so as to allow to evolve the tree representation //! and the parser algorithm independently. //! -//! The `Sink` trait is the bridge between the parser and the +//! The `TreeSink` trait is the bridge between the parser and the //! tree builder: the parser produces a stream of events like //! `start node`, `finish node`, and `FileBuilder` converts //! this stream to a real tree. @@ -20,7 +20,7 @@ use crate::{ }, parsing::{ lexer::Token, - parser_impl::Sink, + parser_impl::TreeSink, }, }; @@ -93,7 +93,7 @@ impl Event { } } -pub(super) struct EventProcessor<'a, S: Sink> { +pub(super) struct EventProcessor<'a, S: TreeSink> { sink: S, text_pos: TextUnit, text: &'a str, @@ -102,7 +102,7 @@ pub(super) struct EventProcessor<'a, S: Sink> { events: &'a mut [Event], } -impl<'a, S: Sink> EventProcessor<'a, S> { +impl<'a, S: TreeSink> EventProcessor<'a, S> { pub(super) fn new( sink: S, text: &'a str, From 0c81b9deeed81bfb2cf8142af9d748317d5d71a1 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 21:50:07 +0300 Subject: [PATCH 02/10] route parsing via TokenSource trait --- crates/ra_syntax/src/parsing/parser_api.rs | 4 +- crates/ra_syntax/src/parsing/parser_impl.rs | 50 +++++++------- .../src/parsing/parser_impl/input.rs | 69 +++++++++---------- 3 files changed, 59 insertions(+), 64 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 781c407debb..813ae494c52 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -17,7 +17,9 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); +pub(crate) struct Parser<'t>( + pub(super) ParserImpl<crate::parsing::parser_impl::input::ParserInput<'t>>, +); impl<'t> Parser<'t> { /// Returns the kind of the current token. diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 02baed76b6c..c0d2b6ec12c 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -1,5 +1,5 @@ mod event; -mod input; +pub(crate) mod input; use std::cell::Cell; @@ -11,7 +11,7 @@ use crate::{ parser_api::Parser, parser_impl::{ event::{Event, EventProcessor}, - input::{InputPosition, ParserInput}, + input::InputPosition, }, }, }; @@ -39,6 +39,12 @@ pub(super) trait TreeSink { fn finish(self) -> Self::Tree; } +pub(super) trait TokenSource { + fn token_kind(&self, pos: InputPosition) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: InputPosition) -> bool; + fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool; +} + /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with<S: TreeSink>( sink: S, @@ -48,7 +54,7 @@ pub(super) fn parse_with<S: TreeSink>( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(&input); + let parser_impl = ParserImpl::new(input); let mut parser_api = Parser(parser_impl); parser(&mut parser_api); parser_api.0.into_events() @@ -59,17 +65,17 @@ pub(super) fn parse_with<S: TreeSink>( /// Implementation details of `Parser`, extracted /// to a separate struct in order not to pollute /// the public API of the `Parser`. -pub(super) struct ParserImpl<'t> { - parser_input: &'t ParserInput<'t>, +pub(super) struct ParserImpl<S> { + token_source: S, pos: InputPosition, events: Vec<Event>, steps: Cell<u32>, } -impl<'t> ParserImpl<'t> { - fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { +impl<S: TokenSource> ParserImpl<S> { + fn new(token_source: S) -> ParserImpl<S> { ParserImpl { - parser_input: inp, + token_source, pos: InputPosition::new(), events: Vec::new(), steps: Cell::new(0), @@ -82,11 +88,9 @@ impl<'t> ParserImpl<'t> { } pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.parser_input.kind(self.pos); - let c2 = self.parser_input.kind(self.pos + 1); - if self.parser_input.token_start_at(self.pos + 1) - == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) - { + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + if self.token_source.is_token_joint_to_next(self.pos) { Some((c1, c2)) } else { None @@ -94,14 +98,11 @@ impl<'t> ParserImpl<'t> { } pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.parser_input.kind(self.pos); - let c2 = self.parser_input.kind(self.pos + 1); - let c3 = self.parser_input.kind(self.pos + 2); - if self.parser_input.token_start_at(self.pos + 1) - == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) - && self.parser_input.token_start_at(self.pos + 2) - == self.parser_input.token_start_at(self.pos + 1) - + self.parser_input.token_len(self.pos + 1) + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + let c3 = self.token_source.token_kind(self.pos + 2); + if self.token_source.is_token_joint_to_next(self.pos) + && self.token_source.is_token_joint_to_next(self.pos + 1) { Some((c1, c2, c3)) } else { @@ -114,12 +115,11 @@ impl<'t> ParserImpl<'t> { let steps = self.steps.get(); assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - - self.parser_input.kind(self.pos + n) + self.token_source.token_kind(self.pos + n) } - pub(super) fn at_kw(&self, t: &str) -> bool { - self.parser_input.token_text(self.pos) == t + pub(super) fn at_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.pos, kw) } /// Start parsing right behind the last event. diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index 275d949189a..8ebbd38259f 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -1,10 +1,40 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, - parsing::lexer::Token, + parsing::{ + parser_impl::TokenSource, + lexer::Token, + }, }; use std::ops::{Add, AddAssign}; +impl<'t> TokenSource for ParserInput<'t> { + fn token_kind(&self, pos: InputPosition) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + fn is_token_joint_to_next(&self, pos: InputPosition) -> bool { + let idx_curr = pos.0 as usize; + let idx_next = pos.0 as usize; + if !(idx_next < self.tokens.len()) { + return true; + } + self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + } + fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return false; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + + self.text[range] == *kw + } +} + pub(crate) struct ParserInput<'t> { text: &'t str, /// start position of each token(expect whitespace and comment) @@ -41,43 +71,6 @@ impl<'t> ParserInput<'t> { ParserInput { text, start_offsets, tokens } } - - /// Get the syntax kind of token at given input position. - pub fn kind(&self, pos: InputPosition) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return EOF; - } - self.tokens[idx].kind - } - - /// Get the length of a token at given input position. - pub fn token_len(&self, pos: InputPosition) -> TextUnit { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return 0.into(); - } - self.tokens[idx].len - } - - /// Get the start position of a taken at given input position. - pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return 0.into(); - } - self.start_offsets[idx] - } - - /// Get the raw text of a token at given input position. - pub fn token_text(&self, pos: InputPosition) -> &'t str { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return ""; - } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); - &self.text[range] - } } #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] From d2bce118ae72ee5cf96b8c6ac687914cb842363c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:02:03 +0300 Subject: [PATCH 03/10] switch to dynamic dispatch for TokenSource Benchmarks show no difference. This is probably because we are bottlenecked on memory allocations, and we should fix that, but we are not optimizing for performance just yet. changes. Lines starting # with '#' will be ignored, and an empty message aborts the commit. # # On branch token-source # Changes to be committed: # modified: crates/ra_syntax/src/parsing/parser_api.rs # modified: crates/ra_syntax/src/parsing/parser_impl.rs # --- crates/ra_syntax/src/parsing/parser_api.rs | 6 ++---- crates/ra_syntax/src/parsing/parser_impl.rs | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 813ae494c52..aed23a6a491 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -4,7 +4,7 @@ use crate::{ SyntaxKind::{self, ERROR}, parsing::{ token_set::TokenSet, - parser_impl::ParserImpl + parser_impl::ParserImpl, }, }; @@ -17,9 +17,7 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>( - pub(super) ParserImpl<crate::parsing::parser_impl::input::ParserInput<'t>>, -); +pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); impl<'t> Parser<'t> { /// Returns the kind of the current token. diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index c0d2b6ec12c..96de32fc256 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -54,7 +54,7 @@ pub(super) fn parse_with<S: TreeSink>( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(input); + let parser_impl = ParserImpl::new(&input); let mut parser_api = Parser(parser_impl); parser(&mut parser_api); parser_api.0.into_events() @@ -65,15 +65,15 @@ pub(super) fn parse_with<S: TreeSink>( /// Implementation details of `Parser`, extracted /// to a separate struct in order not to pollute /// the public API of the `Parser`. -pub(super) struct ParserImpl<S> { - token_source: S, +pub(super) struct ParserImpl<'a> { + token_source: &'a dyn TokenSource, pos: InputPosition, events: Vec<Event>, steps: Cell<u32>, } -impl<S: TokenSource> ParserImpl<S> { - fn new(token_source: S) -> ParserImpl<S> { +impl<'a> ParserImpl<'a> { + fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { ParserImpl { token_source, pos: InputPosition::new(), From 2b5e336ce7172914686b33c8ac1522911366fcf0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:19:12 +0300 Subject: [PATCH 04/10] move abstract traits to top --- crates/ra_syntax/src/parsing.rs | 50 ++++++++++++++++++- crates/ra_syntax/src/parsing/builder.rs | 2 +- crates/ra_syntax/src/parsing/parser_impl.rs | 40 ++------------- .../src/parsing/parser_impl/input.rs | 32 ++---------- 4 files changed, 59 insertions(+), 65 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 761accd7be4..6c2c5f78b77 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -8,7 +8,7 @@ mod grammar; mod reparsing; use crate::{ - SyntaxError, + SyntaxError, SyntaxKind, SmolStr, parsing::builder::GreenBuilder, syntax_node::GreenNode, }; @@ -23,3 +23,51 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); (green, errors) } + +/// `TreeSink` abstracts details of a particular syntax tree implementation. +trait TreeSink { + type Tree; + + /// Adds new leaf to the current branch. + fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); + + /// Start new branch and make it current. + fn start_branch(&mut self, kind: SyntaxKind); + + /// Finish current branch and restore previous + /// branch as current. + fn finish_branch(&mut self); + + fn error(&mut self, error: SyntaxError); + + /// Complete tree building. Make sure that + /// `start_branch` and `finish_branch` calls + /// are paired! + fn finish(self) -> Self::Tree; +} + +/// `TokenSource` abstracts the source of the tokens parser operates one. +/// +/// Hopefully this will allow us to treat text and token trees in the same way! +trait TokenSource { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool; + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool; +} + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)] +pub(crate) struct TokenPos(pub u32); + +impl std::ops::Add<u32> for TokenPos { + type Output = TokenPos; + + fn add(self, rhs: u32) -> TokenPos { + TokenPos(self.0 + rhs) + } +} + +impl std::ops::AddAssign<u32> for TokenPos { + fn add_assign(&mut self, rhs: u32) { + self.0 += rhs + } +} diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 118f43b2caf..a05e7f84b5c 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,5 +1,5 @@ use crate::{ - parsing::parser_impl::TreeSink, + parsing::TreeSink, syntax_node::{GreenNode, RaTypes}, SmolStr, SyntaxKind, SyntaxError, }; diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 96de32fc256..89439e07465 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -4,47 +4,17 @@ pub(crate) mod input; use std::cell::Cell; use crate::{ - SmolStr, - syntax_error::{ParseError, SyntaxError}, + syntax_error::ParseError, parsing::{ + TreeSink, TokenSource, TokenPos, lexer::Token, parser_api::Parser, - parser_impl::{ - event::{Event, EventProcessor}, - input::InputPosition, - }, + parser_impl::event::{Event, EventProcessor}, }, }; use crate::SyntaxKind::{self, EOF, TOMBSTONE}; -pub(super) trait TreeSink { - type Tree; - - /// Adds new leaf to the current branch. - fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); - - /// Start new branch and make it current. - fn start_branch(&mut self, kind: SyntaxKind); - - /// Finish current branch and restore previous - /// branch as current. - fn finish_branch(&mut self); - - fn error(&mut self, error: SyntaxError); - - /// Complete tree building. Make sure that - /// `start_branch` and `finish_branch` calls - /// are paired! - fn finish(self) -> Self::Tree; -} - -pub(super) trait TokenSource { - fn token_kind(&self, pos: InputPosition) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: InputPosition) -> bool; - fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool; -} - /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with<S: TreeSink>( sink: S, @@ -67,7 +37,7 @@ pub(super) fn parse_with<S: TreeSink>( /// the public API of the `Parser`. pub(super) struct ParserImpl<'a> { token_source: &'a dyn TokenSource, - pos: InputPosition, + pos: TokenPos, events: Vec<Event>, steps: Cell<u32>, } @@ -76,7 +46,7 @@ impl<'a> ParserImpl<'a> { fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { ParserImpl { token_source, - pos: InputPosition::new(), + pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0), } diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index 8ebbd38259f..e9735e5260c 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -1,22 +1,21 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, parsing::{ + TokenPos, parser_impl::TokenSource, lexer::Token, }, }; -use std::ops::{Add, AddAssign}; - impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: InputPosition) -> SyntaxKind { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return EOF; } self.tokens[idx].kind } - fn is_token_joint_to_next(&self, pos: InputPosition) -> bool { + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { let idx_curr = pos.0 as usize; let idx_next = pos.0 as usize; if !(idx_next < self.tokens.len()) { @@ -24,7 +23,7 @@ impl<'t> TokenSource for ParserInput<'t> { } self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] } - fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool { + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return false; @@ -72,26 +71,3 @@ impl<'t> ParserInput<'t> { ParserInput { text, start_offsets, tokens } } } - -#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub(crate) struct InputPosition(u32); - -impl InputPosition { - pub fn new() -> Self { - InputPosition(0) - } -} - -impl Add<u32> for InputPosition { - type Output = InputPosition; - - fn add(self, rhs: u32) -> InputPosition { - InputPosition(self.0 + rhs) - } -} - -impl AddAssign<u32> for InputPosition { - fn add_assign(&mut self, rhs: u32) { - self.0 += rhs - } -} From e72ad0a2faac98972544dd42316ccf8090717102 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:27:49 +0300 Subject: [PATCH 05/10] fix off by one error --- crates/ra_syntax/src/parsing/parser_impl/input.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index e9735e5260c..11b32b9cec2 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -17,7 +17,7 @@ impl<'t> TokenSource for ParserInput<'t> { } fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize; + let idx_next = pos.0 as usize + 1; if !(idx_next < self.tokens.len()) { return true; } From 2acb21e8f72896c7a2855ca6042d0ee1870d8643 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:44:06 +0300 Subject: [PATCH 06/10] merge parse_impl and parser_api --- crates/ra_syntax/src/parsing/parser_api.rs | 108 +++++++++++-- crates/ra_syntax/src/parsing/parser_impl.rs | 163 ++------------------ 2 files changed, 101 insertions(+), 170 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index aed23a6a491..92d7895d3ec 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -1,10 +1,14 @@ +use std::cell::Cell; + use drop_bomb::DropBomb; use crate::{ - SyntaxKind::{self, ERROR}, + syntax_error::ParseError, + SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ + TokenSource, TokenPos, token_set::TokenSet, - parser_impl::ParserImpl, + parser_impl::event::Event, }, }; @@ -17,9 +21,22 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); +pub(crate) struct Parser<'t> { + token_source: &'t dyn TokenSource, + pos: TokenPos, + events: Vec<Event>, + steps: Cell<u32>, +} impl<'t> Parser<'t> { + pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { + Parser { token_source, pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec<Event> { + self.events + } + /// Returns the kind of the current token. /// If parser has already reached the end of input, /// the special `EOF` kind is returned. @@ -32,7 +49,13 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `>>`. pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - self.0.current2() + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + if self.token_source.is_token_joint_to_next(self.pos) { + Some((c1, c2)) + } else { + None + } } /// Returns the kinds of the current three tokens, if they are not separated @@ -40,13 +63,25 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `=>>`. pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - self.0.current3() + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + let c3 = self.token_source.token_kind(self.pos + 2); + if self.token_source.is_token_joint_to_next(self.pos) + && self.token_source.is_token_joint_to_next(self.pos + 1) + { + Some((c1, c2, c3)) + } else { + None + } } /// Lookahead operation: returns the kind of the next nth /// token. pub(crate) fn nth(&self, n: u32) -> SyntaxKind { - self.0.nth(n) + let steps = self.steps.get(); + assert!(steps <= 10_000_000, "the parser seems stuck"); + self.steps.set(steps + 1); + self.token_source.token_kind(self.pos + n) } /// Checks if the current token is `kind`. @@ -60,20 +95,26 @@ impl<'t> Parser<'t> { } /// Checks if the current token is contextual keyword with text `t`. - pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { - self.0.at_kw(t) + pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.pos, kw) } /// Starts a new node in the syntax tree. All nodes and tokens /// consumed between the `start` and the corresponding `Marker::complete` /// belong to the same node. pub(crate) fn start(&mut self) -> Marker { - Marker::new(self.0.start()) + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) } /// Advances the parser by one token unconditionally. pub(crate) fn bump(&mut self) { - self.0.bump(); + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); } /// Advances the parser by one token, remapping its kind. @@ -83,14 +124,18 @@ impl<'t> Parser<'t> { /// `union` keyword, and keyword is what ends up in the /// final tree. pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { - self.0.bump_remap(kind); + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind, 1); } /// Advances the parser by `n` tokens, remapping its kind. /// This is useful to create compound tokens from parts. For /// example, an `<<` token is two consecutive remapped `<` tokens pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.0.bump_compound(kind, n); + self.do_bump(kind, n); } /// Emit error with the `message` @@ -98,7 +143,8 @@ impl<'t> Parser<'t> { /// structured errors with spans and notes, like rustc /// does. pub(crate) fn error<T: Into<String>>(&mut self, message: T) { - self.0.error(message.into()) + let msg = ParseError(message.into()); + self.push_event(Event::Error { msg }) } /// Consume the next token if `kind` matches. @@ -136,6 +182,15 @@ impl<'t> Parser<'t> { m.complete(self, ERROR); }; } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.pos += u32::from(n_raw_tokens); + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event) + } } /// See `Parser::start`. @@ -154,7 +209,14 @@ impl Marker { /// operation like `.precede()` to deal with forward_parent. pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { self.bomb.defuse(); - p.0.complete(self.pos, kind); + let idx = self.pos as usize; + match p.events[idx] { + Event::Start { kind: ref mut slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + p.push_event(Event::Finish); CompletedMarker::new(self.pos, kind) } @@ -162,7 +224,13 @@ impl Marker { /// are attached to its parent instead. pub(crate) fn abandon(mut self, p: &mut Parser) { self.bomb.defuse(); - p.0.abandon(self.pos); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } } } @@ -186,7 +254,15 @@ impl CompletedMarker { /// then mark `NEWSTART` as `START`'s parent with saving its relative /// distance to `NEWSTART` into forward_parent(=2 in this case); pub(crate) fn precede(self, p: &mut Parser) -> Marker { - Marker::new(p.0.precede(self.0)) + let new_pos = p.start(); + let idx = self.0 as usize; + match p.events[idx] { + Event::Start { ref mut forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.0); + } + _ => unreachable!(), + } + new_pos } pub(crate) fn kind(&self) -> SyntaxKind { diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 89439e07465..6eed0e656a4 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -1,20 +1,13 @@ -mod event; -pub(crate) mod input; +pub(super) mod event; +pub(super) mod input; -use std::cell::Cell; - -use crate::{ - syntax_error::ParseError, - parsing::{ - TreeSink, TokenSource, TokenPos, - lexer::Token, - parser_api::Parser, - parser_impl::event::{Event, EventProcessor}, - }, +use crate::parsing::{ + TreeSink, TokenSource, + lexer::Token, + parser_api::Parser, + parser_impl::event::EventProcessor, }; -use crate::SyntaxKind::{self, EOF, TOMBSTONE}; - /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with<S: TreeSink>( sink: S, @@ -24,147 +17,9 @@ pub(super) fn parse_with<S: TreeSink>( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(&input); - let mut parser_api = Parser(parser_impl); + let mut parser_api = Parser::new(&input); parser(&mut parser_api); - parser_api.0.into_events() + parser_api.finish() }; EventProcessor::new(sink, text, tokens, &mut events).process().finish() } - -/// Implementation details of `Parser`, extracted -/// to a separate struct in order not to pollute -/// the public API of the `Parser`. -pub(super) struct ParserImpl<'a> { - token_source: &'a dyn TokenSource, - pos: TokenPos, - events: Vec<Event>, - steps: Cell<u32>, -} - -impl<'a> ParserImpl<'a> { - fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { - ParserImpl { - token_source, - pos: TokenPos::default(), - events: Vec::new(), - steps: Cell::new(0), - } - } - - fn into_events(self) -> Vec<Event> { - assert_eq!(self.nth(0), EOF); - self.events - } - - pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - if self.token_source.is_token_joint_to_next(self.pos) { - Some((c1, c2)) - } else { - None - } - } - - pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - let c3 = self.token_source.token_kind(self.pos + 2); - if self.token_source.is_token_joint_to_next(self.pos) - && self.token_source.is_token_joint_to_next(self.pos + 1) - { - Some((c1, c2, c3)) - } else { - None - } - } - - /// Get the syntax kind of the nth token. - pub(super) fn nth(&self, n: u32) -> SyntaxKind { - let steps = self.steps.get(); - assert!(steps <= 10_000_000, "the parser seems stuck"); - self.steps.set(steps + 1); - self.token_source.token_kind(self.pos + n) - } - - pub(super) fn at_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.pos, kw) - } - - /// Start parsing right behind the last event. - pub(super) fn start(&mut self) -> u32 { - let pos = self.events.len() as u32; - self.push_event(Event::tombstone()); - pos - } - - /// Advances the parser by one token unconditionally. - pub(super) fn bump(&mut self) { - let kind = self.nth(0); - if kind == EOF { - return; - } - self.do_bump(kind, 1); - } - - pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { - if self.nth(0) == EOF { - // TODO: panic!? - return; - } - self.do_bump(kind, 1); - } - - pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.do_bump(kind, n); - } - - fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.pos += u32::from(n_raw_tokens); - self.push_event(Event::Token { kind, n_raw_tokens }); - } - - /// Append one Error event to the back of events. - pub(super) fn error(&mut self, msg: String) { - self.push_event(Event::Error { msg: ParseError(msg) }) - } - - /// Complete an event with appending a `Finish` event. - pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { - match self.events[pos as usize] { - Event::Start { kind: ref mut slot, .. } => { - *slot = kind; - } - _ => unreachable!(), - } - self.push_event(Event::Finish); - } - - /// Ignore the dummy `Start` event. - pub(super) fn abandon(&mut self, pos: u32) { - let idx = pos as usize; - if idx == self.events.len() - 1 { - match self.events.pop() { - Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), - _ => unreachable!(), - } - } - } - - /// Save the relative distance of a completed event to its forward_parent. - pub(super) fn precede(&mut self, pos: u32) -> u32 { - let new_pos = self.start(); - match self.events[pos as usize] { - Event::Start { ref mut forward_parent, .. } => { - *forward_parent = Some(new_pos - pos); - } - _ => unreachable!(), - } - new_pos - } - - fn push_event(&mut self, event: Event) { - self.events.push(event) - } -} From cce23fddba4241202ebd29cce44db4ce9a08793a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:52:32 +0300 Subject: [PATCH 07/10] flattern module structure --- crates/ra_syntax/src/parsing.rs | 29 +++++++++++++++---- .../src/parsing/{parser_impl => }/event.rs | 4 +-- .../src/parsing/{parser_impl => }/input.rs | 3 +- crates/ra_syntax/src/parsing/parser_api.rs | 2 +- crates/ra_syntax/src/parsing/reparsing.rs | 6 ++-- 5 files changed, 30 insertions(+), 14 deletions(-) rename crates/ra_syntax/src/parsing/{parser_impl => }/event.rs (99%) rename crates/ra_syntax/src/parsing/{parser_impl => }/input.rs (97%) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 6c2c5f78b77..f74c365d5cc 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -2,14 +2,20 @@ mod token_set; mod builder; mod lexer; -mod parser_impl; +mod event; +mod input; mod parser_api; mod grammar; mod reparsing; use crate::{ SyntaxError, SyntaxKind, SmolStr, - parsing::builder::GreenBuilder, + parsing::{ + builder::GreenBuilder, + input::ParserInput, + event::EventProcessor, + parser_api::Parser, + }, syntax_node::GreenNode, }; @@ -19,9 +25,22 @@ pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { let tokens = tokenize(&text); - let (green, errors) = - parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); - (green, errors) + parse_with(GreenBuilder::new(), text, &tokens, grammar::root) +} + +fn parse_with<S: TreeSink>( + tree_sink: S, + text: &str, + tokens: &[Token], + f: fn(&mut Parser), +) -> S::Tree { + let mut events = { + let input = ParserInput::new(text, &tokens); + let mut p = Parser::new(&input); + f(&mut p); + p.finish() + }; + EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() } /// `TreeSink` abstracts details of a particular syntax tree implementation. diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/event.rs similarity index 99% rename from crates/ra_syntax/src/parsing/parser_impl/event.rs rename to crates/ra_syntax/src/parsing/event.rs index 9663fba3590..893a42e9ac6 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs @@ -20,7 +20,7 @@ use crate::{ }, parsing::{ lexer::Token, - parser_impl::TreeSink, + TreeSink, }, }; @@ -113,7 +113,7 @@ impl<'a, S: TreeSink> EventProcessor<'a, S> { } /// Generate the syntax tree with the control of events. - pub(super) fn process(mut self) -> S { + pub(crate) fn process(mut self) -> S { let mut forward_parents = Vec::new(); for i in 0..self.events.len() { diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/input.rs similarity index 97% rename from crates/ra_syntax/src/parsing/parser_impl/input.rs rename to crates/ra_syntax/src/parsing/input.rs index 11b32b9cec2..0f1810df55b 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/input.rs @@ -1,8 +1,7 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, parsing::{ - TokenPos, - parser_impl::TokenSource, + TokenPos, TokenSource, lexer::Token, }, }; diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 92d7895d3ec..99f6183a4e9 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -8,7 +8,7 @@ use crate::{ parsing::{ TokenSource, TokenPos, token_set::TokenSet, - parser_impl::event::Event, + event::Event, }, }; diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index edf3fa2910b..f45326dffef 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -4,8 +4,7 @@ use crate::{ syntax_node::{GreenNode, SyntaxNode}, syntax_error::SyntaxError, parsing::{ - grammar, - parser_impl, + grammar, parse_with, builder::GreenBuilder, parser_api::Parser, lexer::{tokenize, Token}, @@ -62,8 +61,7 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = - parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser); + let (green, new_errors) = parse_with(GreenBuilder::new(), &text, &tokens, reparser); Some((node, green, new_errors)) } From 4c1f9b8d4e9ab9ba3b16d2b03f3c8bcc7f61706e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 22:58:56 +0300 Subject: [PATCH 08/10] remove TokenPos --- crates/ra_syntax/src/parsing.rs | 23 +++-------------- crates/ra_syntax/src/parsing/input.rs | 24 ++++++++--------- crates/ra_syntax/src/parsing/parser_api.rs | 30 +++++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index f74c365d5cc..5de6ff8c112 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -69,24 +69,7 @@ trait TreeSink { /// /// Hopefully this will allow us to treat text and token trees in the same way! trait TokenSource { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool; - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool; -} - -#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)] -pub(crate) struct TokenPos(pub u32); - -impl std::ops::Add<u32> for TokenPos { - type Output = TokenPos; - - fn add(self, rhs: u32) -> TokenPos { - TokenPos(self.0 + rhs) - } -} - -impl std::ops::AddAssign<u32> for TokenPos { - fn add_assign(&mut self, rhs: u32) { - self.0 += rhs - } + fn token_kind(&self, pos: usize) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: usize) -> bool; + fn is_keyword(&self, pos: usize, kw: &str) -> bool; } diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs index 0f1810df55b..96c03bb1185 100644 --- a/crates/ra_syntax/src/parsing/input.rs +++ b/crates/ra_syntax/src/parsing/input.rs @@ -1,33 +1,29 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, parsing::{ - TokenPos, TokenSource, + TokenSource, lexer::Token, }, }; impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if !(pos < self.tokens.len()) { return EOF; } - self.tokens[idx].kind + self.tokens[pos].kind } - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { - let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize + 1; - if !(idx_next < self.tokens.len()) { + fn is_token_joint_to_next(&self, pos: usize) -> bool { + if !(pos + 1 < self.tokens.len()) { return true; } - self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1] } - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + if !(pos < self.tokens.len()) { return false; } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len); self.text[range] == *kw } diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 99f6183a4e9..988fcb51880 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -6,7 +6,7 @@ use crate::{ syntax_error::ParseError, SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ - TokenSource, TokenPos, + TokenSource, token_set::TokenSet, event::Event, }, @@ -23,14 +23,14 @@ use crate::{ /// finish expression". See `Event` docs for more. pub(crate) struct Parser<'t> { token_source: &'t dyn TokenSource, - pos: TokenPos, + token_pos: usize, events: Vec<Event>, steps: Cell<u32>, } impl<'t> Parser<'t> { pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { - Parser { token_source, pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0) } + Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } } pub(crate) fn finish(self) -> Vec<Event> { @@ -49,9 +49,9 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `>>`. pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - if self.token_source.is_token_joint_to_next(self.pos) { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + if self.token_source.is_token_joint_to_next(self.token_pos) { Some((c1, c2)) } else { None @@ -63,11 +63,11 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `=>>`. pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - let c3 = self.token_source.token_kind(self.pos + 2); - if self.token_source.is_token_joint_to_next(self.pos) - && self.token_source.is_token_joint_to_next(self.pos + 1) + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + let c3 = self.token_source.token_kind(self.token_pos + 2); + if self.token_source.is_token_joint_to_next(self.token_pos) + && self.token_source.is_token_joint_to_next(self.token_pos + 1) { Some((c1, c2, c3)) } else { @@ -77,11 +77,11 @@ impl<'t> Parser<'t> { /// Lookahead operation: returns the kind of the next nth /// token. - pub(crate) fn nth(&self, n: u32) -> SyntaxKind { + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { let steps = self.steps.get(); assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - self.token_source.token_kind(self.pos + n) + self.token_source.token_kind(self.token_pos + n) } /// Checks if the current token is `kind`. @@ -96,7 +96,7 @@ impl<'t> Parser<'t> { /// Checks if the current token is contextual keyword with text `t`. pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.pos, kw) + self.token_source.is_keyword(self.token_pos, kw) } /// Starts a new node in the syntax tree. All nodes and tokens @@ -184,7 +184,7 @@ impl<'t> Parser<'t> { } fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.pos += u32::from(n_raw_tokens); + self.token_pos += usize::from(n_raw_tokens); self.push_event(Event::Token { kind, n_raw_tokens }); } From 61992dc1cd4956038e3c15439c1203f21e05af06 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 23:05:59 +0300 Subject: [PATCH 09/10] simplify --- crates/ra_syntax/src/parsing.rs | 4 +-- crates/ra_syntax/src/parsing/grammar.rs | 2 +- .../src/parsing/{parser_api.rs => parser.rs} | 0 crates/ra_syntax/src/parsing/parser_impl.rs | 25 ------------------- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- 5 files changed, 4 insertions(+), 29 deletions(-) rename crates/ra_syntax/src/parsing/{parser_api.rs => parser.rs} (100%) delete mode 100644 crates/ra_syntax/src/parsing/parser_impl.rs diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 5de6ff8c112..941ec501e50 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -4,7 +4,7 @@ mod builder; mod lexer; mod event; mod input; -mod parser_api; +mod parser; mod grammar; mod reparsing; @@ -14,7 +14,7 @@ use crate::{ builder::GreenBuilder, input::ParserInput, event::EventProcessor, - parser_api::Parser, + parser::Parser, }, syntax_node::GreenNode, }; diff --git a/crates/ra_syntax/src/parsing/grammar.rs b/crates/ra_syntax/src/parsing/grammar.rs index bcdcd9f5786..7ca9c223cc0 100644 --- a/crates/ra_syntax/src/parsing/grammar.rs +++ b/crates/ra_syntax/src/parsing/grammar.rs @@ -41,7 +41,7 @@ use crate::{ SyntaxKind::{self, *}, parsing::{ token_set::TokenSet, - parser_api::{CompletedMarker, Marker, Parser} + parser::{CompletedMarker, Marker, Parser} }, }; diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser.rs similarity index 100% rename from crates/ra_syntax/src/parsing/parser_api.rs rename to crates/ra_syntax/src/parsing/parser.rs diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs deleted file mode 100644 index 6eed0e656a4..00000000000 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub(super) mod event; -pub(super) mod input; - -use crate::parsing::{ - TreeSink, TokenSource, - lexer::Token, - parser_api::Parser, - parser_impl::event::EventProcessor, -}; - -/// Parse a sequence of tokens into the representative node tree -pub(super) fn parse_with<S: TreeSink>( - sink: S, - text: &str, - tokens: &[Token], - parser: fn(&mut Parser), -) -> S::Tree { - let mut events = { - let input = input::ParserInput::new(text, tokens); - let mut parser_api = Parser::new(&input); - parser(&mut parser_api); - parser_api.finish() - }; - EventProcessor::new(sink, text, tokens, &mut events).process().finish() -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index f45326dffef..674b15f9a27 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -6,7 +6,7 @@ use crate::{ parsing::{ grammar, parse_with, builder::GreenBuilder, - parser_api::Parser, + parser::Parser, lexer::{tokenize, Token}, } }; From 882c47f1870f15cb2aaad8871ccbad1c51520f49 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 20 Feb 2019 23:17:07 +0300 Subject: [PATCH 10/10] move syntax error to parser --- crates/ra_syntax/src/parsing.rs | 9 ++++++--- crates/ra_syntax/src/parsing/builder.rs | 19 +++++++++++++------ crates/ra_syntax/src/parsing/event.rs | 11 ++--------- crates/ra_syntax/src/parsing/parser.rs | 3 +-- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- crates/ra_syntax/src/syntax_error.rs | 5 +---- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 941ec501e50..138d1394af9 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -9,7 +9,7 @@ mod grammar; mod reparsing; use crate::{ - SyntaxError, SyntaxKind, SmolStr, + SyntaxKind, SmolStr, SyntaxError, parsing::{ builder::GreenBuilder, input::ParserInput, @@ -21,11 +21,14 @@ use crate::{ pub use self::lexer::{tokenize, Token}; +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ParseError(pub String); + pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { let tokens = tokenize(&text); - parse_with(GreenBuilder::new(), text, &tokens, grammar::root) + parse_with(GreenBuilder::default(), text, &tokens, grammar::root) } fn parse_with<S: TreeSink>( @@ -57,7 +60,7 @@ trait TreeSink { /// branch as current. fn finish_branch(&mut self); - fn error(&mut self, error: SyntaxError); + fn error(&mut self, error: ParseError); /// Complete tree building. Make sure that /// `start_branch` and `finish_branch` calls diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index a05e7f84b5c..ee0e2cce7ae 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,19 +1,24 @@ use crate::{ - parsing::TreeSink, + SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, + parsing::{TreeSink, ParseError}, syntax_node::{GreenNode, RaTypes}, - SmolStr, SyntaxKind, SyntaxError, }; use rowan::GreenNodeBuilder; pub(crate) struct GreenBuilder { + text_pos: TextUnit, errors: Vec<SyntaxError>, inner: GreenNodeBuilder<RaTypes>, } -impl GreenBuilder { - pub(crate) fn new() -> GreenBuilder { - GreenBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } +impl Default for GreenBuilder { + fn default() -> GreenBuilder { + GreenBuilder { + text_pos: TextUnit::default(), + errors: Vec::new(), + inner: GreenNodeBuilder::new(), + } } } @@ -21,6 +26,7 @@ impl TreeSink for GreenBuilder { type Tree = (GreenNode, Vec<SyntaxError>); fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { + self.text_pos += TextUnit::of_str(text.as_str()); self.inner.leaf(kind, text); } @@ -32,7 +38,8 @@ impl TreeSink for GreenBuilder { self.inner.finish_internal(); } - fn error(&mut self, error: SyntaxError) { + fn error(&mut self, error: ParseError) { + let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); self.errors.push(error) } diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs index 893a42e9ac6..f6f020eaba4 100644 --- a/crates/ra_syntax/src/parsing/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs @@ -13,14 +13,9 @@ use crate::{ SmolStr, SyntaxKind::{self, *}, TextRange, TextUnit, - syntax_error::{ - ParseError, - SyntaxError, - SyntaxErrorKind, - }, parsing::{ + ParseError, TreeSink, lexer::Token, - TreeSink, }, }; @@ -159,9 +154,7 @@ impl<'a, S: TreeSink> EventProcessor<'a, S> { .sum::<TextUnit>(); self.leaf(kind, len, n_raw_tokens); } - Event::Error { msg } => self - .sink - .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), + Event::Error { msg } => self.sink.error(msg), } } self.sink diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs index 988fcb51880..923b0f2b20d 100644 --- a/crates/ra_syntax/src/parsing/parser.rs +++ b/crates/ra_syntax/src/parsing/parser.rs @@ -3,10 +3,9 @@ use std::cell::Cell; use drop_bomb::DropBomb; use crate::{ - syntax_error::ParseError, SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ - TokenSource, + TokenSource, ParseError, token_set::TokenSet, event::Event, }, diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 674b15f9a27..f2d218ab903 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -61,7 +61,7 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = parse_with(GreenBuilder::new(), &text, &tokens, reparser); + let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); Some((node, green, new_errors)) } diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 4ff99809066..1a00fcc27a3 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::{TextRange, TextUnit}; +use crate::{TextRange, TextUnit, parsing::ParseError}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SyntaxError { @@ -95,9 +95,6 @@ pub enum SyntaxErrorKind { InvalidMatchInnerAttr, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ParseError(pub String); - impl fmt::Display for SyntaxErrorKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::SyntaxErrorKind::*;