Merge #863
863: Token source r=matklad a=matklad Some reshuffling of parser's API with the eye towards extracting parse **without** syntax tree into a separate crate, to be used with macro expansion Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
This commit is contained in:
commit
c84561bb62
@ -2,24 +2,77 @@
|
||||
mod token_set;
|
||||
mod builder;
|
||||
mod lexer;
|
||||
mod parser_impl;
|
||||
mod parser_api;
|
||||
mod event;
|
||||
mod input;
|
||||
mod parser;
|
||||
mod grammar;
|
||||
mod reparsing;
|
||||
|
||||
use crate::{
|
||||
SyntaxError,
|
||||
parsing::builder::GreenBuilder,
|
||||
SyntaxKind, SmolStr, SyntaxError,
|
||||
parsing::{
|
||||
builder::GreenBuilder,
|
||||
input::ParserInput,
|
||||
event::EventProcessor,
|
||||
parser::Parser,
|
||||
},
|
||||
syntax_node::GreenNode,
|
||||
};
|
||||
|
||||
pub use self::lexer::{tokenize, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ParseError(pub String);
|
||||
|
||||
pub(crate) use self::reparsing::incremental_reparse;
|
||||
|
||||
pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
|
||||
let tokens = tokenize(&text);
|
||||
let (green, errors) =
|
||||
parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root);
|
||||
(green, errors)
|
||||
parse_with(GreenBuilder::default(), text, &tokens, grammar::root)
|
||||
}
|
||||
|
||||
fn parse_with<S: TreeSink>(
|
||||
tree_sink: S,
|
||||
text: &str,
|
||||
tokens: &[Token],
|
||||
f: fn(&mut Parser),
|
||||
) -> S::Tree {
|
||||
let mut events = {
|
||||
let input = ParserInput::new(text, &tokens);
|
||||
let mut p = Parser::new(&input);
|
||||
f(&mut p);
|
||||
p.finish()
|
||||
};
|
||||
EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish()
|
||||
}
|
||||
|
||||
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
||||
trait TreeSink {
|
||||
type Tree;
|
||||
|
||||
/// Adds new leaf to the current branch.
|
||||
fn leaf(&mut self, kind: SyntaxKind, text: SmolStr);
|
||||
|
||||
/// Start new branch and make it current.
|
||||
fn start_branch(&mut self, kind: SyntaxKind);
|
||||
|
||||
/// Finish current branch and restore previous
|
||||
/// branch as current.
|
||||
fn finish_branch(&mut self);
|
||||
|
||||
fn error(&mut self, error: ParseError);
|
||||
|
||||
/// Complete tree building. Make sure that
|
||||
/// `start_branch` and `finish_branch` calls
|
||||
/// are paired!
|
||||
fn finish(self) -> Self::Tree;
|
||||
}
|
||||
|
||||
/// `TokenSource` abstracts the source of the tokens the parser operates on.
|
||||
///
|
||||
/// Hopefully this will allow us to treat text and token trees in the same way!
|
||||
trait TokenSource {
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind;
|
||||
fn is_token_joint_to_next(&self, pos: usize) -> bool;
|
||||
fn is_keyword(&self, pos: usize, kw: &str) -> bool;
|
||||
}
|
||||
|
@ -1,26 +1,32 @@
|
||||
use crate::{
|
||||
parsing::parser_impl::Sink,
|
||||
SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit,
|
||||
parsing::{TreeSink, ParseError},
|
||||
syntax_node::{GreenNode, RaTypes},
|
||||
SmolStr, SyntaxKind, SyntaxError,
|
||||
};
|
||||
|
||||
use rowan::GreenNodeBuilder;
|
||||
|
||||
pub(crate) struct GreenBuilder {
|
||||
text_pos: TextUnit,
|
||||
errors: Vec<SyntaxError>,
|
||||
inner: GreenNodeBuilder<RaTypes>,
|
||||
}
|
||||
|
||||
impl GreenBuilder {
|
||||
pub(crate) fn new() -> GreenBuilder {
|
||||
GreenBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() }
|
||||
impl Default for GreenBuilder {
|
||||
fn default() -> GreenBuilder {
|
||||
GreenBuilder {
|
||||
text_pos: TextUnit::default(),
|
||||
errors: Vec::new(),
|
||||
inner: GreenNodeBuilder::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sink for GreenBuilder {
|
||||
impl TreeSink for GreenBuilder {
|
||||
type Tree = (GreenNode, Vec<SyntaxError>);
|
||||
|
||||
fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) {
|
||||
self.text_pos += TextUnit::of_str(text.as_str());
|
||||
self.inner.leaf(kind, text);
|
||||
}
|
||||
|
||||
@ -32,7 +38,8 @@ impl Sink for GreenBuilder {
|
||||
self.inner.finish_internal();
|
||||
}
|
||||
|
||||
fn error(&mut self, error: SyntaxError) {
|
||||
fn error(&mut self, error: ParseError) {
|
||||
let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos);
|
||||
self.errors.push(error)
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
//! parser, so as to allow to evolve the tree representation
|
||||
//! and the parser algorithm independently.
|
||||
//!
|
||||
//! The `Sink` trait is the bridge between the parser and the
|
||||
//! The `TreeSink` trait is the bridge between the parser and the
|
||||
//! tree builder: the parser produces a stream of events like
|
||||
//! `start node`, `finish node`, and `FileBuilder` converts
|
||||
//! this stream to a real tree.
|
||||
@ -13,14 +13,9 @@ use crate::{
|
||||
SmolStr,
|
||||
SyntaxKind::{self, *},
|
||||
TextRange, TextUnit,
|
||||
syntax_error::{
|
||||
ParseError,
|
||||
SyntaxError,
|
||||
SyntaxErrorKind,
|
||||
},
|
||||
parsing::{
|
||||
ParseError, TreeSink,
|
||||
lexer::Token,
|
||||
parser_impl::Sink,
|
||||
},
|
||||
};
|
||||
|
||||
@ -93,7 +88,7 @@ impl Event {
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct EventProcessor<'a, S: Sink> {
|
||||
pub(super) struct EventProcessor<'a, S: TreeSink> {
|
||||
sink: S,
|
||||
text_pos: TextUnit,
|
||||
text: &'a str,
|
||||
@ -102,7 +97,7 @@ pub(super) struct EventProcessor<'a, S: Sink> {
|
||||
events: &'a mut [Event],
|
||||
}
|
||||
|
||||
impl<'a, S: Sink> EventProcessor<'a, S> {
|
||||
impl<'a, S: TreeSink> EventProcessor<'a, S> {
|
||||
pub(super) fn new(
|
||||
sink: S,
|
||||
text: &'a str,
|
||||
@ -113,7 +108,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> {
|
||||
}
|
||||
|
||||
/// Generate the syntax tree with the control of events.
|
||||
pub(super) fn process(mut self) -> S {
|
||||
pub(crate) fn process(mut self) -> S {
|
||||
let mut forward_parents = Vec::new();
|
||||
|
||||
for i in 0..self.events.len() {
|
||||
@ -159,9 +154,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> {
|
||||
.sum::<TextUnit>();
|
||||
self.leaf(kind, len, n_raw_tokens);
|
||||
}
|
||||
Event::Error { msg } => self
|
||||
.sink
|
||||
.error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
|
||||
Event::Error { msg } => self.sink.error(msg),
|
||||
}
|
||||
}
|
||||
self.sink
|
@ -41,7 +41,7 @@ use crate::{
|
||||
SyntaxKind::{self, *},
|
||||
parsing::{
|
||||
token_set::TokenSet,
|
||||
parser_api::{CompletedMarker, Marker, Parser}
|
||||
parser::{CompletedMarker, Marker, Parser}
|
||||
},
|
||||
};
|
||||
|
||||
|
68
crates/ra_syntax/src/parsing/input.rs
Normal file
68
crates/ra_syntax/src/parsing/input.rs
Normal file
@ -0,0 +1,68 @@
|
||||
use crate::{
|
||||
SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
|
||||
parsing::{
|
||||
TokenSource,
|
||||
lexer::Token,
|
||||
},
|
||||
};
|
||||
|
||||
impl<'t> TokenSource for ParserInput<'t> {
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind {
|
||||
if !(pos < self.tokens.len()) {
|
||||
return EOF;
|
||||
}
|
||||
self.tokens[pos].kind
|
||||
}
|
||||
fn is_token_joint_to_next(&self, pos: usize) -> bool {
|
||||
if !(pos + 1 < self.tokens.len()) {
|
||||
return true;
|
||||
}
|
||||
self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1]
|
||||
}
|
||||
fn is_keyword(&self, pos: usize, kw: &str) -> bool {
|
||||
if !(pos < self.tokens.len()) {
|
||||
return false;
|
||||
}
|
||||
let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len);
|
||||
|
||||
self.text[range] == *kw
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ParserInput<'t> {
|
||||
text: &'t str,
|
||||
/// start position of each token (except whitespace and comments)
|
||||
/// ```non-rust
|
||||
/// struct Foo;
|
||||
/// ^------^---
|
||||
/// | | ^-
|
||||
/// 0 7 10
|
||||
/// ```
|
||||
/// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
|
||||
start_offsets: Vec<TextUnit>,
|
||||
/// non-whitespace/comment tokens
|
||||
/// ```non-rust
|
||||
/// struct Foo {}
|
||||
/// ^^^^^^ ^^^ ^^
|
||||
/// ```
|
||||
/// tokens: `[struct, Foo, {, }]`
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl<'t> ParserInput<'t> {
|
||||
/// Generate input from tokens (except comments and whitespace).
|
||||
pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut start_offsets = Vec::new();
|
||||
let mut len = 0.into();
|
||||
for &token in raw_tokens.iter() {
|
||||
if !token.kind.is_trivia() {
|
||||
tokens.push(token);
|
||||
start_offsets.push(len);
|
||||
}
|
||||
len += token.len;
|
||||
}
|
||||
|
||||
ParserInput { text, start_offsets, tokens }
|
||||
}
|
||||
}
|
@ -1,10 +1,13 @@
|
||||
use std::cell::Cell;
|
||||
|
||||
use drop_bomb::DropBomb;
|
||||
|
||||
use crate::{
|
||||
SyntaxKind::{self, ERROR},
|
||||
SyntaxKind::{self, ERROR, EOF, TOMBSTONE},
|
||||
parsing::{
|
||||
TokenSource, ParseError,
|
||||
token_set::TokenSet,
|
||||
parser_impl::ParserImpl
|
||||
event::Event,
|
||||
},
|
||||
};
|
||||
|
||||
@ -17,9 +20,22 @@ use crate::{
|
||||
/// tree, but rather a flat stream of events of the form
|
||||
/// "start expression, consume number literal,
|
||||
/// finish expression". See `Event` docs for more.
|
||||
pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
|
||||
pub(crate) struct Parser<'t> {
|
||||
token_source: &'t dyn TokenSource,
|
||||
token_pos: usize,
|
||||
events: Vec<Event>,
|
||||
steps: Cell<u32>,
|
||||
}
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> {
|
||||
Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) }
|
||||
}
|
||||
|
||||
pub(crate) fn finish(self) -> Vec<Event> {
|
||||
self.events
|
||||
}
|
||||
|
||||
/// Returns the kind of the current token.
|
||||
/// If parser has already reached the end of input,
|
||||
/// the special `EOF` kind is returned.
|
||||
@ -32,7 +48,13 @@ impl<'t> Parser<'t> {
|
||||
///
|
||||
/// Useful for parsing things like `>>`.
|
||||
pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
|
||||
self.0.current2()
|
||||
let c1 = self.token_source.token_kind(self.token_pos);
|
||||
let c2 = self.token_source.token_kind(self.token_pos + 1);
|
||||
if self.token_source.is_token_joint_to_next(self.token_pos) {
|
||||
Some((c1, c2))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the kinds of the current three tokens, if they are not separated
|
||||
@ -40,13 +62,25 @@ impl<'t> Parser<'t> {
|
||||
///
|
||||
/// Useful for parsing things like `=>>`.
|
||||
pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
|
||||
self.0.current3()
|
||||
let c1 = self.token_source.token_kind(self.token_pos);
|
||||
let c2 = self.token_source.token_kind(self.token_pos + 1);
|
||||
let c3 = self.token_source.token_kind(self.token_pos + 2);
|
||||
if self.token_source.is_token_joint_to_next(self.token_pos)
|
||||
&& self.token_source.is_token_joint_to_next(self.token_pos + 1)
|
||||
{
|
||||
Some((c1, c2, c3))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Lookahead operation: returns the kind of the next nth
|
||||
/// token.
|
||||
pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
|
||||
self.0.nth(n)
|
||||
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
|
||||
let steps = self.steps.get();
|
||||
assert!(steps <= 10_000_000, "the parser seems stuck");
|
||||
self.steps.set(steps + 1);
|
||||
self.token_source.token_kind(self.token_pos + n)
|
||||
}
|
||||
|
||||
/// Checks if the current token is `kind`.
|
||||
@ -60,20 +94,26 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
|
||||
/// Checks if the current token is a contextual keyword with the given text.
|
||||
pub(crate) fn at_contextual_kw(&self, t: &str) -> bool {
|
||||
self.0.at_kw(t)
|
||||
pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
|
||||
self.token_source.is_keyword(self.token_pos, kw)
|
||||
}
|
||||
|
||||
/// Starts a new node in the syntax tree. All nodes and tokens
|
||||
/// consumed between the `start` and the corresponding `Marker::complete`
|
||||
/// belong to the same node.
|
||||
pub(crate) fn start(&mut self) -> Marker {
|
||||
Marker::new(self.0.start())
|
||||
let pos = self.events.len() as u32;
|
||||
self.push_event(Event::tombstone());
|
||||
Marker::new(pos)
|
||||
}
|
||||
|
||||
/// Advances the parser by one token unconditionally.
|
||||
pub(crate) fn bump(&mut self) {
|
||||
self.0.bump();
|
||||
let kind = self.nth(0);
|
||||
if kind == EOF {
|
||||
return;
|
||||
}
|
||||
self.do_bump(kind, 1);
|
||||
}
|
||||
|
||||
/// Advances the parser by one token, remapping its kind.
|
||||
@ -83,14 +123,18 @@ impl<'t> Parser<'t> {
|
||||
/// `union` keyword, and keyword is what ends up in the
|
||||
/// final tree.
|
||||
pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
|
||||
self.0.bump_remap(kind);
|
||||
if self.nth(0) == EOF {
|
||||
// TODO: panic!?
|
||||
return;
|
||||
}
|
||||
self.do_bump(kind, 1);
|
||||
}
|
||||
|
||||
/// Advances the parser by `n` tokens, remapping its kind.
|
||||
/// This is useful to create compound tokens from parts. For
|
||||
/// example, an `<<` token is two consecutive remapped `<` tokens
|
||||
pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
|
||||
self.0.bump_compound(kind, n);
|
||||
self.do_bump(kind, n);
|
||||
}
|
||||
|
||||
/// Emit error with the `message`
|
||||
@ -98,7 +142,8 @@ impl<'t> Parser<'t> {
|
||||
/// structured errors with spans and notes, like rustc
|
||||
/// does.
|
||||
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
|
||||
self.0.error(message.into())
|
||||
let msg = ParseError(message.into());
|
||||
self.push_event(Event::Error { msg })
|
||||
}
|
||||
|
||||
/// Consume the next token if `kind` matches.
|
||||
@ -136,6 +181,15 @@ impl<'t> Parser<'t> {
|
||||
m.complete(self, ERROR);
|
||||
};
|
||||
}
|
||||
|
||||
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
||||
self.token_pos += usize::from(n_raw_tokens);
|
||||
self.push_event(Event::Token { kind, n_raw_tokens });
|
||||
}
|
||||
|
||||
fn push_event(&mut self, event: Event) {
|
||||
self.events.push(event)
|
||||
}
|
||||
}
|
||||
|
||||
/// See `Parser::start`.
|
||||
@ -154,7 +208,14 @@ impl Marker {
|
||||
/// operation like `.precede()` to deal with forward_parent.
|
||||
pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
|
||||
self.bomb.defuse();
|
||||
p.0.complete(self.pos, kind);
|
||||
let idx = self.pos as usize;
|
||||
match p.events[idx] {
|
||||
Event::Start { kind: ref mut slot, .. } => {
|
||||
*slot = kind;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
p.push_event(Event::Finish);
|
||||
CompletedMarker::new(self.pos, kind)
|
||||
}
|
||||
|
||||
@ -162,7 +223,13 @@ impl Marker {
|
||||
/// are attached to its parent instead.
|
||||
pub(crate) fn abandon(mut self, p: &mut Parser) {
|
||||
self.bomb.defuse();
|
||||
p.0.abandon(self.pos);
|
||||
let idx = self.pos as usize;
|
||||
if idx == p.events.len() - 1 {
|
||||
match p.events.pop() {
|
||||
Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -186,7 +253,15 @@ impl CompletedMarker {
|
||||
/// then mark `NEWSTART` as `START`'s parent with saving its relative
|
||||
/// distance to `NEWSTART` into forward_parent(=2 in this case);
|
||||
pub(crate) fn precede(self, p: &mut Parser) -> Marker {
|
||||
Marker::new(p.0.precede(self.0))
|
||||
let new_pos = p.start();
|
||||
let idx = self.0 as usize;
|
||||
match p.events[idx] {
|
||||
Event::Start { ref mut forward_parent, .. } => {
|
||||
*forward_parent = Some(new_pos.pos - self.0);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
new_pos
|
||||
}
|
||||
|
||||
pub(crate) fn kind(&self) -> SyntaxKind {
|
@ -1,200 +0,0 @@
|
||||
mod event;
|
||||
mod input;
|
||||
|
||||
use std::cell::Cell;
|
||||
|
||||
use crate::{
|
||||
SmolStr,
|
||||
syntax_error::{ParseError, SyntaxError},
|
||||
parsing::{
|
||||
lexer::Token,
|
||||
parser_api::Parser,
|
||||
parser_impl::{
|
||||
event::{Event, EventProcessor},
|
||||
input::{InputPosition, ParserInput},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
use crate::SyntaxKind::{self, EOF, TOMBSTONE};
|
||||
|
||||
pub(super) trait Sink {
|
||||
type Tree;
|
||||
|
||||
/// Adds new leaf to the current branch.
|
||||
fn leaf(&mut self, kind: SyntaxKind, text: SmolStr);
|
||||
|
||||
/// Start new branch and make it current.
|
||||
fn start_branch(&mut self, kind: SyntaxKind);
|
||||
|
||||
/// Finish current branch and restore previous
|
||||
/// branch as current.
|
||||
fn finish_branch(&mut self);
|
||||
|
||||
fn error(&mut self, error: SyntaxError);
|
||||
|
||||
/// Complete tree building. Make sure that
|
||||
/// `start_branch` and `finish_branch` calls
|
||||
/// are paired!
|
||||
fn finish(self) -> Self::Tree;
|
||||
}
|
||||
|
||||
/// Parse a sequence of tokens into the representative node tree
|
||||
pub(super) fn parse_with<S: Sink>(
|
||||
sink: S,
|
||||
text: &str,
|
||||
tokens: &[Token],
|
||||
parser: fn(&mut Parser),
|
||||
) -> S::Tree {
|
||||
let mut events = {
|
||||
let input = input::ParserInput::new(text, tokens);
|
||||
let parser_impl = ParserImpl::new(&input);
|
||||
let mut parser_api = Parser(parser_impl);
|
||||
parser(&mut parser_api);
|
||||
parser_api.0.into_events()
|
||||
};
|
||||
EventProcessor::new(sink, text, tokens, &mut events).process().finish()
|
||||
}
|
||||
|
||||
/// Implementation details of `Parser`, extracted
|
||||
/// to a separate struct in order not to pollute
|
||||
/// the public API of the `Parser`.
|
||||
pub(super) struct ParserImpl<'t> {
|
||||
parser_input: &'t ParserInput<'t>,
|
||||
pos: InputPosition,
|
||||
events: Vec<Event>,
|
||||
steps: Cell<u32>,
|
||||
}
|
||||
|
||||
impl<'t> ParserImpl<'t> {
|
||||
fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
|
||||
ParserImpl {
|
||||
parser_input: inp,
|
||||
pos: InputPosition::new(),
|
||||
events: Vec::new(),
|
||||
steps: Cell::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
fn into_events(self) -> Vec<Event> {
|
||||
assert_eq!(self.nth(0), EOF);
|
||||
self.events
|
||||
}
|
||||
|
||||
pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
|
||||
let c1 = self.parser_input.kind(self.pos);
|
||||
let c2 = self.parser_input.kind(self.pos + 1);
|
||||
if self.parser_input.token_start_at(self.pos + 1)
|
||||
== self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
|
||||
{
|
||||
Some((c1, c2))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
|
||||
let c1 = self.parser_input.kind(self.pos);
|
||||
let c2 = self.parser_input.kind(self.pos + 1);
|
||||
let c3 = self.parser_input.kind(self.pos + 2);
|
||||
if self.parser_input.token_start_at(self.pos + 1)
|
||||
== self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
|
||||
&& self.parser_input.token_start_at(self.pos + 2)
|
||||
== self.parser_input.token_start_at(self.pos + 1)
|
||||
+ self.parser_input.token_len(self.pos + 1)
|
||||
{
|
||||
Some((c1, c2, c3))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the syntax kind of the nth token.
|
||||
pub(super) fn nth(&self, n: u32) -> SyntaxKind {
|
||||
let steps = self.steps.get();
|
||||
assert!(steps <= 10_000_000, "the parser seems stuck");
|
||||
self.steps.set(steps + 1);
|
||||
|
||||
self.parser_input.kind(self.pos + n)
|
||||
}
|
||||
|
||||
pub(super) fn at_kw(&self, t: &str) -> bool {
|
||||
self.parser_input.token_text(self.pos) == t
|
||||
}
|
||||
|
||||
/// Start parsing right behind the last event.
|
||||
pub(super) fn start(&mut self) -> u32 {
|
||||
let pos = self.events.len() as u32;
|
||||
self.push_event(Event::tombstone());
|
||||
pos
|
||||
}
|
||||
|
||||
/// Advances the parser by one token unconditionally.
|
||||
pub(super) fn bump(&mut self) {
|
||||
let kind = self.nth(0);
|
||||
if kind == EOF {
|
||||
return;
|
||||
}
|
||||
self.do_bump(kind, 1);
|
||||
}
|
||||
|
||||
pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
|
||||
if self.nth(0) == EOF {
|
||||
// TODO: panic!?
|
||||
return;
|
||||
}
|
||||
self.do_bump(kind, 1);
|
||||
}
|
||||
|
||||
pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
|
||||
self.do_bump(kind, n);
|
||||
}
|
||||
|
||||
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
||||
self.pos += u32::from(n_raw_tokens);
|
||||
self.push_event(Event::Token { kind, n_raw_tokens });
|
||||
}
|
||||
|
||||
/// Append one Error event to the back of events.
|
||||
pub(super) fn error(&mut self, msg: String) {
|
||||
self.push_event(Event::Error { msg: ParseError(msg) })
|
||||
}
|
||||
|
||||
/// Complete an event with appending a `Finish` event.
|
||||
pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
|
||||
match self.events[pos as usize] {
|
||||
Event::Start { kind: ref mut slot, .. } => {
|
||||
*slot = kind;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
self.push_event(Event::Finish);
|
||||
}
|
||||
|
||||
/// Ignore the dummy `Start` event.
|
||||
pub(super) fn abandon(&mut self, pos: u32) {
|
||||
let idx = pos as usize;
|
||||
if idx == self.events.len() - 1 {
|
||||
match self.events.pop() {
|
||||
Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Save the relative distance of a completed event to its forward_parent.
|
||||
pub(super) fn precede(&mut self, pos: u32) -> u32 {
|
||||
let new_pos = self.start();
|
||||
match self.events[pos as usize] {
|
||||
Event::Start { ref mut forward_parent, .. } => {
|
||||
*forward_parent = Some(new_pos - pos);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
new_pos
|
||||
}
|
||||
|
||||
fn push_event(&mut self, event: Event) {
|
||||
self.events.push(event)
|
||||
}
|
||||
}
|
@ -1,104 +0,0 @@
|
||||
use crate::{
|
||||
SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
|
||||
parsing::lexer::Token,
|
||||
};
|
||||
|
||||
use std::ops::{Add, AddAssign};
|
||||
|
||||
pub(crate) struct ParserInput<'t> {
|
||||
text: &'t str,
|
||||
/// start position of each token (except whitespace and comments)
|
||||
/// ```non-rust
|
||||
/// struct Foo;
|
||||
/// ^------^---
|
||||
/// | | ^-
|
||||
/// 0 7 10
|
||||
/// ```
|
||||
/// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
|
||||
start_offsets: Vec<TextUnit>,
|
||||
/// non-whitespace/comment tokens
|
||||
/// ```non-rust
|
||||
/// struct Foo {}
|
||||
/// ^^^^^^ ^^^ ^^
|
||||
/// ```
|
||||
/// tokens: `[struct, Foo, {, }]`
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl<'t> ParserInput<'t> {
|
||||
/// Generate input from tokens (except comments and whitespace).
|
||||
pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut start_offsets = Vec::new();
|
||||
let mut len = 0.into();
|
||||
for &token in raw_tokens.iter() {
|
||||
if !token.kind.is_trivia() {
|
||||
tokens.push(token);
|
||||
start_offsets.push(len);
|
||||
}
|
||||
len += token.len;
|
||||
}
|
||||
|
||||
ParserInput { text, start_offsets, tokens }
|
||||
}
|
||||
|
||||
/// Get the syntax kind of token at given input position.
|
||||
pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
|
||||
let idx = pos.0 as usize;
|
||||
if !(idx < self.tokens.len()) {
|
||||
return EOF;
|
||||
}
|
||||
self.tokens[idx].kind
|
||||
}
|
||||
|
||||
/// Get the length of a token at given input position.
|
||||
pub fn token_len(&self, pos: InputPosition) -> TextUnit {
|
||||
let idx = pos.0 as usize;
|
||||
if !(idx < self.tokens.len()) {
|
||||
return 0.into();
|
||||
}
|
||||
self.tokens[idx].len
|
||||
}
|
||||
|
||||
/// Get the start position of a token at given input position.
|
||||
pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
|
||||
let idx = pos.0 as usize;
|
||||
if !(idx < self.tokens.len()) {
|
||||
return 0.into();
|
||||
}
|
||||
self.start_offsets[idx]
|
||||
}
|
||||
|
||||
/// Get the raw text of a token at given input position.
|
||||
pub fn token_text(&self, pos: InputPosition) -> &'t str {
|
||||
let idx = pos.0 as usize;
|
||||
if !(idx < self.tokens.len()) {
|
||||
return "";
|
||||
}
|
||||
let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
|
||||
&self.text[range]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
|
||||
pub(crate) struct InputPosition(u32);
|
||||
|
||||
impl InputPosition {
|
||||
pub fn new() -> Self {
|
||||
InputPosition(0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<u32> for InputPosition {
|
||||
type Output = InputPosition;
|
||||
|
||||
fn add(self, rhs: u32) -> InputPosition {
|
||||
InputPosition(self.0 + rhs)
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign<u32> for InputPosition {
|
||||
fn add_assign(&mut self, rhs: u32) {
|
||||
self.0 += rhs
|
||||
}
|
||||
}
|
@ -4,10 +4,9 @@ use crate::{
|
||||
syntax_node::{GreenNode, SyntaxNode},
|
||||
syntax_error::SyntaxError,
|
||||
parsing::{
|
||||
grammar,
|
||||
parser_impl,
|
||||
grammar, parse_with,
|
||||
builder::GreenBuilder,
|
||||
parser_api::Parser,
|
||||
parser::Parser,
|
||||
lexer::{tokenize, Token},
|
||||
}
|
||||
};
|
||||
@ -62,8 +61,7 @@ fn reparse_block<'node>(
|
||||
if !is_balanced(&tokens) {
|
||||
return None;
|
||||
}
|
||||
let (green, new_errors) =
|
||||
parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser);
|
||||
let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser);
|
||||
Some((node, green, new_errors))
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::{TextRange, TextUnit};
|
||||
use crate::{TextRange, TextUnit, parsing::ParseError};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SyntaxError {
|
||||
@ -95,9 +95,6 @@ pub enum SyntaxErrorKind {
|
||||
InvalidMatchInnerAttr,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ParseError(pub String);
|
||||
|
||||
impl fmt::Display for SyntaxErrorKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::SyntaxErrorKind::*;
|
||||
|
Loading…
x
Reference in New Issue
Block a user