rust/crates/parser/src/parser.rs
2023-02-07 18:08:05 +01:00

376 lines
12 KiB
Rust

//! See [`Parser`].
use std::cell::Cell;
use drop_bomb::DropBomb;
use limit::Limit;
use crate::{
event::Event,
input::Input,
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
TokenSet, T,
};
/// `Parser` struct provides the low-level API for
/// navigating through the stream of tokens and
/// constructing the parse tree. The actual parsing
/// happens in the [`grammar`](super::grammar) module.
///
/// However, the result of this `Parser` is not a real
/// tree, but rather a flat stream of events of the form
/// "start expression, consume number literal,
/// finish expression". See `Event` docs for more.
pub(crate) struct Parser<'t> {
inp: &'t Input,
pos: usize,
events: Vec<Event>,
steps: Cell<u32>,
}
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
impl<'t> Parser<'t> {
pub(super) fn new(inp: &'t Input) -> Parser<'t> {
Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) }
}
pub(crate) fn finish(self) -> Vec<Event> {
self.events
}
/// Returns the kind of the current token.
/// If parser has already reached the end of input,
/// the special `EOF` kind is returned.
pub(crate) fn current(&self) -> SyntaxKind {
self.nth(0)
}
/// Lookahead operation: returns the kind of the next nth
/// token.
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
assert!(n <= 3);
let steps = self.steps.get();
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
self.steps.set(steps + 1);
self.inp.kind(self.pos + n)
}
/// Checks if the current token is `kind`.
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
self.nth_at(0, kind)
}
pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
match kind {
T![-=] => self.at_composite2(n, T![-], T![=]),
T![->] => self.at_composite2(n, T![-], T![>]),
T![::] => self.at_composite2(n, T![:], T![:]),
T![!=] => self.at_composite2(n, T![!], T![=]),
T![..] => self.at_composite2(n, T![.], T![.]),
T![*=] => self.at_composite2(n, T![*], T![=]),
T![/=] => self.at_composite2(n, T![/], T![=]),
T![&&] => self.at_composite2(n, T![&], T![&]),
T![&=] => self.at_composite2(n, T![&], T![=]),
T![%=] => self.at_composite2(n, T![%], T![=]),
T![^=] => self.at_composite2(n, T![^], T![=]),
T![+=] => self.at_composite2(n, T![+], T![=]),
T![<<] => self.at_composite2(n, T![<], T![<]),
T![<=] => self.at_composite2(n, T![<], T![=]),
T![==] => self.at_composite2(n, T![=], T![=]),
T![=>] => self.at_composite2(n, T![=], T![>]),
T![>=] => self.at_composite2(n, T![>], T![=]),
T![>>] => self.at_composite2(n, T![>], T![>]),
T![|=] => self.at_composite2(n, T![|], T![=]),
T![||] => self.at_composite2(n, T![|], T![|]),
T![...] => self.at_composite3(n, T![.], T![.], T![.]),
T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
_ => self.inp.kind(self.pos + n) == kind,
}
}
/// Consume the next token if `kind` matches.
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
if !self.at(kind) {
return false;
}
let n_raw_tokens = match kind {
T![-=]
| T![->]
| T![::]
| T![!=]
| T![..]
| T![*=]
| T![/=]
| T![&&]
| T![&=]
| T![%=]
| T![^=]
| T![+=]
| T![<<]
| T![<=]
| T![==]
| T![=>]
| T![>=]
| T![>>]
| T![|=]
| T![||] => 2,
T![...] | T![..=] | T![<<=] | T![>>=] => 3,
_ => 1,
};
self.do_bump(kind, n_raw_tokens);
true
}
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
self.inp.kind(self.pos + n) == k1
&& self.inp.kind(self.pos + n + 1) == k2
&& self.inp.is_joint(self.pos + n)
}
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
self.inp.kind(self.pos + n) == k1
&& self.inp.kind(self.pos + n + 1) == k2
&& self.inp.kind(self.pos + n + 2) == k3
&& self.inp.is_joint(self.pos + n)
&& self.inp.is_joint(self.pos + n + 1)
}
/// Checks if the current token is in `kinds`.
pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
kinds.contains(self.current())
}
/// Checks if the current token is contextual keyword `kw`.
pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
self.inp.contextual_kind(self.pos) == kw
}
/// Checks if the nth token is contextual keyword `kw`.
pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool {
self.inp.contextual_kind(self.pos + n) == kw
}
/// Starts a new node in the syntax tree. All nodes and tokens
/// consumed between the `start` and the corresponding `Marker::complete`
/// belong to the same node.
pub(crate) fn start(&mut self) -> Marker {
let pos = self.events.len() as u32;
self.push_event(Event::tombstone());
Marker::new(pos)
}
/// Consume the next token. Panics if the parser isn't currently at `kind`.
pub(crate) fn bump(&mut self, kind: SyntaxKind) {
assert!(self.eat(kind));
}
/// Advances the parser by one token
pub(crate) fn bump_any(&mut self) {
let kind = self.nth(0);
if kind == EOF {
return;
}
self.do_bump(kind, 1);
}
/// Advances the parser by one token
pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) {
assert!(self.at(SyntaxKind::FLOAT_NUMBER));
// we have parse `<something>.`
// `<something>`.0.1
// here we need to insert an extra event
//
// `<something>`. 0. 1;
// here we need to change the follow up parse, the return value will cause us to emulate a dot
// the actual splitting happens later
let ends_in_dot = !self.inp.is_joint(self.pos);
if !ends_in_dot {
let new_marker = self.start();
let idx = marker.pos as usize;
match &mut self.events[idx] {
Event::Start { forward_parent, kind } => {
*kind = SyntaxKind::FIELD_EXPR;
*forward_parent = Some(new_marker.pos - marker.pos);
}
_ => unreachable!(),
}
marker.bomb.defuse();
marker = new_marker;
};
self.pos += 1 as usize;
self.push_event(Event::FloatSplitHack { ends_in_dot });
(ends_in_dot, marker)
}
/// Advances the parser by one token, remapping its kind.
/// This is useful to create contextual keywords from
/// identifiers. For example, the lexer creates a `union`
/// *identifier* token, but the parser remaps it to the
/// `union` keyword, and keyword is what ends up in the
/// final tree.
pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
if self.nth(0) == EOF {
// FIXME: panic!?
return;
}
self.do_bump(kind, 1);
}
/// Emit error with the `message`
/// FIXME: this should be much more fancy and support
/// structured errors with spans and notes, like rustc
/// does.
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
let msg = message.into();
self.push_event(Event::Error { msg });
}
/// Consume the next token if it is `kind` or emit an error
/// otherwise.
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
if self.eat(kind) {
return true;
}
self.error(format!("expected {kind:?}"));
false
}
/// Create an error node and consume the next token.
pub(crate) fn err_and_bump(&mut self, message: &str) {
self.err_recover(message, TokenSet::EMPTY);
}
/// Create an error node and consume the next token.
pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
match self.current() {
T!['{'] | T!['}'] => {
self.error(message);
return;
}
_ => (),
}
if self.at_ts(recovery) {
self.error(message);
return;
}
let m = self.start();
self.error(message);
self.bump_any();
m.complete(self, ERROR);
}
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
self.pos += n_raw_tokens as usize;
self.steps.set(0);
self.push_event(Event::Token { kind, n_raw_tokens });
}
fn push_event(&mut self, event: Event) {
self.events.push(event);
}
}
/// See [`Parser::start`].
pub(crate) struct Marker {
pos: u32,
bomb: DropBomb,
}
impl Marker {
fn new(pos: u32) -> Marker {
Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
}
/// Finishes the syntax tree node and assigns `kind` to it,
/// and mark the create a `CompletedMarker` for possible future
/// operation like `.precede()` to deal with forward_parent.
pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
self.bomb.defuse();
let idx = self.pos as usize;
match &mut p.events[idx] {
Event::Start { kind: slot, .. } => {
*slot = kind;
}
_ => unreachable!(),
}
p.push_event(Event::Finish);
CompletedMarker::new(self.pos, kind)
}
/// Abandons the syntax tree node. All its children
/// are attached to its parent instead.
pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
self.bomb.defuse();
let idx = self.pos as usize;
if idx == p.events.len() - 1 {
match p.events.pop() {
Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
_ => unreachable!(),
}
}
}
}
pub(crate) struct CompletedMarker {
pos: u32,
kind: SyntaxKind,
}
impl CompletedMarker {
fn new(pos: u32, kind: SyntaxKind) -> Self {
CompletedMarker { pos, kind }
}
/// This method allows to create a new node which starts
/// *before* the current one. That is, parser could start
/// node `A`, then complete it, and then after parsing the
/// whole `A`, decide that it should have started some node
/// `B` before starting `A`. `precede` allows to do exactly
/// that. See also docs about
/// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
///
/// Given completed events `[START, FINISH]` and its corresponding
/// `CompletedMarker(pos: 0, _)`.
/// Append a new `START` events as `[START, FINISH, NEWSTART]`,
/// then mark `NEWSTART` as `START`'s parent with saving its relative
/// distance to `NEWSTART` into forward_parent(=2 in this case);
pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
let new_pos = p.start();
let idx = self.pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
*forward_parent = Some(new_pos.pos - self.pos);
}
_ => unreachable!(),
}
new_pos
}
/// Extends this completed marker *to the left* up to `m`.
pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
m.bomb.defuse();
let idx = m.pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
*forward_parent = Some(self.pos - m.pos);
}
_ => unreachable!(),
}
self
}
pub(crate) fn kind(&self) -> SyntaxKind {
self.kind
}
}