//! The Rust parser.
//!
//! The parser doesn't know about the concrete representation of tokens and
//! syntax trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used
//! instead. As a consequence, this crate does not contain a lexer.
//!
//! The [`Parser`] struct from the [`parser`] module is a cursor into the
//! sequence of tokens. Parsing routines use [`Parser`] to inspect the current
//! state and advance the parsing.
//!
//! The actual parsing happens in the [`grammar`] module.
//!
//! Tests for this crate live in the `syntax` crate.
//!
//! [`Parser`]: crate::parser::Parser
2019-02-21 06:24:42 -06:00
#![allow(rustdoc::private_intra_doc_links)]
2019-02-21 04:27:45 -06:00
#[macro_use]
mod token_set;
2019-05-08 10:35:32 -05:00
#[macro_use]
2019-02-21 04:27:45 -06:00
mod syntax_kind;
mod event;
mod parser;
mod grammar;
pub(crate) use token_set::TokenSet;
pub use syntax_kind::SyntaxKind;
/// An error produced while parsing.
///
/// The payload is a boxed `String` (presumably the human-readable message);
/// values of this type are what `TreeSink::error` receives.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ParseError(pub Box<String>);
2019-02-21 04:27:45 -06:00
/// `TokenSource` abstracts the source of the tokens parser operates on.
2019-02-21 06:24:42 -06:00
///
/// Hopefully this will allow us to treat text and token trees in the same way!
pub trait TokenSource {
2019-05-25 07:31:53 -05:00
fn current(&self) -> Token;
/// Lookahead n token
fn lookahead_nth(&self, n: usize) -> Token;
/// bump cursor to next token
fn bump(&mut self);
/// Is the current token a specified keyword?
fn is_keyword(&self, kw: &str) -> bool;
}
/// `Token` abstracts the cursor of `TokenSource` operates on.
2019-05-25 07:31:53 -05:00
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Token {
2019-02-21 06:24:42 -06:00
/// What is the current token?
2019-05-25 07:31:53 -05:00
pub kind: SyntaxKind,
2019-02-21 06:24:42 -06:00
/// Is the current token joined to the next one (`> >` vs `>>`).
2019-05-25 07:31:53 -05:00
pub is_jointed_to_next: bool,
2019-02-21 06:24:42 -06:00
}
2019-02-21 04:27:45 -06:00
/// `TreeSink` abstracts details of a particular syntax tree implementation.
pub trait TreeSink {
2019-03-30 05:25:53 -05:00
/// Adds new token to the current branch.
fn token(&mut self, kind: SyntaxKind, n_tokens: u8);
2019-02-21 04:27:45 -06:00
/// Start new branch and make it current.
2019-03-30 05:25:53 -05:00
fn start_node(&mut self, kind: SyntaxKind);
2019-02-21 04:27:45 -06:00
/// Finish current branch and restore previous
/// branch as current.
2019-03-30 05:25:53 -05:00
fn finish_node(&mut self);
2019-02-21 04:27:45 -06:00
fn error(&mut self, error: ParseError);
}
2019-05-25 07:31:53 -05:00
fn parse_from_tokens<F>(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F)
where
F: FnOnce(&mut parser::Parser),
{
2019-02-21 04:27:45 -06:00
let mut p = parser::Parser::new(token_source);
f(&mut p);
2019-02-21 04:27:45 -06:00
let events = p.finish();
event::process(tree_sink, events);
}
/// Parse given tokens into the given sink as a rust file.
2019-05-25 07:31:53 -05:00
pub fn parse(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
parse_from_tokens(token_source, tree_sink, grammar::root);
}
/// The kind of syntax fragment that `parse_fragment` should parse.
///
/// Each variant selects one grammar entry point.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum FragmentKind {
    Path,
    Expr,
    Statement,
    StatementOptionalSemi,
    Type,
    Pattern,
    Item,
    Block,
    Visibility,
    MetaItem,
    Items,
    Statements,
    Attr,
}
pub fn parse_fragment(
2019-05-25 07:31:53 -05:00
token_source: &mut dyn TokenSource,
tree_sink: &mut dyn TreeSink,
2019-09-02 10:51:03 -05:00
fragment_kind: FragmentKind,
2019-05-25 07:31:53 -05:00
) {
2019-09-02 10:51:03 -05:00
let parser: fn(&'_ mut parser::Parser) = match fragment_kind {
FragmentKind::Path => grammar::fragments::path,
FragmentKind::Expr => grammar::fragments::expr,
FragmentKind::Type => grammar::fragments::type_,
FragmentKind::Pattern => grammar::fragments::pattern_single,
2019-09-02 10:51:03 -05:00
FragmentKind::Item => grammar::fragments::item,
2020-05-02 07:34:39 -05:00
FragmentKind::Block => grammar::fragments::block_expr,
2019-09-02 10:51:03 -05:00
FragmentKind::Visibility => grammar::fragments::opt_visibility,
FragmentKind::MetaItem => grammar::fragments::meta_item,
FragmentKind::Statement => grammar::fragments::stmt,
FragmentKind::StatementOptionalSemi => grammar::fragments::stmt_optional_semi,
2019-09-02 10:51:03 -05:00
FragmentKind::Items => grammar::fragments::macro_items,
FragmentKind::Statements => grammar::fragments::macro_stmts,
2020-12-18 11:58:42 -06:00
FragmentKind::Attr => grammar::fragments::attr,
2019-09-02 10:51:03 -05:00
};
parse_from_tokens(token_source, tree_sink, parser)
2019-04-18 14:49:56 -05:00
}
2019-02-21 06:24:42 -06:00
/// A parsing function for a specific braced-block.
2019-02-21 04:27:45 -06:00
pub struct Reparser(fn(&mut parser::Parser));
impl Reparser {
2019-02-21 06:24:42 -06:00
/// If the node is a braced block, return the corresponding `Reparser`.
2019-02-21 04:27:45 -06:00
pub fn for_node(
node: SyntaxKind,
first_child: Option<SyntaxKind>,
parent: Option<SyntaxKind>,
) -> Option<Reparser> {
grammar::reparser(node, first_child, parent).map(Reparser)
}
2019-02-21 06:24:42 -06:00
/// Re-parse given tokens using this `Reparser`.
///
/// Tokens must start with `{`, end with `}` and form a valid brace
/// sequence.
2019-05-25 07:31:53 -05:00
pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
2019-02-21 04:37:32 -06:00
let Reparser(r) = self;
let mut p = parser::Parser::new(token_source);
r(&mut p);
let events = p.finish();
event::process(tree_sink, events);
}
2019-02-21 04:27:45 -06:00
}