rust/crates/syntax/src/lib.rs

407 lines
12 KiB
Rust
Raw Normal View History

2019-02-21 06:24:42 -06:00
//! Syntax Tree library used throughout the rust analyzer.
//!
2019-02-21 06:24:42 -06:00
//! Properties:
//! - easy and fast incremental re-parsing
//! - graceful handling of errors
//! - full-fidelity representation (*any* text can be precisely represented as
//! a syntax tree)
//!
2019-02-21 06:24:42 -06:00
//! For more information, see the [RFC]. Current implementation is inspired by
//! the [Swift] one.
//!
2019-02-21 06:24:42 -06:00
//! The most interesting modules here are `syntax_node` (which defines concrete
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
2020-08-12 10:06:49 -05:00
//! CST). The actual parser lives in a separate `parser` crate, though the
2019-02-21 06:24:42 -06:00
//! lexer lives in this crate.
//!
2019-02-21 08:35:45 -06:00
//! See `api_walkthrough` test in this file for a quick API tour!
//!
2019-02-21 06:24:42 -06:00
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
2020-04-11 12:25:33 -05:00
// Shadows `eprintln!` for the modules declared below so that every use is
// forwarded to `stdx::eprintln!`.
// NOTE(review): what `stdx::eprintln!` does beyond std's macro is not visible
// from this file -- confirm before relying on it.
// `allow(unused)`: the shadowing macro may have no call sites at all.
#[allow(unused)]
macro_rules! eprintln {
    ($($args:tt)*) => { stdx::eprintln!($($args)*) };
}
mod syntax_node;
mod syntax_error;
mod parsing;
mod validation;
2019-01-23 08:37:10 -06:00
mod ptr;
2019-07-24 04:38:21 -05:00
#[cfg(test)]
mod tests;
pub mod display;
pub mod algo;
pub mod ast;
2019-03-21 12:05:12 -05:00
#[doc(hidden)]
pub mod fuzz;
2018-07-29 07:16:07 -05:00
2020-03-28 05:08:19 -05:00
use std::{marker::PhantomData, sync::Arc};
2019-05-05 03:31:27 -05:00
2020-03-28 05:08:19 -05:00
use stdx::format_to;
2020-08-12 10:03:06 -05:00
use text_edit::Indel;
2019-05-05 03:31:27 -05:00
2018-10-15 11:55:32 -05:00
pub use crate::{
2019-07-21 05:08:32 -05:00
algo::InsertPosition,
2019-07-18 12:11:05 -05:00
ast::{AstNode, AstToken},
parsing::lexer::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token},
ptr::{AstPtr, SyntaxNodePtr},
syntax_error::SyntaxError,
syntax_node::{
SyntaxElement, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
SyntaxTreeBuilder,
},
2018-07-29 07:16:07 -05:00
};
2020-08-12 10:06:49 -05:00
pub use parser::{SyntaxKind, T};
pub use rowan::{
Direction, GreenNode, NodeOrToken, SmolStr, SyntaxText, TextRange, TextSize, TokenAtOffset,
WalkEvent,
};
2018-07-29 07:16:07 -05:00
2019-05-05 03:31:27 -05:00
/// `Parse` is the result of the parsing: a syntax tree and a collection of
/// errors.
///
/// Note that we always produce a syntax tree, even for completely invalid
/// files.
2019-07-18 14:29:20 -05:00
#[derive(Debug, PartialEq, Eq)]
2019-07-18 11:23:05 -05:00
pub struct Parse<T> {
green: GreenNode,
errors: Arc<Vec<SyntaxError>>,
2019-07-18 11:23:05 -05:00
_ty: PhantomData<fn() -> T>,
2019-05-05 03:31:27 -05:00
}
2019-07-18 11:23:05 -05:00
impl<T> Clone for Parse<T> {
2019-07-18 14:29:20 -05:00
fn clone(&self) -> Parse<T> {
2019-07-18 11:23:05 -05:00
Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData }
2019-07-18 14:29:20 -05:00
}
}
2019-07-18 11:23:05 -05:00
impl<T> Parse<T> {
fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> {
Parse { green, errors: Arc::new(errors), _ty: PhantomData }
2019-07-18 14:29:20 -05:00
}
2019-07-19 02:43:01 -05:00
pub fn syntax_node(&self) -> SyntaxNode {
2019-07-20 12:04:34 -05:00
SyntaxNode::new_root(self.green.clone())
2019-07-18 11:23:05 -05:00
}
}
impl<T: AstNode> Parse<T> {
pub fn to_syntax(self) -> Parse<SyntaxNode> {
Parse { green: self.green, errors: self.errors, _ty: PhantomData }
}
pub fn tree(&self) -> T {
T::cast(self.syntax_node()).unwrap()
}
pub fn errors(&self) -> &[SyntaxError] {
&*self.errors
}
2019-07-18 11:23:05 -05:00
pub fn ok(self) -> Result<T, Arc<Vec<SyntaxError>>> {
2019-05-05 03:31:27 -05:00
if self.errors.is_empty() {
2019-07-18 11:23:05 -05:00
Ok(self.tree())
2019-05-05 03:31:27 -05:00
} else {
Err(self.errors)
}
}
2019-07-18 14:29:20 -05:00
}
2019-05-28 08:59:22 -05:00
2019-07-18 11:23:05 -05:00
impl Parse<SyntaxNode> {
    /// Re-types this parse as `Parse<N>` if the root node casts to `N`;
    /// returns `None` otherwise.
    pub fn cast<N: AstNode>(self) -> Option<Parse<N>> {
        // Only the success of the cast matters here; the typed node itself is
        // discarded right away.
        N::cast(self.syntax_node())?;
        Some(Parse { green: self.green, errors: self.errors, _ty: PhantomData })
    }
}
2019-07-18 14:29:20 -05:00
impl Parse<SourceFile> {
2019-05-28 08:59:22 -05:00
pub fn debug_dump(&self) -> String {
2019-07-20 04:48:24 -05:00
let mut buf = format!("{:#?}", self.tree().syntax());
2019-05-28 08:59:22 -05:00
for err in self.errors.iter() {
2020-03-28 05:08:19 -05:00
format_to!(buf, "error {:?}: {}\n", err.range(), err);
2019-05-28 08:59:22 -05:00
}
buf
}
2019-05-28 09:34:28 -05:00
2020-05-05 16:15:49 -05:00
pub fn reparse(&self, indel: &Indel) -> Parse<SourceFile> {
self.incremental_reparse(indel).unwrap_or_else(|| self.full_reparse(indel))
2019-07-18 14:29:20 -05:00
}
2020-05-05 16:15:49 -05:00
fn incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>> {
2019-05-28 09:34:28 -05:00
// FIXME: validation errors are not handled here
2020-05-05 16:15:49 -05:00
parsing::incremental_reparse(self.tree().syntax(), indel, self.errors.to_vec()).map(
2019-05-28 09:34:28 -05:00
|(green_node, errors, _reparsed_range)| Parse {
2019-07-18 11:23:05 -05:00
green: green_node,
2019-05-28 09:34:28 -05:00
errors: Arc::new(errors),
2019-07-18 11:23:05 -05:00
_ty: PhantomData,
2019-05-28 09:34:28 -05:00
},
)
}
2020-05-05 16:15:49 -05:00
fn full_reparse(&self, indel: &Indel) -> Parse<SourceFile> {
let mut text = self.tree().syntax().text().to_string();
indel.apply(&mut text);
2019-05-28 09:34:28 -05:00
SourceFile::parse(&text)
}
2019-05-05 03:31:27 -05:00
}
2018-08-25 04:10:35 -05:00
2019-01-07 07:15:47 -06:00
/// `SourceFile` represents a parse tree for a single Rust file.
pub use crate::ast::SourceFile;
2018-08-25 03:40:17 -05:00
2019-01-07 07:15:47 -06:00
impl SourceFile {
2019-07-21 05:34:15 -05:00
pub fn parse(text: &str) -> Parse<SourceFile> {
let (green, mut errors) = parsing::parse_text(text);
let root = SyntaxNode::new_root(green.clone());
2018-09-08 10:34:41 -05:00
if cfg!(debug_assertions) {
2019-02-21 06:51:22 -06:00
validation::validate_block_structure(&root);
2018-09-08 10:34:41 -05:00
}
2019-01-14 12:30:21 -06:00
2019-07-21 05:34:15 -05:00
errors.extend(validation::validate(&root));
assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
2019-07-18 11:23:05 -05:00
Parse { green, errors: Arc::new(errors), _ty: PhantomData }
2019-05-28 08:59:22 -05:00
}
2018-08-24 11:27:30 -05:00
}
2019-02-21 06:51:22 -06:00
impl ast::Path {
/// Returns `text`, parsed as a path, but only if it has no errors.
pub fn parse(text: &str) -> Result<Self, ()> {
2020-08-12 10:06:49 -05:00
parsing::parse_text_fragment(text, parser::FragmentKind::Path)
}
}
impl ast::Pat {
/// Returns `text`, parsed as a pattern, but only if it has no errors.
pub fn parse(text: &str) -> Result<Self, ()> {
2020-08-12 10:06:49 -05:00
parsing::parse_text_fragment(text, parser::FragmentKind::Pattern)
}
}
impl ast::Expr {
/// Returns `text`, parsed as an expression, but only if it has no errors.
pub fn parse(text: &str) -> Result<Self, ()> {
2020-08-12 10:06:49 -05:00
parsing::parse_text_fragment(text, parser::FragmentKind::Expr)
}
}
2020-07-29 17:23:03 -05:00
impl ast::Item {
/// Returns `text`, parsed as an item, but only if it has no errors.
pub fn parse(text: &str) -> Result<Self, ()> {
2020-08-12 10:06:49 -05:00
parsing::parse_text_fragment(text, parser::FragmentKind::Item)
}
}
impl ast::Type {
/// Returns `text`, parsed as an type reference, but only if it has no errors.
pub fn parse(text: &str) -> Result<Self, ()> {
2020-08-12 10:06:49 -05:00
parsing::parse_text_fragment(text, parser::FragmentKind::Type)
}
}
2020-12-18 11:58:42 -06:00
impl ast::Attr {
    /// Parses `text` as an attribute and returns it, but only if it has no
    /// errors.
    pub fn parse(text: &str) -> Result<Self, ()> {
        let kind = parser::FragmentKind::Attr;
        parsing::parse_text_fragment(text, kind)
    }
}
impl ast::Stmt {
    /// Parses `text` as a statement and returns it, but only if it has no
    /// errors.
    ///
    /// The fragment kind used is `StatementOptionalSemi`.
    pub fn parse(text: &str) -> Result<Self, ()> {
        let kind = parser::FragmentKind::StatementOptionalSemi;
        parsing::parse_text_fragment(text, kind)
    }
}
2019-10-30 13:38:45 -05:00
/// Matches a `SyntaxNode` against an `ast` type.
///
/// Arms are tried top to bottom; the body of the first arm whose
/// `ast::Type::cast` succeeds is evaluated. If no arm matches, the mandatory
/// trailing `_` arm is evaluated instead.
///
/// The scrutinee is either a plain identifier (`match node { .. }`) or a
/// parenthesized expression (`match (expr) { .. }`). It is evaluated and
/// cloned once for every arm that gets tried, so it should be cheap and free
/// of side effects.
///
/// # Example:
///
/// ```ignore
/// match_ast! {
///     match node {
///         ast::CallExpr(it) => { ... },
///         ast::MethodCallExpr(it) => { ... },
///         ast::MacroCall(it) => { ... },
///         _ => None,
///     }
/// }
/// ```
#[macro_export]
macro_rules! match_ast {
    // Identifier form: forward to the expression form. The recursive call goes
    // through `$crate::` so that it also resolves when the macro is invoked by
    // path without having been imported by name.
    (match $node:ident { $($tt:tt)* }) => { $crate::match_ast!(match ($node) { $($tt)* }) };

    (match ($node:expr) {
        $( ast::$ast:ident($it:ident) => $res:expr, )*
        _ => $catch_all:expr $(,)?
    }) => {{
        $( if let Some($it) = ast::$ast::cast($node.clone()) { $res } else )*
        { $catch_all }
    }};
}
2019-02-21 08:35:45 -06:00
/// A guided tour of the crate's API, written as a test.
///
/// The assertions are here to illustrate what each call returns rather than
/// to pin down any particular behavior.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    // NOTE: the indentation inside this literal is significant -- the text
    // range asserted further down depends on it.
    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of syntax tree and a list
    // of errors. That is, syntax tree is constructed even in presence of errors.
    let parse = SourceFile::parse(source_code);
    assert!(parse.errors().is_empty());

    // The `tree` method returns an owned syntax node of type `SourceFile`.
    // Owned nodes are cheap: inside, they are `Rc` handles to the underlying data.
    let file: SourceFile = parse.tree();

    // `SourceFile` is the root of the syntax tree. We can iterate file's items.
    // Let's fetch the `foo` function.
    let mut func = None;
    for item in file.items() {
        match item {
            ast::Item::Fn(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    let func: ast::Fn = func.unwrap();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let body: ast::BlockExpr = func.body().unwrap();
    let expr: ast::Expr = body.expr().unwrap();

    // Enums are used to group related ast nodes together, and can be used for
    // matching. However, because there are no public fields, it's possible to
    // match only the top level enum: that is the price we pay for increased API
    // flexibility
    let bin_expr: &ast::BinExpr = match &expr {
        ast::Expr::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(expr_syntax == bin_expr.syntax());

    // To go from CST to AST, `AstNode::cast` function is used:
    let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Each syntax node has two properties. The first is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And the second is a text range (offsets into `source_code`):
    assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting token's text:
    let text: SyntaxText = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent().as_ref(), Some(body.syntax()));
    assert_eq!(body.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::Fn::cast);
    assert_eq!(f, Some(func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match &node {
                    NodeOrToken::Node(it) => it.text().to_string(),
                    NodeOrToken::Token(it) => it.text().to_string(),
                };
                format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    // Each nesting level is indented by two more columns; the single leading
    // space of the first line is removed by `trim`.
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and `match_ast!`.
    //
    // Here's what the second one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // And here's the third one, which uses the macro:
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        match_ast! {
            match node {
                ast::Expr(it) => {
                    let res = it.syntax().text().to_string();
                    exprs_visit.push(res);
                },
                _ => (),
            }
        }
    }
    assert_eq!(exprs_cast, exprs_visit);
}