rust/crates/ra_syntax/src/lib.rs

283 lines
9.4 KiB
Rust
Raw Normal View History

2019-02-21 06:24:42 -06:00
//! Syntax Tree library used throughout the rust analyzer.
//!
2019-02-21 06:24:42 -06:00
//! Properties:
//! - easy and fast incremental re-parsing
//! - graceful handling of errors
//! - full-fidelity representation (*any* text can be precisely represented as
//! a syntax tree)
//!
2019-02-21 06:24:42 -06:00
//! For more information, see the [RFC]. Current implementation is inspired by
//! the [Swift] one.
//!
2019-02-21 06:24:42 -06:00
//! The most interesting modules here are `syntax_node` (which defines concrete
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
//! CST). The actual parser lives in a separate `ra_parser` crate, though the
//! lexer lives in this crate.
//!
2019-02-21 08:35:45 -06:00
//! See `api_walkthrough` test in this file for a quick API tour!
//!
2019-02-21 06:24:42 -06:00
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
mod syntax_node;
mod syntax_text;
mod syntax_error;
mod parsing;
mod validation;
2019-01-23 08:37:10 -06:00
mod ptr;
pub mod algo;
pub mod ast;
2019-03-21 12:05:12 -05:00
#[doc(hidden)]
pub mod fuzz;
2018-07-29 07:16:07 -05:00
2019-05-05 03:31:27 -05:00
use std::sync::Arc;
use ra_text_edit::AtomTextEdit;
use crate::syntax_node::GreenNode;
2018-12-06 11:49:36 -06:00
pub use rowan::{SmolStr, TextRange, TextUnit};
2019-02-21 04:37:32 -06:00
pub use ra_parser::SyntaxKind;
2019-05-08 10:35:32 -05:00
pub use ra_parser::T;
2018-10-15 11:55:32 -05:00
pub use crate::{
2018-08-25 03:40:17 -05:00
ast::AstNode,
2019-02-20 07:16:14 -06:00
syntax_error::{SyntaxError, SyntaxErrorKind, Location},
syntax_text::SyntaxText,
2019-04-21 09:47:55 -05:00
syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc, SyntaxTreeBuilder, SyntaxElement, SyntaxToken, InsertPosition},
2019-01-23 09:26:02 -06:00
ptr::{SyntaxNodePtr, AstPtr},
parsing::{tokenize, classify_literal, Token},
2018-07-29 07:16:07 -05:00
};
2019-05-05 03:31:27 -05:00
/// `Parse` is the result of the parsing: a syntax tree and a collection of
/// errors.
///
/// Note that we always produce a syntax tree, even for completely invalid
/// files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parse {
/// The syntax tree, rooted at a `SourceFile` node.
pub tree: TreeArc<SourceFile>,
/// The errors that accompany `tree`, stored behind an `Arc`; empty when
/// there are none.
pub errors: Arc<Vec<SyntaxError>>,
}
impl Parse {
pub fn ok(self) -> Result<TreeArc<SourceFile>, Arc<Vec<SyntaxError>>> {
if self.errors.is_empty() {
Ok(self.tree)
} else {
Err(self.errors)
}
}
}
2018-08-25 04:10:35 -05:00
2019-01-07 07:15:47 -06:00
/// `SourceFile` represents a parse tree for a single Rust file.
pub use crate::ast::SourceFile;
2018-08-25 03:40:17 -05:00
2019-01-07 07:15:47 -06:00
impl SourceFile {
2019-05-28 08:34:23 -05:00
fn new(green: GreenNode) -> TreeArc<SourceFile> {
2019-05-05 03:33:07 -05:00
let root = SyntaxNode::new(green);
2018-09-08 10:34:41 -05:00
if cfg!(debug_assertions) {
2019-02-21 06:51:22 -06:00
validation::validate_block_structure(&root);
2018-09-08 10:34:41 -05:00
}
2018-11-07 09:38:43 -06:00
assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
TreeArc::cast(root)
2018-08-25 03:40:17 -05:00
}
2019-01-14 12:30:21 -06:00
pub fn parse(text: &str) -> TreeArc<SourceFile> {
2019-05-28 08:34:23 -05:00
let (green, _errors) = parsing::parse_text(text);
SourceFile::new(green)
2018-08-25 04:10:35 -05:00
}
pub fn reparse(&self, edit: &AtomTextEdit) -> TreeArc<SourceFile> {
2019-02-08 05:49:43 -06:00
self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
2018-08-25 05:17:54 -05:00
}
pub fn incremental_reparse(&self, edit: &AtomTextEdit) -> Option<TreeArc<SourceFile>> {
parsing::incremental_reparse(self.syntax(), edit, self.errors())
2019-05-28 08:34:23 -05:00
.map(|(green_node, _errors, _reparsed_range)| SourceFile::new(green_node))
2018-08-25 05:17:54 -05:00
}
fn full_reparse(&self, edit: &AtomTextEdit) -> TreeArc<SourceFile> {
2019-01-08 12:59:55 -06:00
let text = edit.apply(self.syntax().text().to_string());
2019-01-07 07:15:47 -06:00
SourceFile::parse(&text)
2018-08-25 03:40:17 -05:00
}
2018-08-25 03:40:17 -05:00
pub fn errors(&self) -> Vec<SyntaxError> {
2019-04-08 17:06:30 -05:00
let mut errors = self.syntax.root_data().to_vec();
errors.extend(validation::validate(self));
errors
2018-08-25 03:40:17 -05:00
}
2018-08-24 11:27:30 -05:00
}
2019-02-21 06:51:22 -06:00
2019-02-21 08:35:45 -06:00
/// This test is a guided tour of the crate's API.
///
/// Its assertions are there to document the results one should expect from
/// each call, rather than to exercise edge cases.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    // NOTE: the whitespace inside this literal is significant. The text range
    // asserted further down (32..37) is the position of `1 + 1` counted from
    // the very first byte of the string, leading newline and indentation
    // included.
    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // Note how `parse` does not return a `Result`: even completely invalid
    // source code might be parsed.
    let file = SourceFile::parse(source_code);

    // Due to the way ownership is set up, owned syntax Nodes always live behind
    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
    // points to the whole file instead of an individual node.
    let file: TreeArc<SourceFile> = file;

    // `SourceFile` is the root of the syntax tree. We can iterate file's items:
    let mut func = None;
    for item in file.items() {
        match item.kind() {
            ast::ModuleItemKind::FnDef(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    // The returned items are always references.
    let func: &ast::FnDef = func.unwrap();

    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
    // `to_owned` is a cheap operation: atomic increment.
    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<&ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let block: &ast::Block = func.body().unwrap();
    let expr: &ast::Expr = block.expr().unwrap();

    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
    // to match exhaustively against all flavors of nodes, while maintaining
    // internal representation flexibility. The drawback is that one can't write
    // nested matches as one pattern.
    let bin_expr: &ast::BinExpr = match expr.kind() {
        ast::ExprKind::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));

    // To go from CST to AST, `AstNode::cast` function is used:
    let expr = match ast::Expr::cast(expr_syntax) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Note how expr is also a reference!
    let expr: &ast::Expr = expr;

    // This is possible because the underlying representation is the same:
    assert_eq!(
        expr as *const ast::Expr as *const u8,
        expr_syntax as *const SyntaxNode as *const u8
    );

    // Each syntax node has two properties. The first is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And the second is a text range:
    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting token's text:
    let text: SyntaxText<'_> = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
    assert_eq!(block.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
    assert_eq!(f, Some(&*func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match node {
                    SyntaxElement::Node(it) => it.text().to_string(),
                    SyntaxElement::Token(it) => it.text().to_string(),
                };
                // `{:indent$}` pads the single space to `indent` columns, so
                // every nesting level is shifted right by two more spaces.
                buf += &format!("{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    // The indentation inside the raw string below is significant: it has to
    // match the padding produced by the loop above.
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and the visitor.
    //
    // Here's what the second one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // An alternative is to use a visitor. The visitor does not do traversal
    // automatically (so it's more akin to a generic lambda) and is constructed
    // from closures. This seems more flexible than a single generated visitor
    // trait.
    use algo::visit::{visitor, Visitor};
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        if let Some(result) =
            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
        {
            exprs_visit.push(result);
        }
    }
    assert_eq!(exprs_cast, exprs_visit);
}