rust/crates/syntax/src/parsing/reparsing.rs

450 lines
11 KiB
Rust
Raw Normal View History

2019-02-21 06:24:42 -06:00
//! Implementation of incremental re-parsing.
//!
//! We use two simple strategies for this:
//! - if the edit modifies only a single token (like changing an identifier's
//! letter), we replace only this token.
//! - otherwise, we search for the nearest `{}` block which contains the edit
//! and try to parse only this block.
2020-08-12 10:06:49 -05:00
use parser::Reparser;
2020-08-12 10:03:06 -05:00
use text_edit::Indel;
2019-02-21 04:37:32 -06:00
use crate::{
parsing::{
lexer::{lex_single_syntax_kind, tokenize, Token},
2019-02-23 07:07:29 -06:00
text_token_source::TextTokenSource,
text_tree_sink::TextTreeSink,
2019-05-15 07:35:47 -05:00
},
2019-07-20 12:04:34 -05:00
syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
SyntaxError,
SyntaxKind::*,
2020-04-24 16:40:41 -05:00
TextRange, TextSize, T,
};
pub(crate) fn incremental_reparse(
2019-01-07 07:15:47 -06:00
node: &SyntaxNode,
2020-05-05 16:15:49 -05:00
edit: &Indel,
errors: Vec<SyntaxError>,
2019-03-30 05:25:53 -05:00
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
2021-06-12 22:54:16 -05:00
if let Some((green, new_errors, old_range)) = reparse_token(node, edit) {
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
2019-03-30 05:25:53 -05:00
}
2021-06-12 22:54:16 -05:00
if let Some((green, new_errors, old_range)) = reparse_block(node, edit) {
2019-03-30 05:25:53 -05:00
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
}
None
}
fn reparse_token(
root: &SyntaxNode,
2020-05-05 16:15:49 -05:00
edit: &Indel,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
2021-01-15 11:15:33 -06:00
let prev_token = root.covering_element(edit.delete).as_token()?.clone();
let prev_token_kind = prev_token.kind();
match prev_token_kind {
WHITESPACE | COMMENT | IDENT | STRING => {
if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
// removing a new line may extends previous token
let deleted_range = edit.delete - prev_token.text_range().start();
if prev_token.text()[deleted_range].contains('\n') {
return None;
}
}
2021-06-12 22:54:16 -05:00
let mut new_text = get_text_after_edit(prev_token.clone().into(), edit);
let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
if new_token_kind != prev_token_kind
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
{
return None;
}
// Check that edited token is not a part of the bigger token.
// E.g. if for source code `bruh"str"` the user removed `ruh`, then
// `b` no longer remains an identifier, but becomes a part of byte string literal
if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
new_text.push(next_char);
let token_with_next_char = lex_single_syntax_kind(&new_text);
if let Some((_kind, _error)) = token_with_next_char {
return None;
}
new_text.pop();
}
let new_token = GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), &new_text);
Some((
prev_token.replace_with(new_token),
new_err.into_iter().collect(),
prev_token.text_range(),
))
}
_ => None,
}
}
fn reparse_block(
root: &SyntaxNode,
2020-05-05 16:15:49 -05:00
edit: &Indel,
2019-03-30 05:25:53 -05:00
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
let text = get_text_after_edit(node.clone().into(), edit);
let (tokens, new_lexer_errors) = tokenize(&text);
if !is_balanced(&tokens) {
return None;
}
2019-05-25 07:31:53 -05:00
let mut token_source = TextTokenSource::new(&text, &tokens);
let mut tree_sink = TextTreeSink::new(&text, &tokens);
2019-05-25 07:31:53 -05:00
reparser.parse(&mut token_source, &mut tree_sink);
let (green, mut new_parser_errors) = tree_sink.finish();
new_parser_errors.extend(new_lexer_errors);
Some((node.replace_with(green), new_parser_errors, node.text_range()))
}
2020-05-05 16:15:49 -05:00
fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
let mut text = match element {
2019-07-20 12:04:34 -05:00
NodeOrToken::Token(token) => token.text().to_string(),
NodeOrToken::Node(node) => node.text().to_string(),
2019-03-30 05:25:53 -05:00
};
edit.apply(&mut text);
text
}
/// Returns `true` if `text` is exactly one of the identifiers this module
/// treats as contextual keywords: `auto`, `default` or `union`.
fn is_contextual_kw(text: &str) -> bool {
    const CONTEXTUAL_KEYWORDS: [&str; 3] = ["auto", "default", "union"];
    CONTEXTUAL_KEYWORDS.iter().any(|&kw| kw == text)
}
2019-07-18 11:23:05 -05:00
fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
2021-01-15 11:15:33 -06:00
let node = node.covering_element(range);
node.ancestors().find_map(|node| {
2019-03-30 05:25:53 -05:00
let first_child = node.first_child_or_token().map(|it| it.kind());
let parent = node.parent().map(|it| it.kind());
2019-02-21 04:37:32 -06:00
Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
})
}
fn is_balanced(tokens: &[Token]) -> bool {
2018-10-17 18:25:37 -05:00
if tokens.is_empty()
2019-05-15 07:35:47 -05:00
|| tokens.first().unwrap().kind != T!['{']
|| tokens.last().unwrap().kind != T!['}']
{
return false;
}
let mut balance = 0usize;
2019-03-21 14:33:05 -05:00
for t in &tokens[1..tokens.len() - 1] {
match t.kind {
2019-05-15 07:35:47 -05:00
T!['{'] => balance += 1,
T!['}'] => {
balance = match balance.checked_sub(1) {
Some(b) => b,
None => return false,
}
}
_ => (),
}
}
balance == 0
}
fn merge_errors(
old_errors: Vec<SyntaxError>,
new_errors: Vec<SyntaxError>,
range_before_reparse: TextRange,
2020-05-05 16:15:49 -05:00
edit: &Indel,
) -> Vec<SyntaxError> {
let mut res = Vec::new();
for old_err in old_errors {
let old_err_range = old_err.range();
if old_err_range.end() <= range_before_reparse.start() {
res.push(old_err);
} else if old_err_range.start() >= range_before_reparse.end() {
2020-04-24 16:40:41 -05:00
let inserted_len = TextSize::of(&edit.insert);
res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
// Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
}
}
res.extend(new_errors.into_iter().map(|new_err| {
// fighting borrow checker with a variable ;)
let offseted_range = new_err.range() + range_before_reparse.start();
new_err.with_range(offseted_range)
}));
res
}
2018-09-15 07:35:30 -05:00
// Tests comparing incremental reparsing against a full parse of the edited text.
// NOTE(review): the bare timestamp lines scattered through this module (also
// inside the raw-string fixtures) look like residue from a blame view rather
// than part of the source -- confirm against the repository before relying on
// the fixture contents or the expected lengths.
#[cfg(test)]
mod tests {
use test_utils::{assert_eq_text, extract_range};
2018-12-06 12:16:37 -06:00
use super::*;
use crate::{AstNode, Parse, SourceFile};
2018-09-15 07:35:30 -05:00
2019-03-30 05:25:53 -05:00
/// Checks one incremental reparse scenario.
///
/// `before` is passed through `extract_range` to obtain the source text and
/// the range to replace; that range is replaced with `replace_with`. The
/// edited text is parsed from scratch and, separately, the original text is
/// parsed and then incrementally reparsed with the same edit. The check
/// asserts that:
/// - the incremental reparse succeeded (it is unwrapped),
/// - the reparsed range has length `reparsed_len`,
/// - both trees have identical `{:#?}` dumps,
/// - both parses report equal errors.
fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
let (range, before) = extract_range(before);
2020-05-05 16:15:49 -05:00
let edit = Indel::replace(range, replace_with.to_owned());
let after = {
let mut after = before.clone();
edit.apply(&mut after);
after
};
2018-09-15 07:35:30 -05:00
2019-05-28 09:34:28 -05:00
let fully_reparsed = SourceFile::parse(&after);
2019-07-18 11:23:05 -05:00
let incrementally_reparsed: Parse<SourceFile> = {
let before = SourceFile::parse(&before);
2019-03-30 05:25:53 -05:00
let (green, new_errors, range) =
incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
2019-03-30 05:25:53 -05:00
assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
2019-07-18 11:23:05 -05:00
Parse::new(green, new_errors)
};
assert_eq_text!(
2019-07-20 04:48:24 -05:00
&format!("{:#?}", fully_reparsed.tree().syntax()),
&format!("{:#?}", incrementally_reparsed.tree().syntax()),
2019-03-30 05:25:53 -05:00
);
assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
}
2019-03-30 05:25:53 -05:00
// Edits inside various brace-delimited constructs (fn bodies, struct/enum/
// trait/impl/extern/mod bodies, use trees, macro token trees); the last
// argument of each `do_check` is the expected length of the reparsed range.
#[test] // FIXME: some test here actually test token reparsing
fn reparse_block_tests() {
do_check(
r"
2018-09-15 07:35:30 -05:00
fn foo() {
2021-01-06 14:15:48 -06:00
let x = foo + $0bar$0
2018-09-15 07:35:30 -05:00
}
",
"baz",
2019-03-30 05:25:53 -05:00
3,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
fn foo() {
2021-01-06 14:15:48 -06:00
let x = foo$0 + bar$0
2018-09-15 07:35:30 -05:00
}
",
"baz",
2019-03-30 05:25:53 -05:00
25,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
struct Foo {
2021-01-06 14:15:48 -06:00
f: foo$0$0
2018-09-15 07:35:30 -05:00
}
",
",\n g: (),",
2019-03-30 05:25:53 -05:00
14,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
fn foo {
let;
1 + 1;
2021-01-06 14:15:48 -06:00
$092$0;
2018-09-15 07:35:30 -05:00
}
",
"62",
2019-03-30 05:25:53 -05:00
31, // FIXME: reparse only int literal here
);
do_check(
r"
2018-09-15 07:35:30 -05:00
mod foo {
2021-01-06 14:15:48 -06:00
fn $0$0
2018-09-15 07:35:30 -05:00
}
",
"bar",
2019-03-30 05:25:53 -05:00
11,
);
2019-03-30 05:25:53 -05:00
do_check(
r"
2018-09-15 07:35:30 -05:00
trait Foo {
2021-01-06 14:15:48 -06:00
type $0Foo$0;
2018-09-15 07:35:30 -05:00
}
",
"Output",
2019-03-30 05:25:53 -05:00
3,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
impl IntoIterator<Item=i32> for Foo {
2021-01-06 14:15:48 -06:00
f$0$0
2018-09-15 07:35:30 -05:00
}
",
"n next(",
2019-03-30 05:25:53 -05:00
9,
);
2021-01-06 14:15:48 -06:00
do_check(r"use a::b::{foo,$0,bar$0};", "baz", 10);
do_check(
r"
2018-09-15 07:35:30 -05:00
pub enum A {
2021-01-06 14:15:48 -06:00
Foo$0$0
2018-09-15 07:35:30 -05:00
}
",
"\nBar;\n",
2019-03-30 05:25:53 -05:00
11,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
foo!{a, b$0$0 d}
",
", c[3]",
2019-03-30 05:25:53 -05:00
8,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
fn foo() {
2021-01-06 14:15:48 -06:00
vec![$0$0]
2018-09-15 07:35:30 -05:00
}
",
"123",
2019-03-30 05:25:53 -05:00
14,
);
do_check(
r"
2018-09-15 07:35:30 -05:00
extern {
2021-01-06 14:15:48 -06:00
fn$0;$0
2018-09-15 07:35:30 -05:00
}
",
" exit(code: c_int)",
2019-03-30 05:25:53 -05:00
11,
);
}
// Edits that fall inside a single whitespace, comment, identifier or string
// token; the last argument of each `do_check` is the expected length of the
// reparsed range.
#[test]
2019-03-30 05:25:53 -05:00
fn reparse_token_tests() {
do_check(
2021-01-06 14:15:48 -06:00
r"$0$0
fn foo() -> i32 { 1 }
",
"\n\n\n \n",
2019-03-30 05:25:53 -05:00
1,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn foo() -> $0$0 {}
",
" \n",
2019-03-30 05:25:53 -05:00
2,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn $0foo$0() -> i32 { 1 }
",
"bar",
2019-03-30 05:25:53 -05:00
3,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn foo$0$0foo() { }
",
"bar",
2019-03-30 05:25:53 -05:00
6,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn foo /* $0$0 */ () {}
",
"some comment",
2019-03-30 05:25:53 -05:00
6,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn baz $0$0 () {}
",
" \t\t\n\n",
2019-03-30 05:25:53 -05:00
2,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
fn baz $0$0 () {}
",
" \t\t\n\n",
2019-03-30 05:25:53 -05:00
2,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
/// foo $0$0omment
mod { }
",
"c",
2019-03-30 05:25:53 -05:00
14,
);
do_check(
r#"
2021-01-06 14:15:48 -06:00
fn -> &str { "Hello$0$0" }
"#,
", world",
2019-03-30 05:25:53 -05:00
7,
);
do_check(
r#"
2021-01-06 14:15:48 -06:00
fn -> &str { // "Hello$0$0"
"#,
", world",
2019-03-30 05:25:53 -05:00
10,
);
do_check(
r##"
2021-01-06 14:15:48 -06:00
fn -> &str { r#"Hello$0$0"#
"##,
", world",
2019-03-30 05:25:53 -05:00
10,
);
do_check(
r"
2021-01-06 14:15:48 -06:00
#[derive($0Copy$0)]
enum Foo {
}
",
"Clone",
2019-03-30 05:25:53 -05:00
4,
);
2018-09-15 07:35:30 -05:00
}
// Editing the inside of an unclosed string literal that stays unclosed.
#[test]
fn reparse_str_token_with_error_unchanged() {
2021-01-06 14:15:48 -06:00
do_check(r#""$0Unclosed$0 string literal"#, "Still unclosed", 24);
}
// Appending the missing closing quote to an unclosed string literal.
#[test]
fn reparse_str_token_with_error_fixed() {
2021-01-06 14:15:48 -06:00
do_check(r#""unterinated$0$0"#, "\"", 12);
}
// Inserting digits into a block whose fixture is not changed into a
// terminated statement by the edit.
#[test]
fn reparse_block_with_error_in_middle_unchanged() {
do_check(
r#"fn main() {
if {}
2021-01-06 14:15:48 -06:00
32 + 4$0$0
return
if {}
}"#,
"23",
105,
)
}
// Same fixture as above, but the edit inserts a `;` after the expression.
#[test]
fn reparse_block_with_error_in_middle_fixed() {
do_check(
r#"fn main() {
if {}
2021-01-06 14:15:48 -06:00
32 + 4$0$0
return
if {}
}"#,
";",
105,
)
}
2018-10-02 09:07:12 -05:00
}