455 lines
11 KiB
Rust
455 lines
11 KiB
Rust
//! Implementation of incremental re-parsing.
|
|
//!
|
|
//! We use two simple strategies for this:
|
|
//! - if the edit modifies only a single token (like changing an identifier's
|
|
//! letter), we replace only this token.
|
|
//! - otherwise, we search for the nearest `{}` block which contains the edit
|
|
//! and try to parse only this block.
|
|
|
|
use ra_parser::Reparser;
|
|
use ra_text_edit::Indel;
|
|
|
|
use crate::{
|
|
algo,
|
|
parsing::{
|
|
lexer::{lex_single_syntax_kind, tokenize, Token},
|
|
text_token_source::TextTokenSource,
|
|
text_tree_sink::TextTreeSink,
|
|
},
|
|
syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
|
|
SyntaxError,
|
|
SyntaxKind::*,
|
|
TextRange, TextSize, T,
|
|
};
|
|
|
|
pub(crate) fn incremental_reparse(
|
|
node: &SyntaxNode,
|
|
edit: &Indel,
|
|
errors: Vec<SyntaxError>,
|
|
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
|
if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
|
|
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
|
|
}
|
|
|
|
if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
|
|
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
|
|
}
|
|
None
|
|
}
|
|
|
|
fn reparse_token<'node>(
|
|
root: &'node SyntaxNode,
|
|
edit: &Indel,
|
|
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
|
let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
|
|
let prev_token_kind = prev_token.kind();
|
|
match prev_token_kind {
|
|
WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
|
|
if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
|
|
// removing a new line may extends previous token
|
|
let deleted_range = edit.delete - prev_token.text_range().start();
|
|
if prev_token.text()[deleted_range].contains('\n') {
|
|
return None;
|
|
}
|
|
}
|
|
|
|
let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
|
|
let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
|
|
|
|
if new_token_kind != prev_token_kind
|
|
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
|
|
{
|
|
return None;
|
|
}
|
|
|
|
// Check that edited token is not a part of the bigger token.
|
|
// E.g. if for source code `bruh"str"` the user removed `ruh`, then
|
|
// `b` no longer remains an identifier, but becomes a part of byte string literal
|
|
if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
|
|
new_text.push(next_char);
|
|
let token_with_next_char = lex_single_syntax_kind(&new_text);
|
|
if let Some((_kind, _error)) = token_with_next_char {
|
|
return None;
|
|
}
|
|
new_text.pop();
|
|
}
|
|
|
|
let new_token =
|
|
GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
|
|
Some((
|
|
prev_token.replace_with(new_token),
|
|
new_err.into_iter().collect(),
|
|
prev_token.text_range(),
|
|
))
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn reparse_block<'node>(
|
|
root: &'node SyntaxNode,
|
|
edit: &Indel,
|
|
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
|
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
|
|
let text = get_text_after_edit(node.clone().into(), edit);
|
|
|
|
let (tokens, new_lexer_errors) = tokenize(&text);
|
|
if !is_balanced(&tokens) {
|
|
return None;
|
|
}
|
|
|
|
let mut token_source = TextTokenSource::new(&text, &tokens);
|
|
let mut tree_sink = TextTreeSink::new(&text, &tokens);
|
|
reparser.parse(&mut token_source, &mut tree_sink);
|
|
|
|
let (green, mut new_parser_errors) = tree_sink.finish();
|
|
new_parser_errors.extend(new_lexer_errors);
|
|
|
|
Some((node.replace_with(green), new_parser_errors, node.text_range()))
|
|
}
|
|
|
|
fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
|
|
let edit =
|
|
Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
|
|
|
|
let text = match element {
|
|
NodeOrToken::Token(token) => token.text().to_string(),
|
|
NodeOrToken::Node(node) => node.text().to_string(),
|
|
};
|
|
edit.apply(text)
|
|
}
|
|
|
|
fn is_contextual_kw(text: &str) -> bool {
|
|
match text {
|
|
"auto" | "default" | "union" => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
|
|
let node = algo::find_covering_element(node, range);
|
|
|
|
let mut ancestors = match node {
|
|
NodeOrToken::Token(it) => it.parent().ancestors(),
|
|
NodeOrToken::Node(it) => it.ancestors(),
|
|
};
|
|
ancestors.find_map(|node| {
|
|
let first_child = node.first_child_or_token().map(|it| it.kind());
|
|
let parent = node.parent().map(|it| it.kind());
|
|
Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
|
|
})
|
|
}
|
|
|
|
fn is_balanced(tokens: &[Token]) -> bool {
|
|
if tokens.is_empty()
|
|
|| tokens.first().unwrap().kind != T!['{']
|
|
|| tokens.last().unwrap().kind != T!['}']
|
|
{
|
|
return false;
|
|
}
|
|
let mut balance = 0usize;
|
|
for t in &tokens[1..tokens.len() - 1] {
|
|
match t.kind {
|
|
T!['{'] => balance += 1,
|
|
T!['}'] => {
|
|
balance = match balance.checked_sub(1) {
|
|
Some(b) => b,
|
|
None => return false,
|
|
}
|
|
}
|
|
_ => (),
|
|
}
|
|
}
|
|
balance == 0
|
|
}
|
|
|
|
fn merge_errors(
|
|
old_errors: Vec<SyntaxError>,
|
|
new_errors: Vec<SyntaxError>,
|
|
range_before_reparse: TextRange,
|
|
edit: &Indel,
|
|
) -> Vec<SyntaxError> {
|
|
let mut res = Vec::new();
|
|
|
|
for old_err in old_errors {
|
|
let old_err_range = old_err.range();
|
|
if old_err_range.end() <= range_before_reparse.start() {
|
|
res.push(old_err);
|
|
} else if old_err_range.start() >= range_before_reparse.end() {
|
|
let inserted_len = TextSize::of(&edit.insert);
|
|
res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
|
|
// Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
|
|
}
|
|
}
|
|
res.extend(new_errors.into_iter().map(|new_err| {
|
|
// fighting borrow checker with a variable ;)
|
|
let offseted_range = new_err.range() + range_before_reparse.start();
|
|
new_err.with_range(offseted_range)
|
|
}));
|
|
res
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use test_utils::{assert_eq_text, extract_range};
|
|
|
|
use super::*;
|
|
use crate::{AstNode, Parse, SourceFile};
|
|
|
|
fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
|
|
let (range, before) = extract_range(before);
|
|
let edit = Indel::replace(range, replace_with.to_owned());
|
|
let after = edit.apply(before.clone());
|
|
|
|
let fully_reparsed = SourceFile::parse(&after);
|
|
let incrementally_reparsed: Parse<SourceFile> = {
|
|
let before = SourceFile::parse(&before);
|
|
let (green, new_errors, range) =
|
|
incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
|
|
assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
|
|
Parse::new(green, new_errors)
|
|
};
|
|
|
|
assert_eq_text!(
|
|
&format!("{:#?}", fully_reparsed.tree().syntax()),
|
|
&format!("{:#?}", incrementally_reparsed.tree().syntax()),
|
|
);
|
|
assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
|
|
}
|
|
|
|
#[test] // FIXME: some test here actually test token reparsing
|
|
fn reparse_block_tests() {
|
|
do_check(
|
|
r"
|
|
fn foo() {
|
|
let x = foo + <|>bar<|>
|
|
}
|
|
",
|
|
"baz",
|
|
3,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo() {
|
|
let x = foo<|> + bar<|>
|
|
}
|
|
",
|
|
"baz",
|
|
25,
|
|
);
|
|
do_check(
|
|
r"
|
|
struct Foo {
|
|
f: foo<|><|>
|
|
}
|
|
",
|
|
",\n g: (),",
|
|
14,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo {
|
|
let;
|
|
1 + 1;
|
|
<|>92<|>;
|
|
}
|
|
",
|
|
"62",
|
|
31, // FIXME: reparse only int literal here
|
|
);
|
|
do_check(
|
|
r"
|
|
mod foo {
|
|
fn <|><|>
|
|
}
|
|
",
|
|
"bar",
|
|
11,
|
|
);
|
|
|
|
do_check(
|
|
r"
|
|
trait Foo {
|
|
type <|>Foo<|>;
|
|
}
|
|
",
|
|
"Output",
|
|
3,
|
|
);
|
|
do_check(
|
|
r"
|
|
impl IntoIterator<Item=i32> for Foo {
|
|
f<|><|>
|
|
}
|
|
",
|
|
"n next(",
|
|
9,
|
|
);
|
|
do_check(r"use a::b::{foo,<|>,bar<|>};", "baz", 10);
|
|
do_check(
|
|
r"
|
|
pub enum A {
|
|
Foo<|><|>
|
|
}
|
|
",
|
|
"\nBar;\n",
|
|
11,
|
|
);
|
|
do_check(
|
|
r"
|
|
foo!{a, b<|><|> d}
|
|
",
|
|
", c[3]",
|
|
8,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo() {
|
|
vec![<|><|>]
|
|
}
|
|
",
|
|
"123",
|
|
14,
|
|
);
|
|
do_check(
|
|
r"
|
|
extern {
|
|
fn<|>;<|>
|
|
}
|
|
",
|
|
" exit(code: c_int)",
|
|
11,
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn reparse_token_tests() {
|
|
do_check(
|
|
r"<|><|>
|
|
fn foo() -> i32 { 1 }
|
|
",
|
|
"\n\n\n \n",
|
|
1,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo() -> <|><|> {}
|
|
",
|
|
" \n",
|
|
2,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn <|>foo<|>() -> i32 { 1 }
|
|
",
|
|
"bar",
|
|
3,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo<|><|>foo() { }
|
|
",
|
|
"bar",
|
|
6,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn foo /* <|><|> */ () {}
|
|
",
|
|
"some comment",
|
|
6,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn baz <|><|> () {}
|
|
",
|
|
" \t\t\n\n",
|
|
2,
|
|
);
|
|
do_check(
|
|
r"
|
|
fn baz <|><|> () {}
|
|
",
|
|
" \t\t\n\n",
|
|
2,
|
|
);
|
|
do_check(
|
|
r"
|
|
/// foo <|><|>omment
|
|
mod { }
|
|
",
|
|
"c",
|
|
14,
|
|
);
|
|
do_check(
|
|
r#"
|
|
fn -> &str { "Hello<|><|>" }
|
|
"#,
|
|
", world",
|
|
7,
|
|
);
|
|
do_check(
|
|
r#"
|
|
fn -> &str { // "Hello<|><|>"
|
|
"#,
|
|
", world",
|
|
10,
|
|
);
|
|
do_check(
|
|
r##"
|
|
fn -> &str { r#"Hello<|><|>"#
|
|
"##,
|
|
", world",
|
|
10,
|
|
);
|
|
do_check(
|
|
r"
|
|
#[derive(<|>Copy<|>)]
|
|
enum Foo {
|
|
|
|
}
|
|
",
|
|
"Clone",
|
|
4,
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn reparse_str_token_with_error_unchanged() {
|
|
do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
|
|
}
|
|
|
|
#[test]
|
|
fn reparse_str_token_with_error_fixed() {
|
|
do_check(r#""unterinated<|><|>"#, "\"", 12);
|
|
}
|
|
|
|
#[test]
|
|
fn reparse_block_with_error_in_middle_unchanged() {
|
|
do_check(
|
|
r#"fn main() {
|
|
if {}
|
|
32 + 4<|><|>
|
|
return
|
|
if {}
|
|
}"#,
|
|
"23",
|
|
105,
|
|
)
|
|
}
|
|
|
|
#[test]
|
|
fn reparse_block_with_error_in_middle_fixed() {
|
|
do_check(
|
|
r#"fn main() {
|
|
if {}
|
|
32 + 4<|><|>
|
|
return
|
|
if {}
|
|
}"#,
|
|
";",
|
|
105,
|
|
)
|
|
}
|
|
}
|