VFS no longer stores all source files in memory

This commit is contained in:
Lukas Wirth 2024-01-07 20:31:56 +01:00
parent af40101841
commit 1c40ac79c8
9 changed files with 130 additions and 143 deletions

View File

@ -331,9 +331,8 @@ fn load_crate_graph(
}
let changes = vfs.take_changes();
for file in changes {
if file.exists() {
let contents = vfs.file_contents(file.file_id);
if let Ok(text) = std::str::from_utf8(contents) {
if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change {
if let Ok(text) = std::str::from_utf8(&v) {
analysis_change.change_file(file.file_id, Some(text.into()))
}
}

View File

@ -3,7 +3,7 @@
//!
//! Each tick provides an immutable snapshot of the state as `WorldSnapshot`.
use std::time::Instant;
use std::{collections::hash_map::Entry, time::Instant};
use crossbeam_channel::{unbounded, Receiver, Sender};
use flycheck::FlycheckHandle;
@ -21,7 +21,7 @@ use proc_macro_api::ProcMacroServer;
use project_model::{CargoWorkspace, ProjectWorkspace, Target, WorkspaceBuildScripts};
use rustc_hash::{FxHashMap, FxHashSet};
use triomphe::Arc;
use vfs::{AnchoredPathBuf, Vfs};
use vfs::{AnchoredPathBuf, ChangedFile, Vfs};
use crate::{
config::{Config, ConfigError},
@ -217,8 +217,8 @@ impl GlobalState {
pub(crate) fn process_changes(&mut self) -> bool {
let _p = profile::span("GlobalState::process_changes");
let mut file_changes = FxHashMap::default();
let (change, changed_files, workspace_structure_change) = {
let mut file_changes = FxHashMap::<_, (bool, ChangedFile)>::default();
let (change, modified_files, workspace_structure_change) = {
let mut change = Change::new();
let mut guard = self.vfs.write();
let changed_files = guard.0.take_changes();
@ -233,64 +233,63 @@ impl GlobalState {
// id that is followed by a delete we actually skip observing the file text from the
// earlier event, to avoid problems later on.
for changed_file in changed_files {
use vfs::ChangeKind::*;
file_changes
.entry(changed_file.file_id)
.and_modify(|(change, just_created)| {
// None -> Delete => keep
// Create -> Delete => collapse
//
match (change, just_created, changed_file.change_kind) {
use vfs::Change::*;
match file_changes.entry(changed_file.file_id) {
Entry::Occupied(mut o) => {
let (just_created, change) = o.get_mut();
match (&mut change.change, just_created, changed_file.change) {
// latter `Delete` wins
(change, _, Delete) => *change = Delete,
// merge `Create` with `Create` or `Modify`
(Create, _, Create | Modify) => {}
(Create(prev), _, Create(new) | Modify(new)) => *prev = new,
// collapse identical `Modify`es
(Modify, _, Modify) => {}
(Modify(prev), _, Modify(new)) => *prev = new,
// equivalent to `Modify`
(change @ Delete, just_created, Create) => {
*change = Modify;
(change @ Delete, just_created, Create(new)) => {
*change = Modify(new);
*just_created = true;
}
// shouldn't occur, but collapse into `Create`
(change @ Delete, just_created, Modify) => {
*change = Create;
(change @ Delete, just_created, Modify(new)) => {
*change = Create(new);
*just_created = true;
}
// shouldn't occur, but collapse into `Modify`
(Modify, _, Create) => {}
(Modify(prev), _, Create(new)) => *prev = new,
}
})
.or_insert((
changed_file.change_kind,
matches!(changed_file.change_kind, Create),
));
}
Entry::Vacant(v) => {
_ = v.insert((matches!(&changed_file.change, Create(_)), changed_file))
}
}
}
let changed_files: Vec<_> = file_changes
.into_iter()
.filter(|(_, (change_kind, just_created))| {
!matches!((change_kind, just_created), (vfs::ChangeKind::Delete, true))
.filter(|(_, (just_created, change))| {
!(*just_created && matches!(change.change, vfs::Change::Delete))
})
.map(|(file_id, (change_kind, _))| vfs::ChangedFile { file_id, change_kind })
.map(|(file_id, (_, change))| vfs::ChangedFile { file_id, ..change })
.collect();
let mut workspace_structure_change = None;
// A file was added or deleted
let mut has_structure_changes = false;
let mut bytes = vec![];
for file in &changed_files {
let mut modified_files = vec![];
for file in changed_files {
let vfs_path = &vfs.file_path(file.file_id);
if let Some(path) = vfs_path.as_path() {
let path = path.to_path_buf();
if reload::should_refresh_for_change(&path, file.change_kind) {
if reload::should_refresh_for_change(&path, file.kind()) {
workspace_structure_change = Some((path.clone(), false));
}
if file.is_created_or_deleted() {
has_structure_changes = true;
workspace_structure_change =
Some((path, self.crate_graph_file_dependencies.contains(vfs_path)));
} else {
modified_files.push(file.file_id);
}
}
@ -299,10 +298,8 @@ impl GlobalState {
self.diagnostics.clear_native_for(file.file_id);
}
let text = if file.exists() {
let bytes = vfs.file_contents(file.file_id).to_vec();
String::from_utf8(bytes).ok().and_then(|text| {
let text = if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change {
String::from_utf8(v).ok().and_then(|text| {
// FIXME: Consider doing normalization in the `vfs` instead? That allows
// getting rid of some locking
let (text, line_endings) = LineEndings::normalize(text);
@ -327,11 +324,10 @@ impl GlobalState {
let roots = self.source_root_config.partition(vfs);
change.set_roots(roots);
}
(change, changed_files, workspace_structure_change)
(change, modified_files, workspace_structure_change)
};
self.analysis_host.apply_change(change);
{
let raw_database = self.analysis_host.raw_database();
// FIXME: ideally we should only trigger a workspace fetch for non-library changes
@ -343,13 +339,12 @@ impl GlobalState {
force_crate_graph_reload,
);
}
self.proc_macro_changed =
changed_files.iter().filter(|file| !file.is_created_or_deleted()).any(|file| {
let crates = raw_database.relevant_crates(file.file_id);
let crate_graph = raw_database.crate_graph();
self.proc_macro_changed = modified_files.into_iter().any(|file_id| {
let crates = raw_database.relevant_crates(file_id);
let crate_graph = raw_database.crate_graph();
crates.iter().any(|&krate| crate_graph[krate].is_proc_macro)
});
crates.iter().any(|&krate| crate_graph[krate].is_proc_macro)
});
}
true
@ -494,10 +489,6 @@ impl GlobalStateSnapshot {
})
}
pub(crate) fn vfs_memory_usage(&self) -> usize {
self.vfs_read().memory_usage()
}
pub(crate) fn file_exists(&self, file_id: FileId) -> bool {
self.vfs.read().0.exists(file_id)
}

View File

@ -59,7 +59,13 @@ pub(crate) fn handle_did_open_text_document(
if let Ok(path) = from_proto::vfs_path(&params.text_document.uri) {
let already_exists = state
.mem_docs
.insert(path.clone(), DocumentData::new(params.text_document.version))
.insert(
path.clone(),
DocumentData::new(
params.text_document.version,
params.text_document.text.clone().into_bytes(),
),
)
.is_err();
if already_exists {
tracing::error!("duplicate DidOpenTextDocument: {}", path);
@ -76,11 +82,12 @@ pub(crate) fn handle_did_change_text_document(
let _p = profile::span("handle_did_change_text_document");
if let Ok(path) = from_proto::vfs_path(&params.text_document.uri) {
match state.mem_docs.get_mut(&path) {
let data = match state.mem_docs.get_mut(&path) {
Some(doc) => {
// The version passed in DidChangeTextDocument is the version after all edits are applied
// so we should apply it before the vfs is notified.
doc.version = params.text_document.version;
&mut doc.data
}
None => {
tracing::error!("unexpected DidChangeTextDocument: {}", path);
@ -88,16 +95,14 @@ pub(crate) fn handle_did_change_text_document(
}
};
let text = apply_document_changes(
let new_contents = apply_document_changes(
state.config.position_encoding(),
|| {
let vfs = &state.vfs.read().0;
let file_id = vfs.file_id(&path).unwrap();
std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into()
},
std::str::from_utf8(data).unwrap(),
params.content_changes,
);
state.vfs.write().0.set_file_contents(path, Some(text.into_bytes()));
)
.into_bytes();
*data = new_contents.clone();
state.vfs.write().0.set_file_contents(path, Some(new_contents));
}
Ok(())
}

View File

@ -103,7 +103,6 @@ pub(crate) fn handle_analyzer_status(
.collect::<Vec<&AbsPath>>()
);
}
format_to!(buf, "\nVfs memory usage: {}\n", profile::Bytes::new(snap.vfs_memory_usage() as _));
buf.push_str("\nAnalysis:\n");
buf.push_str(
&snap

View File

@ -168,7 +168,7 @@ impl GlobalState {
pub(crate) fn apply_document_changes(
encoding: PositionEncoding,
file_contents: impl FnOnce() -> String,
file_contents: &str,
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
) -> String {
// If at least one of the changes is a full document change, use the last
@ -179,7 +179,7 @@ pub(crate) fn apply_document_changes(
let text = mem::take(&mut content_changes[idx].text);
(text, &content_changes[idx + 1..])
}
None => (file_contents(), &content_changes[..]),
None => (file_contents.to_owned(), &content_changes[..]),
};
if content_changes.is_empty() {
return text;
@ -276,11 +276,11 @@ mod tests {
}
let encoding = PositionEncoding::Wide(WideEncoding::Utf16);
let text = apply_document_changes(encoding, || String::new(), vec![]);
let text = apply_document_changes(encoding, "", vec![]);
assert_eq!(text, "");
let text = apply_document_changes(
encoding,
|| text,
&text,
vec![TextDocumentContentChangeEvent {
range: None,
range_length: None,
@ -288,49 +288,46 @@ mod tests {
}],
);
assert_eq!(text, "the");
let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]);
let text = apply_document_changes(encoding, &text, c![0, 3; 0, 3 => " quick"]);
assert_eq!(text, "the quick");
let text =
apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
apply_document_changes(encoding, &text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
assert_eq!(text, "quick foxes");
let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]);
let text = apply_document_changes(encoding, &text, c![0, 11; 0, 11 => "\ndream"]);
assert_eq!(text, "quick foxes\ndream");
let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]);
let text = apply_document_changes(encoding, &text, c![1, 0; 1, 0 => "have "]);
assert_eq!(text, "quick foxes\nhave dream");
let text = apply_document_changes(
encoding,
|| text,
&text,
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
);
assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
let text = apply_document_changes(
encoding,
|| text,
c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"],
);
let text =
apply_document_changes(encoding, &text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]);
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
let text = apply_document_changes(
encoding,
|| text,
&text,
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
);
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
let text =
apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
apply_document_changes(encoding, &text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
assert_eq!(text, "the quick \nthey have quiet dreams\n");
let text = String::from("❤️");
let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]);
let text = apply_document_changes(encoding, &text, c![0, 0; 0, 0 => "a"]);
assert_eq!(text, "a❤");
let text = String::from("a\nb");
let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
apply_document_changes(encoding, &text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
assert_eq!(text, "adcb");
let text = String::from("a\nb");
let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
apply_document_changes(encoding, &text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
assert_eq!(text, "ațc\ncb");
}

View File

@ -576,6 +576,8 @@ impl GlobalState {
let vfs = &mut self.vfs.write().0;
for (path, contents) in files {
let path = VfsPath::from(path);
// if the file is in mem docs, it's managed by the client via notifications
// so only set it if its not in there
if !self.mem_docs.contains(&path) {
vfs.set_file_contents(path, contents);
}

View File

@ -62,10 +62,11 @@ impl MemDocs {
#[derive(Debug, Clone)]
pub(crate) struct DocumentData {
pub(crate) version: i32,
pub(crate) data: Vec<u8>,
}
impl DocumentData {
pub(crate) fn new(version: i32) -> Self {
DocumentData { version }
pub(crate) fn new(version: i32, data: Vec<u8>) -> Self {
DocumentData { version, data }
}
}

View File

@ -503,10 +503,9 @@ impl GlobalState {
match vfs.file_id(&vfs_path) {
Some(file_id) => Some(file_id),
None => {
if !self.mem_docs.contains(&vfs_path) {
let contents = loader.handle.load_sync(path);
vfs.set_file_contents(vfs_path.clone(), contents);
}
// FIXME: Consider not loading this here?
let contents = loader.handle.load_sync(path);
vfs.set_file_contents(vfs_path.clone(), contents);
vfs.file_id(&vfs_path)
}
}

View File

@ -1,8 +1,8 @@
//! # Virtual File System
//!
//! VFS stores all files read by rust-analyzer. Reading file contents from VFS
//! always returns the same contents, unless VFS was explicitly modified with
//! [`set_file_contents`]. All changes to VFS are logged, and can be retrieved via
//! VFS records all file changes pushed to it via [`set_file_contents`].
//! As such it only ever stores changes, not the actual content of a file at any given moment.
//! All file changes are logged, and can be retrieved via
//! [`take_changes`] method. The pack of changes is then pushed to `salsa` and
//! triggers incremental recomputation.
//!
@ -84,40 +84,65 @@ impl FileId {
/// safe because `FileId` is a newtype of `u32`
impl nohash_hasher::IsEnabled for FileId {}
/// Storage for all files read by rust-analyzer.
/// Storage for all file changes and the file id to path mapping.
///
/// For more information see the [crate-level](crate) documentation.
#[derive(Default)]
pub struct Vfs {
interner: PathInterner,
data: Vec<Option<Vec<u8>>>,
data: Vec<FileState>,
changes: Vec<ChangedFile>,
}
#[derive(Copy, PartialEq, PartialOrd, Clone)]
pub enum FileState {
Exists,
Deleted,
}
/// Changed file in the [`Vfs`].
#[derive(Debug)]
pub struct ChangedFile {
/// Id of the changed file
pub file_id: FileId,
/// Kind of change
pub change_kind: ChangeKind,
pub change: Change,
}
impl ChangedFile {
/// Returns `true` if the change is not [`Delete`](ChangeKind::Delete).
pub fn exists(&self) -> bool {
self.change_kind != ChangeKind::Delete
!matches!(self.change, Change::Delete)
}
/// Returns `true` if the change is [`Create`](ChangeKind::Create) or
/// [`Delete`](ChangeKind::Delete).
/// [`Delete`](Change::Delete).
pub fn is_created_or_deleted(&self) -> bool {
matches!(self.change_kind, ChangeKind::Create | ChangeKind::Delete)
matches!(self.change, Change::Create(_) | Change::Delete)
}
pub fn kind(&self) -> ChangeKind {
match self.change {
Change::Create(_) => ChangeKind::Create,
Change::Modify(_) => ChangeKind::Modify,
Change::Delete => ChangeKind::Delete,
}
}
}
/// Kind of [file change](ChangedFile).
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
#[derive(Eq, PartialEq, Debug)]
pub enum Change {
/// The file was (re-)created
Create(Vec<u8>),
/// The file was modified
Modify(Vec<u8>),
/// The file was deleted
Delete,
}
/// Kind of [file change](ChangedFile).
#[derive(Eq, PartialEq, Debug)]
pub enum ChangeKind {
/// The file was (re-)created
Create,
@ -130,7 +155,7 @@ pub enum ChangeKind {
impl Vfs {
/// Id of the given path if it exists in the `Vfs` and is not deleted.
pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
self.interner.get(path).filter(|&it| self.get(it).is_some())
self.interner.get(path).filter(|&it| matches!(self.get(it), FileState::Exists))
}
/// File path corresponding to the given `file_id`.
@ -142,28 +167,13 @@ impl Vfs {
self.interner.lookup(file_id).clone()
}
/// File content corresponding to the given `file_id`.
///
/// # Panics
///
/// Panics if the id is not present in the `Vfs`, or if the corresponding file is
/// deleted.
pub fn file_contents(&self, file_id: FileId) -> &[u8] {
self.get(file_id).as_deref().unwrap()
}
/// Returns the overall memory usage for the stored files.
pub fn memory_usage(&self) -> usize {
self.data.iter().flatten().map(|d| d.capacity()).sum()
}
/// Returns an iterator over the stored ids and their corresponding paths.
///
/// This will skip deleted files.
pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ {
(0..self.data.len())
.map(|it| FileId(it as u32))
.filter(move |&file_id| self.get(file_id).is_some())
.filter(move |&file_id| matches!(self.get(file_id), FileState::Exists))
.map(move |file_id| {
let path = self.interner.lookup(file_id);
(file_id, path)
@ -176,28 +186,21 @@ impl Vfs {
///
/// If the path does not currently exists in the `Vfs`, allocates a new
/// [`FileId`] for it.
pub fn set_file_contents(&mut self, path: VfsPath, mut contents: Option<Vec<u8>>) -> bool {
pub fn set_file_contents(&mut self, path: VfsPath, contents: Option<Vec<u8>>) -> bool {
let file_id = self.alloc_file_id(path);
let change_kind = match (self.get(file_id), &contents) {
(None, None) => return false,
(Some(old), Some(new)) if old == new => return false,
(None, Some(_)) => ChangeKind::Create,
(Some(_), None) => ChangeKind::Delete,
(Some(_), Some(_)) => ChangeKind::Modify,
let change_kind = match (self.get(file_id), contents) {
(FileState::Deleted, None) => return false,
(FileState::Deleted, Some(v)) => Change::Create(v),
(FileState::Exists, None) => Change::Delete,
(FileState::Exists, Some(v)) => Change::Modify(v),
};
if let Some(contents) = &mut contents {
contents.shrink_to_fit();
}
*self.get_mut(file_id) = contents;
self.changes.push(ChangedFile { file_id, change_kind });
let changed_file = ChangedFile { file_id, change: change_kind };
self.data[file_id.0 as usize] =
if changed_file.exists() { FileState::Exists } else { FileState::Deleted };
self.changes.push(changed_file);
true
}
/// Returns `true` if the `Vfs` contains [changes](ChangedFile).
pub fn has_changes(&self) -> bool {
!self.changes.is_empty()
}
/// Drain and returns all the changes in the `Vfs`.
pub fn take_changes(&mut self) -> Vec<ChangedFile> {
mem::take(&mut self.changes)
@ -205,7 +208,7 @@ impl Vfs {
/// Provides a panic-less way to verify file_id validity.
pub fn exists(&self, file_id: FileId) -> bool {
self.get(file_id).is_some()
matches!(self.get(file_id), FileState::Exists)
}
/// Returns the id associated with `path`
@ -219,26 +222,17 @@ impl Vfs {
let file_id = self.interner.intern(path);
let idx = file_id.0 as usize;
let len = self.data.len().max(idx + 1);
self.data.resize_with(len, || None);
self.data.resize(len, FileState::Deleted);
file_id
}
/// Returns the content associated with the given `file_id`.
/// Returns the status of the file associated with the given `file_id`.
///
/// # Panics
///
/// Panics if no file is associated to that id.
fn get(&self, file_id: FileId) -> &Option<Vec<u8>> {
&self.data[file_id.0 as usize]
}
/// Mutably returns the content associated with the given `file_id`.
///
/// # Panics
///
/// Panics if no file is associated to that id.
fn get_mut(&mut self, file_id: FileId) -> &mut Option<Vec<u8>> {
&mut self.data[file_id.0 as usize]
fn get(&self, file_id: FileId) -> FileState {
self.data[file_id.0 as usize]
}
}