From 29256f22e4fb8db8558885e5eeeac5595c428031 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 3 May 2023 19:15:14 -0700 Subject: [PATCH 01/40] Make non-hash an external lib --- Cargo.lock | 5 +++ crates/stdx/Cargo.toml | 1 + crates/stdx/src/lib.rs | 2 +- lib/non-hash/Cargo.toml | 7 ++++ .../src/hash.rs => lib/non-hash/src/lib.rs | 34 ++++++++++++++++--- 5 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 lib/non-hash/Cargo.toml rename crates/stdx/src/hash.rs => lib/non-hash/src/lib.rs (61%) diff --git a/Cargo.lock b/Cargo.lock index f0fe95327f3..d0f07427165 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1054,6 +1054,10 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "non-hash" +version = "0.1.0" + [[package]] name = "notify" version = "5.1.0" @@ -1693,6 +1697,7 @@ dependencies = [ "backtrace", "libc", "miow", + "non-hash", "winapi", ] diff --git a/crates/stdx/Cargo.toml b/crates/stdx/Cargo.toml index c881f2fd3f4..7be9ddaffff 100644 --- a/crates/stdx/Cargo.toml +++ b/crates/stdx/Cargo.toml @@ -15,6 +15,7 @@ doctest = false libc = "0.2.135" backtrace = { version = "0.3.65", optional = true } always-assert = { version = "0.1.2", features = ["log"] } +non-hash = { version = "0.1.0", path = "../../lib/non-hash" } # Think twice before adding anything here [target.'cfg(windows)'.dependencies] diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index 5639aaf57cd..c8f1d8bca11 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -7,13 +7,13 @@ use std::process::Command; use std::{cmp::Ordering, ops, time::Instant}; mod macros; -pub mod hash; pub mod process; pub mod panic_context; pub mod non_empty_vec; pub mod rand; pub use always_assert::{always, never}; +pub use non_hash as hash; #[inline(always)] pub fn is_ci() -> bool { diff --git a/lib/non-hash/Cargo.toml b/lib/non-hash/Cargo.toml new file mode 100644 index 00000000000..27b35a76295 --- /dev/null +++ b/lib/non-hash/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "non-hash" +version = "0.1.0" +description = "A non-hashing `Hasher` implementation." +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/non-hash" +edition = "2021" diff --git a/crates/stdx/src/hash.rs b/lib/non-hash/src/lib.rs similarity index 61% rename from crates/stdx/src/hash.rs rename to lib/non-hash/src/lib.rs index 0c21d2674b1..af03f3d7920 100644 --- a/crates/stdx/src/hash.rs +++ b/lib/non-hash/src/lib.rs @@ -1,25 +1,49 @@ -//! A none hashing [`Hasher`] implementation. +//! A non-hashing [`Hasher`] implementation. + +#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] + use std::{ hash::{BuildHasher, Hasher}, marker::PhantomData, }; +/// A [`std::collections::HashMap`] with [`NoHashHasherBuilder`]. pub type NoHashHashMap = std::collections::HashMap>; + +/// A [`std::collections::HashSet`] with [`NoHashHasherBuilder`]. pub type NoHashHashSet = std::collections::HashSet>; +/// A hasher builder for [`NoHashHasher`]. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct NoHashHasherBuilder(PhantomData); impl Default for NoHashHasherBuilder { fn default() -> Self { - Self(Default::default()) + Self(PhantomData) } } +/// Types for which an acceptable hash function is to return itself. +/// +/// This trait is implemented by sufficiently-small integer types. It should only be implemented for +/// foreign types that are newtypes of these types. If it is implemented on more complex types, +/// hashing will panic. pub trait NoHashHashable {} -impl NoHashHashable for usize {} -impl NoHashHashable for u32 {} +impl NoHashHashable for u8 {} +impl NoHashHashable for u16 {} +impl NoHashHashable for u32 {} +impl NoHashHashable for u64 {} +impl NoHashHashable for usize {} + +impl NoHashHashable for i8 {} +impl NoHashHashable for i16 {} +impl NoHashHashable for i32 {} +impl NoHashHashable for i64 {} +impl NoHashHashable for isize {} + +/// A hasher for [`NoHashHashable`] types. +#[derive(Debug)] pub struct NoHashHasher(u64); impl BuildHasher for NoHashHasherBuilder { @@ -35,7 +59,7 @@ impl Hasher for NoHashHasher { } fn write(&mut self, _: &[u8]) { - unimplemented!("NoHashHasher should only be used for hashing primitive integers") + unimplemented!("NoHashHasher should only be used for hashing sufficiently-small integer types and their newtypes") } fn write_u8(&mut self, i: u8) { From 7e1992a0d9004d9bdbb2a73942789831e9554dba Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 3 May 2023 19:18:41 -0700 Subject: [PATCH 02/40] Make line-index an external lib --- Cargo.lock | 9 + crates/ide-db/Cargo.toml | 3 + crates/ide-db/src/lib.rs | 4 +- crates/ide-db/src/tests/line_index.rs | 54 ++++++ lib/line-index/Cargo.toml | 11 ++ .../line-index/src/lib.rs | 165 +++--------------- lib/line-index/src/tests.rs | 73 ++++++++ 7 files changed, 177 insertions(+), 142 deletions(-) create mode 100644 crates/ide-db/src/tests/line_index.rs create mode 100644 lib/line-index/Cargo.toml rename crates/ide-db/src/line_index.rs => lib/line-index/src/lib.rs (57%) create mode 100644 lib/line-index/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index d0f07427165..8fc4680e21e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -717,6 +717,7 @@ dependencies = [ "indexmap", "itertools", "limit", + "line-index", "memchr", "once_cell", "oorandom", @@ -912,6 +913,14 @@ dependencies = [ name = "limit" version = "0.0.0" +[[package]] +name = "line-index" +version = "0.1.0" +dependencies = [ + "non-hash", + "text-size", +] + [[package]] name = "lock_api" version = "0.4.9" diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml index fccd6d2b6db..022eb7859ce 100644 --- a/crates/ide-db/Cargo.toml +++ b/crates/ide-db/Cargo.toml @@ -37,6 +37,9 @@ text-edit.workspace = true # something from some `hir-xxx` subpackage, reexport the API via `hir`. hir.workspace = true +# used to be a module, turned into its own library +line-index = { version = "0.1.0", path = "../../lib/line-index" } + [dev-dependencies] expect-test = "1.4.0" oorandom = "11.1.3" diff --git a/crates/ide-db/src/lib.rs b/crates/ide-db/src/lib.rs index 5263271fa6f..ff1a20f03f4 100644 --- a/crates/ide-db/src/lib.rs +++ b/crates/ide-db/src/lib.rs @@ -13,7 +13,6 @@ pub mod famous_defs; pub mod helpers; pub mod items_locator; pub mod label; -pub mod line_index; pub mod path_transform; pub mod rename; pub mod rust_doc; @@ -55,6 +54,8 @@ use triomphe::Arc; use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase}; pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; +pub use ::line_index; + /// `base_db` is normally also needed in places where `ide_db` is used, so this re-export is for convenience. pub use base_db; @@ -414,4 +415,5 @@ impl SnippetCap { #[cfg(test)] mod tests { mod sourcegen_lints; + mod line_index; } diff --git a/crates/ide-db/src/tests/line_index.rs b/crates/ide-db/src/tests/line_index.rs new file mode 100644 index 00000000000..c12936071d3 --- /dev/null +++ b/crates/ide-db/src/tests/line_index.rs @@ -0,0 +1,54 @@ +use line_index::{LineCol, LineIndex, WideEncoding}; +use test_utils::skip_slow_tests; + +#[test] +fn test_every_chars() { + if skip_slow_tests() { + return; + } + + let text: String = { + let mut chars: Vec = ((0 as char)..char::MAX).collect(); // Neat! + chars.extend("\n".repeat(chars.len() / 16).chars()); + let mut rng = oorandom::Rand32::new(stdx::rand::seed()); + stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize); + chars.into_iter().collect() + }; + assert!(text.contains('💩')); // Sanity check. + + let line_index = LineIndex::new(&text); + + let mut lin_col = LineCol { line: 0, col: 0 }; + let mut col_utf16 = 0; + let mut col_utf32 = 0; + for (offset, c) in text.char_indices() { + let got_offset = line_index.offset(lin_col).unwrap(); + assert_eq!(usize::from(got_offset), offset); + + let got_lin_col = line_index.line_col(got_offset); + assert_eq!(got_lin_col, lin_col); + + for enc in [WideEncoding::Utf16, WideEncoding::Utf32] { + let wide_lin_col = line_index.to_wide(enc, lin_col); + let got_lin_col = line_index.to_utf8(enc, wide_lin_col); + assert_eq!(got_lin_col, lin_col); + + let want_col = match enc { + WideEncoding::Utf16 => col_utf16, + WideEncoding::Utf32 => col_utf32, + }; + assert_eq!(wide_lin_col.col, want_col) + } + + if c == '\n' { + lin_col.line += 1; + lin_col.col = 0; + col_utf16 = 0; + col_utf32 = 0; + } else { + lin_col.col += c.len_utf8() as u32; + col_utf16 += c.len_utf16() as u32; + col_utf32 += 1; + } + } +} diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml new file mode 100644 index 00000000000..0abc539e892 --- /dev/null +++ b/lib/line-index/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "line-index" +version = "0.1.0" +description = "Maps flat `TextSize` offsets into `(line, column)` representation." +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/non-hash" +edition = "2021" + +[dependencies] +text-size = "1" +non-hash = { version = "0.1.0", path = "../non-hash" } diff --git a/crates/ide-db/src/line_index.rs b/lib/line-index/src/lib.rs similarity index 57% rename from crates/ide-db/src/line_index.rs rename to lib/line-index/src/lib.rs index 9fb58ebe8ab..af01eafc281 100644 --- a/crates/ide-db/src/line_index.rs +++ b/lib/line-index/src/lib.rs @@ -1,10 +1,16 @@ -//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)` -//! representation. +//! See [`LineIndex`]. + +#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] + +#[cfg(test)] +mod tests; + use std::{iter, mem}; -use stdx::hash::NoHashHashMap; -use syntax::{TextRange, TextSize}; +use non_hash::NoHashHashMap; +use text_size::{TextRange, TextSize}; +/// Maps flat [`TextSize`] offsets into `(line, column)` representation. #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line, zero-based. @@ -16,26 +22,29 @@ pub struct LineIndex { /// Line/Column information in native, utf8 format. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct LineCol { - /// Zero-based + /// Zero-based. pub line: u32, - /// Zero-based utf8 offset + /// Zero-based UTF-8 offset. pub col: u32, } +/// A kind of wide character encoding. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum WideEncoding { + /// UTF-16. Utf16, + /// UTF-32. Utf32, } /// Line/Column information in legacy encodings. /// -/// Deliberately not a generic type and different from `LineCol`. +/// Deliberately not a generic type and different from [`LineCol`]. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct WideLineCol { - /// Zero-based + /// Zero-based. pub line: u32, - /// Zero-based + /// Zero-based. pub col: u32, } @@ -70,6 +79,7 @@ impl WideChar { } impl LineIndex { + /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { let mut line_wide_chars = NoHashHashMap::default(); let mut wide_chars = Vec::new(); @@ -115,6 +125,7 @@ impl LineIndex { LineIndex { newlines, line_wide_chars } } + /// Transforms the `TextSize` into a `LineCol`. pub fn line_col(&self, offset: TextSize) -> LineCol { let line = self.newlines.partition_point(|&it| it <= offset) - 1; let line_start_offset = self.newlines[line]; @@ -122,22 +133,26 @@ impl LineIndex { LineCol { line: line as u32, col: col.into() } } + /// Transforms the `LineCol` into a `TextSize`. pub fn offset(&self, line_col: LineCol) -> Option { self.newlines .get(line_col.line as usize) .map(|offset| offset + TextSize::from(line_col.col)) } + /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol { let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); WideLineCol { line: line_col.line, col: col as u32 } } + /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); LineCol { line: line_col.line, col: col.into() } } + /// Returns an iterator over the ranges for the lines. pub fn lines(&self, range: TextRange) -> impl Iterator + '_ { let lo = self.newlines.partition_point(|&it| it < range.start()); let hi = self.newlines.partition_point(|&it| it <= range.end()); @@ -183,135 +198,3 @@ impl LineIndex { col.into() } } - -#[cfg(test)] -mod tests { - use test_utils::skip_slow_tests; - - use super::WideEncoding::{Utf16, Utf32}; - use super::*; - - #[test] - fn test_line_index() { - let text = "hello\nworld"; - let table = [ - (00, 0, 0), - (01, 0, 1), - (05, 0, 5), - (06, 1, 0), - (07, 1, 1), - (08, 1, 2), - (10, 1, 4), - (11, 1, 5), - (12, 1, 6), - ]; - - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } - - let text = "\nhello\nworld"; - let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } - } - - #[test] - fn test_char_len() { - assert_eq!('メ'.len_utf8(), 3); - assert_eq!('メ'.len_utf16(), 1); - } - - #[test] - fn test_empty_index() { - let col_index = LineIndex::new( - " -const C: char = 'x'; -", - ); - assert_eq!(col_index.line_wide_chars.len(), 0); - } - - #[test] - fn test_every_chars() { - if skip_slow_tests() { - return; - } - - let text: String = { - let mut chars: Vec = ((0 as char)..char::MAX).collect(); // Neat! - chars.extend("\n".repeat(chars.len() / 16).chars()); - let mut rng = oorandom::Rand32::new(stdx::rand::seed()); - stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize); - chars.into_iter().collect() - }; - assert!(text.contains('💩')); // Sanity check. - - let line_index = LineIndex::new(&text); - - let mut lin_col = LineCol { line: 0, col: 0 }; - let mut col_utf16 = 0; - let mut col_utf32 = 0; - for (offset, c) in text.char_indices() { - let got_offset = line_index.offset(lin_col).unwrap(); - assert_eq!(usize::from(got_offset), offset); - - let got_lin_col = line_index.line_col(got_offset); - assert_eq!(got_lin_col, lin_col); - - for enc in [Utf16, Utf32] { - let wide_lin_col = line_index.to_wide(enc, lin_col); - let got_lin_col = line_index.to_utf8(enc, wide_lin_col); - assert_eq!(got_lin_col, lin_col); - - let want_col = match enc { - Utf16 => col_utf16, - Utf32 => col_utf32, - }; - assert_eq!(wide_lin_col.col, want_col) - } - - if c == '\n' { - lin_col.line += 1; - lin_col.col = 0; - col_utf16 = 0; - col_utf32 = 0; - } else { - lin_col.col += c.len_utf8() as u32; - col_utf16 += c.len_utf16() as u32; - col_utf32 += 1; - } - } - } - - #[test] - fn test_splitlines() { - fn r(lo: u32, hi: u32) -> TextRange { - TextRange::new(lo.into(), hi.into()) - } - - let text = "a\nbb\nccc\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 9)).collect::>(); - let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; - assert_eq!(actual, expected); - - let text = ""; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 0)).collect::>(); - let expected = vec![]; - assert_eq!(actual, expected); - - let text = "\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 1)).collect::>(); - let expected = vec![r(0, 1)]; - assert_eq!(actual, expected) - } -} diff --git a/lib/line-index/src/tests.rs b/lib/line-index/src/tests.rs new file mode 100644 index 00000000000..4b58cfc47dd --- /dev/null +++ b/lib/line-index/src/tests.rs @@ -0,0 +1,73 @@ +use super::*; + +#[test] +fn test_line_index() { + let text = "hello\nworld"; + let table = [ + (00, 0, 0), + (01, 0, 1), + (05, 0, 5), + (06, 1, 0), + (07, 1, 1), + (08, 1, 2), + (10, 1, 4), + (11, 1, 5), + (12, 1, 6), + ]; + + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } + + let text = "\nhello\nworld"; + let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } +} + +#[test] +fn test_char_len() { + assert_eq!('メ'.len_utf8(), 3); + assert_eq!('メ'.len_utf16(), 1); +} + +#[test] +fn test_empty_index() { + let col_index = LineIndex::new( + " +const C: char = 'x'; +", + ); + assert_eq!(col_index.line_wide_chars.len(), 0); +} + +#[test] +fn test_splitlines() { + fn r(lo: u32, hi: u32) -> TextRange { + TextRange::new(lo.into(), hi.into()) + } + + let text = "a\nbb\nccc\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 9)).collect::>(); + let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; + assert_eq!(actual, expected); + + let text = ""; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 0)).collect::>(); + let expected = vec![]; + assert_eq!(actual, expected); + + let text = "\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 1)).collect::>(); + let expected = vec![r(0, 1)]; + assert_eq!(actual, expected) +} From 85dd7b22b4f7c25b5be48dc477f85f84575cf6b5 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 3 May 2023 23:48:59 -0700 Subject: [PATCH 03/40] Use nohash_hasher, rm comment --- Cargo.lock | 10 ++-- Cargo.toml | 2 + crates/ide-db/Cargo.toml | 3 +- crates/stdx/Cargo.toml | 2 +- crates/stdx/src/hash.rs | 5 ++ crates/stdx/src/lib.rs | 2 +- crates/vfs/src/lib.rs | 3 +- lib/line-index/Cargo.toml | 4 +- lib/line-index/src/lib.rs | 2 +- lib/non-hash/Cargo.toml | 7 --- lib/non-hash/src/lib.rs | 104 -------------------------------------- 11 files changed, 21 insertions(+), 123 deletions(-) create mode 100644 crates/stdx/src/hash.rs delete mode 100644 lib/non-hash/Cargo.toml delete mode 100644 lib/non-hash/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 8fc4680e21e..f7179e94242 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -917,7 +917,7 @@ version = "0.0.0" name = "line-index" version = "0.1.0" dependencies = [ - "non-hash", + "nohash-hasher", "text-size", ] @@ -1064,8 +1064,10 @@ dependencies = [ ] [[package]] -name = "non-hash" -version = "0.1.0" +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" [[package]] name = "notify" @@ -1706,7 +1708,7 @@ dependencies = [ "backtrace", "libc", "miow", - "non-hash", + "nohash-hasher", "winapi", ] diff --git a/Cargo.toml b/Cargo.toml index ef8d8c0eef4..a37b05f1211 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,10 +74,12 @@ toolchain = { path = "./crates/toolchain", version = "0.0.0" } tt = { path = "./crates/tt", version = "0.0.0" } vfs-notify = { path = "./crates/vfs-notify", version = "0.0.0" } vfs = { path = "./crates/vfs", version = "0.0.0" } +line-index = { version = "0.1.0", path = "./lib/line-index" } # non-local crates smallvec = { version = "1.10.0", features = ["const_new", "union", "const_generics"] } smol_str = "0.2.0" +nohash-hasher = "0.2.0" # the following crates are pinned to prevent us from pulling in syn 2 until all our dependencies have moved serde = { version = "=1.0.156", features = ["derive"] } serde_json = "1.0.94" diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml index 022eb7859ce..a0b79d17646 100644 --- a/crates/ide-db/Cargo.toml +++ b/crates/ide-db/Cargo.toml @@ -37,8 +37,7 @@ text-edit.workspace = true # something from some `hir-xxx` subpackage, reexport the API via `hir`. hir.workspace = true -# used to be a module, turned into its own library -line-index = { version = "0.1.0", path = "../../lib/line-index" } +line-index.workspace = true [dev-dependencies] expect-test = "1.4.0" diff --git a/crates/stdx/Cargo.toml b/crates/stdx/Cargo.toml index 7be9ddaffff..3933a1f8c96 100644 --- a/crates/stdx/Cargo.toml +++ b/crates/stdx/Cargo.toml @@ -15,7 +15,7 @@ doctest = false libc = "0.2.135" backtrace = { version = "0.3.65", optional = true } always-assert = { version = "0.1.2", features = ["log"] } -non-hash = { version = "0.1.0", path = "../../lib/non-hash" } +nohash-hasher.workspace = true # Think twice before adding anything here [target.'cfg(windows)'.dependencies] diff --git a/crates/stdx/src/hash.rs b/crates/stdx/src/hash.rs new file mode 100644 index 00000000000..66e6c9462b6 --- /dev/null +++ b/crates/stdx/src/hash.rs @@ -0,0 +1,5 @@ +//! Re-exports from [`nohash_hasher`]. + +pub use nohash_hasher::IntMap as NoHashHashMap; +pub use nohash_hasher::IntSet as NoHashHashSet; +pub use nohash_hasher::IsEnabled; diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index c8f1d8bca11..5ec6e0751a4 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -11,9 +11,9 @@ pub mod process; pub mod panic_context; pub mod non_empty_vec; pub mod rand; +pub mod hash; pub use always_assert::{always, never}; -pub use non_hash as hash; #[inline(always)] pub fn is_ci() -> bool { diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs index b510b9e3942..caddd4e4810 100644 --- a/crates/vfs/src/lib.rs +++ b/crates/vfs/src/lib.rs @@ -62,7 +62,8 @@ pub use paths::{AbsPath, AbsPathBuf}; #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct FileId(pub u32); -impl stdx::hash::NoHashHashable for FileId {} +/// safe because `FileId` is a newtype of `u32` +impl stdx::hash::IsEnabled for FileId {} /// Storage for all files read by rust-analyzer. /// diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml index 0abc539e892..bea9242ea6c 100644 --- a/lib/line-index/Cargo.toml +++ b/lib/line-index/Cargo.toml @@ -3,9 +3,9 @@ name = "line-index" version = "0.1.0" description = "Maps flat `TextSize` offsets into `(line, column)` representation." license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/non-hash" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index" edition = "2021" [dependencies] text-size = "1" -non-hash = { version = "0.1.0", path = "../non-hash" } +nohash-hasher.workspace = true diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index af01eafc281..64a094c63e1 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -7,7 +7,7 @@ mod tests; use std::{iter, mem}; -use non_hash::NoHashHashMap; +use nohash_hasher::IntMap as NoHashHashMap; use text_size::{TextRange, TextSize}; /// Maps flat [`TextSize`] offsets into `(line, column)` representation. diff --git a/lib/non-hash/Cargo.toml b/lib/non-hash/Cargo.toml deleted file mode 100644 index 27b35a76295..00000000000 --- a/lib/non-hash/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "non-hash" -version = "0.1.0" -description = "A non-hashing `Hasher` implementation." -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/non-hash" -edition = "2021" diff --git a/lib/non-hash/src/lib.rs b/lib/non-hash/src/lib.rs deleted file mode 100644 index af03f3d7920..00000000000 --- a/lib/non-hash/src/lib.rs +++ /dev/null @@ -1,104 +0,0 @@ -//! A non-hashing [`Hasher`] implementation. - -#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] - -use std::{ - hash::{BuildHasher, Hasher}, - marker::PhantomData, -}; - -/// A [`std::collections::HashMap`] with [`NoHashHasherBuilder`]. -pub type NoHashHashMap = std::collections::HashMap>; - -/// A [`std::collections::HashSet`] with [`NoHashHasherBuilder`]. -pub type NoHashHashSet = std::collections::HashSet>; - -/// A hasher builder for [`NoHashHasher`]. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct NoHashHasherBuilder(PhantomData); - -impl Default for NoHashHasherBuilder { - fn default() -> Self { - Self(PhantomData) - } -} - -/// Types for which an acceptable hash function is to return itself. -/// -/// This trait is implemented by sufficiently-small integer types. It should only be implemented for -/// foreign types that are newtypes of these types. If it is implemented on more complex types, -/// hashing will panic. -pub trait NoHashHashable {} - -impl NoHashHashable for u8 {} -impl NoHashHashable for u16 {} -impl NoHashHashable for u32 {} -impl NoHashHashable for u64 {} -impl NoHashHashable for usize {} - -impl NoHashHashable for i8 {} -impl NoHashHashable for i16 {} -impl NoHashHashable for i32 {} -impl NoHashHashable for i64 {} -impl NoHashHashable for isize {} - -/// A hasher for [`NoHashHashable`] types. -#[derive(Debug)] -pub struct NoHashHasher(u64); - -impl BuildHasher for NoHashHasherBuilder { - type Hasher = NoHashHasher; - fn build_hasher(&self) -> Self::Hasher { - NoHashHasher(0) - } -} - -impl Hasher for NoHashHasher { - fn finish(&self) -> u64 { - self.0 - } - - fn write(&mut self, _: &[u8]) { - unimplemented!("NoHashHasher should only be used for hashing sufficiently-small integer types and their newtypes") - } - - fn write_u8(&mut self, i: u8) { - self.0 = i as u64; - } - - fn write_u16(&mut self, i: u16) { - self.0 = i as u64; - } - - fn write_u32(&mut self, i: u32) { - self.0 = i as u64; - } - - fn write_u64(&mut self, i: u64) { - self.0 = i; - } - - fn write_usize(&mut self, i: usize) { - self.0 = i as u64; - } - - fn write_i8(&mut self, i: i8) { - self.0 = i as u64; - } - - fn write_i16(&mut self, i: i16) { - self.0 = i as u64; - } - - fn write_i32(&mut self, i: i32) { - self.0 = i as u64; - } - - fn write_i64(&mut self, i: i64) { - self.0 = i as u64; - } - - fn write_isize(&mut self, i: isize) { - self.0 = i as u64; - } -} From 4b28ad92e99d7bc60f79ceea56082553b6fead7e Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 3 May 2023 23:52:58 -0700 Subject: [PATCH 04/40] Make text-size a workspace dep --- Cargo.toml | 1 + crates/test-utils/Cargo.toml | 2 +- crates/text-edit/Cargo.toml | 2 +- lib/line-index/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a37b05f1211..123b06e0ccc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ line-index = { version = "0.1.0", path = "./lib/line-index" } smallvec = { version = "1.10.0", features = ["const_new", "union", "const_generics"] } smol_str = "0.2.0" nohash-hasher = "0.2.0" +text-size = "1.1.0" # the following crates are pinned to prevent us from pulling in syn 2 until all our dependencies have moved serde = { version = "=1.0.156", features = ["derive"] } serde_json = "1.0.94" diff --git a/crates/test-utils/Cargo.toml b/crates/test-utils/Cargo.toml index 92b1ef23e69..2b5b6f49561 100644 --- a/crates/test-utils/Cargo.toml +++ b/crates/test-utils/Cargo.toml @@ -14,7 +14,7 @@ doctest = false [dependencies] # Avoid adding deps here, this crate is widely used in tests it should compile fast! dissimilar = "1.0.4" -text-size = "1.1.0" +text-size.workspace = true rustc-hash = "1.1.0" stdx.workspace = true diff --git a/crates/text-edit/Cargo.toml b/crates/text-edit/Cargo.toml index 337cd234739..76d0ca5ccb6 100644 --- a/crates/text-edit/Cargo.toml +++ b/crates/text-edit/Cargo.toml @@ -13,4 +13,4 @@ doctest = false [dependencies] itertools = "0.10.5" -text-size = "1.1.0" +text-size.workspace = true diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml index bea9242ea6c..b08a55854e0 100644 --- a/lib/line-index/Cargo.toml +++ b/lib/line-index/Cargo.toml @@ -7,5 +7,5 @@ repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-in edition = "2021" [dependencies] -text-size = "1" +text-size.workspace = true nohash-hasher.workspace = true From b26cded8d19153180dcdd745034ddbb4b3a3fa1e Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 01:06:10 -0700 Subject: [PATCH 05/40] Swap --- crates/stdx/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index 5ec6e0751a4..5639aaf57cd 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -7,11 +7,11 @@ use std::process::Command; use std::{cmp::Ordering, ops, time::Instant}; mod macros; +pub mod hash; pub mod process; pub mod panic_context; pub mod non_empty_vec; pub mod rand; -pub mod hash; pub use always_assert::{always, never}; From 39ef368e75d2afc66516b369dd02a3a0ac8d2b8d Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:20:53 -0700 Subject: [PATCH 06/40] Remove pub(crate) --- lib/line-index/src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 64a094c63e1..2898cdc1bfe 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -14,9 +14,9 @@ use text_size::{TextRange, TextSize}; #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line, zero-based. - pub(crate) newlines: Vec, + newlines: Vec, /// List of non-ASCII characters on each line. - pub(crate) line_wide_chars: NoHashHashMap>, + line_wide_chars: NoHashHashMap>, } /// Line/Column information in native, utf8 format. @@ -49,11 +49,11 @@ pub struct WideLineCol { } #[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub(crate) struct WideChar { +struct WideChar { /// Start offset of a character inside a line, zero-based - pub(crate) start: TextSize, + start: TextSize, /// End offset of a character inside a line, zero-based - pub(crate) end: TextSize, + end: TextSize, } impl WideChar { From 5e2c68f4d400984234b157b10b361470273b57c3 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:21:18 -0700 Subject: [PATCH 07/40] Use less --- lib/line-index/src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 2898cdc1bfe..9ce30102319 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -5,8 +5,6 @@ #[cfg(test)] mod tests; -use std::{iter, mem}; - use nohash_hasher::IntMap as NoHashHashMap; use text_size::{TextRange, TextSize}; @@ -98,7 +96,7 @@ impl LineIndex { // Save any utf-16 characters seen in the previous line if !wide_chars.is_empty() { - line_wide_chars.insert(line, mem::take(&mut wide_chars)); + line_wide_chars.insert(line, std::mem::take(&mut wide_chars)); } // Prepare for processing the next line @@ -156,9 +154,9 @@ impl LineIndex { pub fn lines(&self, range: TextRange) -> impl Iterator + '_ { let lo = self.newlines.partition_point(|&it| it < range.start()); let hi = self.newlines.partition_point(|&it| it <= range.end()); - let all = iter::once(range.start()) + let all = std::iter::once(range.start()) .chain(self.newlines[lo..hi].iter().copied()) - .chain(iter::once(range.end())); + .chain(std::iter::once(range.end())); all.clone() .zip(all.skip(1)) From 71c4a8968d4b7bb08c81f23d04ea4115ee846580 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:21:29 -0700 Subject: [PATCH 08/40] Pub use TextRange, TextSize --- lib/line-index/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 9ce30102319..73aea0ea3c1 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -6,7 +6,8 @@ mod tests; use nohash_hasher::IntMap as NoHashHashMap; -use text_size::{TextRange, TextSize}; + +pub use text_size::{TextRange, TextSize}; /// Maps flat [`TextSize`] offsets into `(line, column)` representation. #[derive(Clone, Debug, PartialEq, Eq)] From 1d678cf6a04d52fca7c593b39fc9a8f772b08b55 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:21:42 -0700 Subject: [PATCH 09/40] Allow clippy --- lib/line-index/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 73aea0ea3c1..33265f0914b 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -1,6 +1,6 @@ //! See [`LineIndex`]. -#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] +#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)] #[cfg(test)] mod tests; From 4a1922fd1af6feab10ecde76b8de40bcad1cb9f7 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:28:15 -0700 Subject: [PATCH 10/40] Depend on nohash-hasher individually --- Cargo.lock | 7 ++++++- crates/hir-ty/Cargo.toml | 1 + crates/hir-ty/src/test_db.rs | 4 ++-- crates/ide-db/Cargo.toml | 1 + crates/ide-db/src/search.rs | 18 +++++++++--------- crates/ide-db/src/source_change.rs | 14 +++++++------- crates/ide-ssr/Cargo.toml | 1 + crates/ide-ssr/src/lib.rs | 6 +++--- crates/ide/Cargo.toml | 1 + crates/ide/src/references.rs | 4 ++-- crates/rust-analyzer/Cargo.toml | 6 ++---- crates/rust-analyzer/src/diagnostics.rs | 14 +++++++------- crates/rust-analyzer/src/global_state.rs | 8 ++++---- crates/stdx/Cargo.toml | 1 - crates/stdx/src/hash.rs | 5 ----- crates/stdx/src/lib.rs | 1 - crates/vfs/Cargo.toml | 1 + crates/vfs/src/file_set.rs | 4 ++-- crates/vfs/src/lib.rs | 2 +- lib/line-index/src/lib.rs | 6 +++--- 20 files changed, 53 insertions(+), 52 deletions(-) delete mode 100644 crates/stdx/src/hash.rs diff --git a/Cargo.lock b/Cargo.lock index f7179e94242..f20cba98655 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -587,6 +587,7 @@ dependencies = [ "itertools", "la-arena", "limit", + "nohash-hasher", "once_cell", "profile", "project-model", @@ -650,6 +651,7 @@ dependencies = [ "ide-diagnostics", "ide-ssr", "itertools", + "nohash-hasher", "oorandom", "profile", "pulldown-cmark", @@ -719,6 +721,7 @@ dependencies = [ "limit", "line-index", "memchr", + "nohash-hasher", "once_cell", "oorandom", "parser", @@ -764,6 +767,7 @@ dependencies = [ "hir", "ide-db", "itertools", + "nohash-hasher", "parser", "stdx", "syntax", @@ -1485,6 +1489,7 @@ dependencies = [ "mbe", "mimalloc", "mio", + "nohash-hasher", "num_cpus", "oorandom", "parking_lot 0.12.1", @@ -1708,7 +1713,6 @@ dependencies = [ "backtrace", "libc", "miow", - "nohash-hasher", "winapi", ] @@ -2066,6 +2070,7 @@ version = "0.0.0" dependencies = [ "fst", "indexmap", + "nohash-hasher", "paths", "rustc-hash", "stdx", diff --git a/crates/hir-ty/Cargo.toml b/crates/hir-ty/Cargo.toml index 51d69d103ad..6ca0dbb8503 100644 --- a/crates/hir-ty/Cargo.toml +++ b/crates/hir-ty/Cargo.toml @@ -29,6 +29,7 @@ chalk-derive = "0.89.0" la-arena = { version = "0.3.0", path = "../../lib/la-arena" } once_cell = "1.17.0" triomphe.workspace = true +nohash-hasher.workspace = true typed-arena = "2.0.1" rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false } diff --git a/crates/hir-ty/src/test_db.rs b/crates/hir-ty/src/test_db.rs index 1276a4c5e14..7d19e0a1916 100644 --- a/crates/hir-ty/src/test_db.rs +++ b/crates/hir-ty/src/test_db.rs @@ -8,8 +8,8 @@ use base_db::{ }; use hir_def::{db::DefDatabase, ModuleId}; use hir_expand::db::ExpandDatabase; +use nohash_hasher::IntMap; use rustc_hash::FxHashSet; -use stdx::hash::NoHashHashMap; use syntax::TextRange; use test_utils::extract_annotations; use triomphe::Arc; @@ -102,7 +102,7 @@ impl TestDB { self.module_for_file_opt(file_id).unwrap() } - pub(crate) fn extract_annotations(&self) -> NoHashHashMap> { + pub(crate) fn extract_annotations(&self) -> IntMap> { let mut files = Vec::new(); let crate_graph = self.crate_graph(); for krate in crate_graph.iter() { diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml index a0b79d17646..4e75dc4dba5 100644 --- a/crates/ide-db/Cargo.toml +++ b/crates/ide-db/Cargo.toml @@ -24,6 +24,7 @@ arrayvec = "0.7.2" indexmap = "1.9.1" memchr = "2.5.0" triomphe.workspace = true +nohash-hasher.workspace = true # local deps base-db.workspace = true diff --git a/crates/ide-db/src/search.rs b/crates/ide-db/src/search.rs index f58a96d595a..9d00c717097 100644 --- a/crates/ide-db/src/search.rs +++ b/crates/ide-db/src/search.rs @@ -11,9 +11,9 @@ use hir::{ AsAssocItem, DefWithBody, HasAttrs, HasSource, InFile, ModuleSource, Semantics, Visibility, }; use memchr::memmem::Finder; +use nohash_hasher::IntMap; use once_cell::unsync::Lazy; use parser::SyntaxKind; -use stdx::hash::NoHashHashMap; use syntax::{ast, match_ast, AstNode, TextRange, TextSize}; use triomphe::Arc; @@ -25,7 +25,7 @@ use crate::{ #[derive(Debug, Default, Clone)] pub struct UsageSearchResult { - pub references: NoHashHashMap>, + pub references: IntMap>, } impl UsageSearchResult { @@ -50,7 +50,7 @@ impl UsageSearchResult { impl IntoIterator for UsageSearchResult { type Item = (FileId, Vec); - type IntoIter = > as IntoIterator>::IntoIter; + type IntoIter = > as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.references.into_iter() @@ -84,17 +84,17 @@ pub enum ReferenceCategory { /// e.g. for things like local variables. #[derive(Clone, Debug)] pub struct SearchScope { - entries: NoHashHashMap>, + entries: IntMap>, } impl SearchScope { - fn new(entries: NoHashHashMap>) -> SearchScope { + fn new(entries: IntMap>) -> SearchScope { SearchScope { entries } } /// Build a search scope spanning the entire crate graph of files. fn crate_graph(db: &RootDatabase) -> SearchScope { - let mut entries = NoHashHashMap::default(); + let mut entries = IntMap::default(); let graph = db.crate_graph(); for krate in graph.iter() { @@ -108,7 +108,7 @@ impl SearchScope { /// Build a search scope spanning all the reverse dependencies of the given crate. fn reverse_dependencies(db: &RootDatabase, of: hir::Crate) -> SearchScope { - let mut entries = NoHashHashMap::default(); + let mut entries = IntMap::default(); for rev_dep in of.transitive_reverse_dependencies(db) { let root_file = rev_dep.root_file(db); let source_root_id = db.file_source_root(root_file); @@ -128,7 +128,7 @@ impl SearchScope { /// Build a search scope spanning the given module and all its submodules. fn module_and_children(db: &RootDatabase, module: hir::Module) -> SearchScope { - let mut entries = NoHashHashMap::default(); + let mut entries = IntMap::default(); let (file_id, range) = { let InFile { file_id, value } = module.definition_source(db); @@ -161,7 +161,7 @@ impl SearchScope { /// Build an empty search scope. pub fn empty() -> SearchScope { - SearchScope::new(NoHashHashMap::default()) + SearchScope::new(IntMap::default()) } /// Build a empty search scope spanning the given file. diff --git a/crates/ide-db/src/source_change.rs b/crates/ide-db/src/source_change.rs index 5a3e352b2ec..061fb0f05cf 100644 --- a/crates/ide-db/src/source_change.rs +++ b/crates/ide-db/src/source_change.rs @@ -5,16 +5,16 @@ use std::{collections::hash_map::Entry, iter, mem}; +use crate::SnippetCap; use base_db::{AnchoredPathBuf, FileId}; -use stdx::{hash::NoHashHashMap, never}; +use nohash_hasher::IntMap; +use stdx::never; use syntax::{algo, ast, ted, AstNode, SyntaxNode, SyntaxNodePtr, TextRange, TextSize}; use text_edit::{TextEdit, TextEditBuilder}; -use crate::SnippetCap; - #[derive(Default, Debug, Clone)] pub struct SourceChange { - pub source_file_edits: NoHashHashMap, + pub source_file_edits: IntMap, pub file_system_edits: Vec, pub is_snippet: bool, } @@ -23,7 +23,7 @@ impl SourceChange { /// Creates a new SourceChange with the given label /// from the edits. pub fn from_edits( - source_file_edits: NoHashHashMap, + source_file_edits: IntMap, file_system_edits: Vec, ) -> Self { SourceChange { source_file_edits, file_system_edits, is_snippet: false } @@ -77,8 +77,8 @@ impl Extend for SourceChange { } } -impl From> for SourceChange { - fn from(source_file_edits: NoHashHashMap) -> SourceChange { +impl From> for SourceChange { + fn from(source_file_edits: IntMap) -> SourceChange { SourceChange { source_file_edits, file_system_edits: Vec::new(), is_snippet: false } } } diff --git a/crates/ide-ssr/Cargo.toml b/crates/ide-ssr/Cargo.toml index b8625fc1be9..70ed6dea5bf 100644 --- a/crates/ide-ssr/Cargo.toml +++ b/crates/ide-ssr/Cargo.toml @@ -16,6 +16,7 @@ doctest = false cov-mark = "2.0.0-pre.1" itertools = "0.10.5" triomphe.workspace = true +nohash-hasher.workspace = true # local deps hir.workspace = true diff --git a/crates/ide-ssr/src/lib.rs b/crates/ide-ssr/src/lib.rs index a1945087d75..f51a9547a50 100644 --- a/crates/ide-ssr/src/lib.rs +++ b/crates/ide-ssr/src/lib.rs @@ -87,8 +87,8 @@ pub use crate::{errors::SsrError, from_comment::ssr_from_comment, matching::Matc use crate::{errors::bail, matching::MatchFailureReason}; use hir::Semantics; use ide_db::base_db::{FileId, FilePosition, FileRange}; +use nohash_hasher::IntMap; use resolving::ResolvedRule; -use stdx::hash::NoHashHashMap; use syntax::{ast, AstNode, SyntaxNode, TextRange}; use text_edit::TextEdit; @@ -168,9 +168,9 @@ impl<'db> MatchFinder<'db> { } /// Finds matches for all added rules and returns edits for all found matches. - pub fn edits(&self) -> NoHashHashMap { + pub fn edits(&self) -> IntMap { use ide_db::base_db::SourceDatabaseExt; - let mut matches_by_file = NoHashHashMap::default(); + let mut matches_by_file = IntMap::default(); for m in self.matches().matches { matches_by_file .entry(m.range.file_id) diff --git a/crates/ide/Cargo.toml b/crates/ide/Cargo.toml index c3dcb60490c..2aee203c4ea 100644 --- a/crates/ide/Cargo.toml +++ b/crates/ide/Cargo.toml @@ -24,6 +24,7 @@ url = "2.3.1" dot = "0.1.4" smallvec.workspace = true triomphe.workspace = true +nohash-hasher.workspace = true # local deps cfg.workspace = true diff --git a/crates/ide/src/references.rs b/crates/ide/src/references.rs index b8e05d4f625..291b1a349b3 100644 --- a/crates/ide/src/references.rs +++ b/crates/ide/src/references.rs @@ -17,7 +17,7 @@ use ide_db::{ RootDatabase, }; use itertools::Itertools; -use stdx::hash::NoHashHashMap; +use nohash_hasher::IntMap; use syntax::{ algo::find_node_at_offset, ast::{self, HasName}, @@ -31,7 +31,7 @@ use crate::{FilePosition, NavigationTarget, TryToNav}; #[derive(Debug, Clone)] pub struct ReferenceSearchResult { pub declaration: Option, - pub references: NoHashHashMap)>>, + pub references: IntMap)>>, } #[derive(Debug, Clone)] diff --git a/crates/rust-analyzer/Cargo.toml b/crates/rust-analyzer/Cargo.toml index c7c2e341276..ae5b8e4c422 100644 --- a/crates/rust-analyzer/Cargo.toml +++ b/crates/rust-analyzer/Cargo.toml @@ -46,6 +46,7 @@ tracing-subscriber = { version = "0.3.16", default-features = false, features = tracing-log = "0.1.3" tracing-tree = "0.2.1" triomphe.workspace = true +nohash-hasher.workspace = true always-assert = "0.1.2" # These dependencies are unused, but we pin them to a version here to restrict them for our transitive dependencies @@ -95,7 +96,4 @@ mbe.workspace = true [features] jemalloc = ["jemallocator", "profile/jemalloc"] force-always-assert = ["always-assert/force"] -in-rust-tree = [ - "ide/in-rust-tree", - "syntax/in-rust-tree", -] +in-rust-tree = ["ide/in-rust-tree", "syntax/in-rust-tree"] diff --git a/crates/rust-analyzer/src/diagnostics.rs b/crates/rust-analyzer/src/diagnostics.rs index 2edb394a0b1..33422fd058e 100644 --- a/crates/rust-analyzer/src/diagnostics.rs +++ b/crates/rust-analyzer/src/diagnostics.rs @@ -5,12 +5,12 @@ use std::mem; use ide::FileId; use ide_db::FxHashMap; -use stdx::hash::{NoHashHashMap, NoHashHashSet}; +use nohash_hasher::{IntMap, IntSet}; use triomphe::Arc; use crate::lsp_ext; -pub(crate) type CheckFixes = Arc>>>; +pub(crate) type CheckFixes = Arc>>>; #[derive(Debug, Default, Clone)] pub struct DiagnosticsMapConfig { @@ -21,12 +21,12 @@ pub struct DiagnosticsMapConfig { #[derive(Debug, Default, Clone)] pub(crate) struct DiagnosticCollection { - // FIXME: should be NoHashHashMap> - pub(crate) native: NoHashHashMap>, + // FIXME: should be IntMap> + pub(crate) native: IntMap>, // FIXME: should be Vec - pub(crate) check: NoHashHashMap>>, + pub(crate) check: IntMap>>, pub(crate) check_fixes: CheckFixes, - changes: NoHashHashSet, + changes: IntSet, } #[derive(Debug, Clone)] @@ -106,7 +106,7 @@ impl DiagnosticCollection { native.chain(check) } - pub(crate) fn take_changes(&mut self) -> Option> { + pub(crate) fn take_changes(&mut self) -> Option> { if self.changes.is_empty() { return None; } diff --git a/crates/rust-analyzer/src/global_state.rs b/crates/rust-analyzer/src/global_state.rs index 0f77eeae4ad..9535d88454f 100644 --- a/crates/rust-analyzer/src/global_state.rs +++ b/crates/rust-analyzer/src/global_state.rs @@ -10,11 +10,11 @@ use flycheck::FlycheckHandle; use ide::{Analysis, AnalysisHost, Cancellable, Change, FileId}; use ide_db::base_db::{CrateId, FileLoader, ProcMacroPaths, SourceDatabase}; use lsp_types::{SemanticTokens, Url}; +use nohash_hasher::IntMap; use parking_lot::{Mutex, RwLock}; use proc_macro_api::ProcMacroServer; use project_model::{CargoWorkspace, ProjectWorkspace, Target, WorkspaceBuildScripts}; use rustc_hash::FxHashMap; -use stdx::hash::NoHashHashMap; use triomphe::Arc; use vfs::AnchoredPathBuf; @@ -70,7 +70,7 @@ pub(crate) struct GlobalState { pub(crate) flycheck_sender: Sender, pub(crate) flycheck_receiver: Receiver, - pub(crate) vfs: Arc)>>, + pub(crate) vfs: Arc)>>, pub(crate) vfs_config_version: u32, pub(crate) vfs_progress_config_version: u32, pub(crate) vfs_progress_n_total: usize, @@ -117,7 +117,7 @@ pub(crate) struct GlobalStateSnapshot { pub(crate) check_fixes: CheckFixes, mem_docs: MemDocs, pub(crate) semantic_tokens_cache: Arc>>, - vfs: Arc)>>, + vfs: Arc)>>, pub(crate) workspaces: Arc>, // used to signal semantic highlighting to fall back to syntax based highlighting until proc-macros have been loaded pub(crate) proc_macros_loaded: bool, @@ -170,7 +170,7 @@ impl GlobalState { flycheck_sender, flycheck_receiver, - vfs: Arc::new(RwLock::new((vfs::Vfs::default(), NoHashHashMap::default()))), + vfs: Arc::new(RwLock::new((vfs::Vfs::default(), IntMap::default()))), vfs_config_version: 0, vfs_progress_config_version: 0, vfs_progress_n_total: 0, diff --git a/crates/stdx/Cargo.toml b/crates/stdx/Cargo.toml index 3933a1f8c96..c881f2fd3f4 100644 --- a/crates/stdx/Cargo.toml +++ b/crates/stdx/Cargo.toml @@ -15,7 +15,6 @@ doctest = false libc = "0.2.135" backtrace = { version = "0.3.65", optional = true } always-assert = { version = "0.1.2", features = ["log"] } -nohash-hasher.workspace = true # Think twice before adding anything here [target.'cfg(windows)'.dependencies] diff --git a/crates/stdx/src/hash.rs b/crates/stdx/src/hash.rs deleted file mode 100644 index 66e6c9462b6..00000000000 --- a/crates/stdx/src/hash.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Re-exports from [`nohash_hasher`]. - -pub use nohash_hasher::IntMap as NoHashHashMap; -pub use nohash_hasher::IntSet as NoHashHashSet; -pub use nohash_hasher::IsEnabled; diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index 5639aaf57cd..8df86e81004 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -7,7 +7,6 @@ use std::process::Command; use std::{cmp::Ordering, ops, time::Instant}; mod macros; -pub mod hash; pub mod process; pub mod panic_context; pub mod non_empty_vec; diff --git a/crates/vfs/Cargo.toml b/crates/vfs/Cargo.toml index 802a300060f..3ae3dc83ca9 100644 --- a/crates/vfs/Cargo.toml +++ b/crates/vfs/Cargo.toml @@ -15,6 +15,7 @@ doctest = false rustc-hash = "1.1.0" fst = "0.4.7" indexmap = "1.9.1" +nohash-hasher.workspace = true paths.workspace = true stdx.workspace = true diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs index 700aebe0b34..0392ef3cebe 100644 --- a/crates/vfs/src/file_set.rs +++ b/crates/vfs/src/file_set.rs @@ -5,8 +5,8 @@ use std::fmt; use fst::{IntoStreamer, Streamer}; +use nohash_hasher::IntMap; use rustc_hash::FxHashMap; -use stdx::hash::NoHashHashMap; use crate::{AnchoredPath, FileId, Vfs, VfsPath}; @@ -14,7 +14,7 @@ use crate::{AnchoredPath, FileId, Vfs, VfsPath}; #[derive(Default, Clone, Eq, PartialEq)] pub struct FileSet { files: FxHashMap, - paths: NoHashHashMap, + paths: IntMap, } impl FileSet { diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs index caddd4e4810..ff8a2b96733 100644 --- a/crates/vfs/src/lib.rs +++ b/crates/vfs/src/lib.rs @@ -63,7 +63,7 @@ pub use paths::{AbsPath, AbsPathBuf}; pub struct FileId(pub u32); /// safe because `FileId` is a newtype of `u32` -impl stdx::hash::IsEnabled for FileId {} +impl nohash_hasher::IsEnabled for FileId {} /// Storage for all files read by rust-analyzer. /// diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 33265f0914b..fa5cf1503fe 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -5,7 +5,7 @@ #[cfg(test)] mod tests; -use nohash_hasher::IntMap as NoHashHashMap; +use nohash_hasher::IntMap; pub use text_size::{TextRange, TextSize}; @@ -15,7 +15,7 @@ pub struct LineIndex { /// Offset the beginning of each line, zero-based. newlines: Vec, /// List of non-ASCII characters on each line. - line_wide_chars: NoHashHashMap>, + line_wide_chars: IntMap>, } /// Line/Column information in native, utf8 format. @@ -80,7 +80,7 @@ impl WideChar { impl LineIndex { /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { - let mut line_wide_chars = NoHashHashMap::default(); + let mut line_wide_chars = IntMap::default(); let mut wide_chars = Vec::new(); let mut newlines = Vec::with_capacity(16); From 4b7c759f2f0ab893feb8f13a55a59751e1050a67 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:34:24 -0700 Subject: [PATCH 11/40] Un-doc a comment --- lib/line-index/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index fa5cf1503fe..bd00519d4e2 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -37,8 +37,8 @@ pub enum WideEncoding { } /// Line/Column information in legacy encodings. -/// -/// Deliberately not a generic type and different from [`LineCol`]. +// +// Deliberately not a generic type and different from `LineCol`. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct WideLineCol { /// Zero-based. From d9c88460e448efab4f784187f8bcf95c5a945673 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:38:35 -0700 Subject: [PATCH 12/40] Use cur --- lib/line-index/src/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index bd00519d4e2..1a4753cbd67 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -86,14 +86,14 @@ impl LineIndex { let mut newlines = Vec::with_capacity(16); newlines.push(TextSize::from(0)); - let mut curr_row = 0.into(); - let mut curr_col = 0.into(); + let mut cur_row = 0.into(); + let mut cur_col = 0.into(); let mut line = 0; for c in text.chars() { let c_len = TextSize::of(c); - curr_row += c_len; + cur_row += c_len; if c == '\n' { - newlines.push(curr_row); + newlines.push(cur_row); // Save any utf-16 characters seen in the previous line if !wide_chars.is_empty() { @@ -101,16 +101,16 @@ impl LineIndex { } // Prepare for processing the next line - curr_col = 0.into(); + cur_col = 0.into(); line += 1; continue; } if !c.is_ascii() { - wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len }); + wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len }); } - curr_col += c_len; + cur_col += c_len; } // Save any utf-16 characters seen in the last line From da5c63c8f90b7cf66d94626352141dc3f0585b6e Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Thu, 4 May 2023 16:40:41 -0700 Subject: [PATCH 13/40] Use boxed slice As well as doing the shrink_to_fit, we also don't have to keep track of the capacity anymore. --- lib/line-index/src/lib.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 1a4753cbd67..e31f3006e21 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -13,9 +13,9 @@ pub use text_size::{TextRange, TextSize}; #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line, zero-based. - newlines: Vec, + newlines: Box<[TextSize]>, /// List of non-ASCII characters on each line. - line_wide_chars: IntMap>, + line_wide_chars: IntMap>, } /// Line/Column information in native, utf8 format. @@ -97,7 +97,8 @@ impl LineIndex { // Save any utf-16 characters seen in the previous line if !wide_chars.is_empty() { - line_wide_chars.insert(line, std::mem::take(&mut wide_chars)); + line_wide_chars + .insert(line, std::mem::take(&mut wide_chars).into_boxed_slice()); } // Prepare for processing the next line @@ -115,13 +116,10 @@ impl LineIndex { // Save any utf-16 characters seen in the last line if !wide_chars.is_empty() { - line_wide_chars.insert(line, wide_chars); + line_wide_chars.insert(line, wide_chars.into_boxed_slice()); } - newlines.shrink_to_fit(); - line_wide_chars.shrink_to_fit(); - - LineIndex { newlines, line_wide_chars } + LineIndex { newlines: newlines.into_boxed_slice(), line_wide_chars } } /// Transforms the `TextSize` into a `LineCol`. @@ -168,7 +166,7 @@ impl LineIndex { fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize { let mut res: usize = col.into(); if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars { + for c in wide_chars.iter() { if c.end <= col { res -= usize::from(c.len()) - c.wide_len(enc); } else { @@ -183,7 +181,7 @@ impl LineIndex { fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize { if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars { + for c in wide_chars.iter() { if col > u32::from(c.start) { col += u32::from(c.len()) - c.wide_len(enc) as u32; } else { From 663e11c4b053943426533a60cb61a52598bb203b Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Fri, 5 May 2023 17:22:23 -0700 Subject: [PATCH 14/40] Move some tests --- lib/line-index/src/tests.rs | 64 +------------------------------------ lib/line-index/tests/it.rs | 63 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 63 deletions(-) create mode 100644 lib/line-index/tests/it.rs diff --git a/lib/line-index/src/tests.rs b/lib/line-index/src/tests.rs index 4b58cfc47dd..31c01c20ee3 100644 --- a/lib/line-index/src/tests.rs +++ b/lib/line-index/src/tests.rs @@ -1,38 +1,4 @@ -use super::*; - -#[test] -fn test_line_index() { - let text = "hello\nworld"; - let table = [ - (00, 0, 0), - (01, 0, 1), - (05, 0, 5), - (06, 1, 0), - (07, 1, 1), - (08, 1, 2), - (10, 1, 4), - (11, 1, 5), - (12, 1, 6), - ]; - - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } - - let text = "\nhello\nworld"; - let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } -} - -#[test] -fn test_char_len() { - assert_eq!('メ'.len_utf8(), 3); - assert_eq!('メ'.len_utf16(), 1); -} +use super::LineIndex; #[test] fn test_empty_index() { @@ -43,31 +9,3 @@ const C: char = 'x'; ); assert_eq!(col_index.line_wide_chars.len(), 0); } - -#[test] -fn test_splitlines() { - fn r(lo: u32, hi: u32) -> TextRange { - TextRange::new(lo.into(), hi.into()) - } - - let text = "a\nbb\nccc\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 9)).collect::>(); - let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; - assert_eq!(actual, expected); - - let text = ""; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 0)).collect::>(); - let expected = vec![]; - assert_eq!(actual, expected); - - let text = "\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 1)).collect::>(); - let expected = vec![r(0, 1)]; - assert_eq!(actual, expected) -} diff --git a/lib/line-index/tests/it.rs b/lib/line-index/tests/it.rs new file mode 100644 index 00000000000..fcaf0e4a8c9 --- /dev/null +++ b/lib/line-index/tests/it.rs @@ -0,0 +1,63 @@ +use line_index::{LineCol, LineIndex, TextRange}; + +#[test] +fn test_line_index() { + let text = "hello\nworld"; + let table = [ + (00, 0, 0), + (01, 0, 1), + (05, 0, 5), + (06, 1, 0), + (07, 1, 1), + (08, 1, 2), + (10, 1, 4), + (11, 1, 5), + (12, 1, 6), + ]; + + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } + + let text = "\nhello\nworld"; + let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } +} + +#[test] +fn test_char_len() { + assert_eq!('メ'.len_utf8(), 3); + assert_eq!('メ'.len_utf16(), 1); +} + +#[test] +fn test_splitlines() { + fn r(lo: u32, hi: u32) -> TextRange { + TextRange::new(lo.into(), hi.into()) + } + + let text = "a\nbb\nccc\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 9)).collect::>(); + let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; + assert_eq!(actual, expected); + + let text = ""; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 0)).collect::>(); + let expected = vec![]; + assert_eq!(actual, expected); + + let text = "\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 1)).collect::>(); + let expected = vec![r(0, 1)]; + assert_eq!(actual, expected) +} From 59b4916294d30af0c16a4737ff8d1e2077b589b5 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Fri, 5 May 2023 17:25:10 -0700 Subject: [PATCH 15/40] Swap --- lib/line-index/src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index e31f3006e21..61f907f0604 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -9,15 +9,6 @@ use nohash_hasher::IntMap; pub use text_size::{TextRange, TextSize}; -/// Maps flat [`TextSize`] offsets into `(line, column)` representation. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct LineIndex { - /// Offset the beginning of each line, zero-based. - newlines: Box<[TextSize]>, - /// List of non-ASCII characters on each line. - line_wide_chars: IntMap>, -} - /// Line/Column information in native, utf8 format. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct LineCol { @@ -77,6 +68,15 @@ impl WideChar { } } +/// Maps flat [`TextSize`] offsets into `(line, column)` representation. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LineIndex { + /// Offset the beginning of each line, zero-based. + newlines: Box<[TextSize]>, + /// List of non-ASCII characters on each line. + line_wide_chars: IntMap>, +} + impl LineIndex { /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { From 1ad0779a007ed4a30d9e2443ab2e10d2ba041a1f Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Fri, 5 May 2023 17:35:20 -0700 Subject: [PATCH 16/40] Make WideEncoding non-exhaustive --- crates/ide-db/src/tests/line_index.rs | 9 ++------- crates/rust-analyzer/src/caps.rs | 11 ++++++----- crates/rust-analyzer/src/diagnostics/to_proto.rs | 12 +++++------- lib/line-index/src/lib.rs | 11 +++++++++++ 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/crates/ide-db/src/tests/line_index.rs b/crates/ide-db/src/tests/line_index.rs index c12936071d3..c41b0de5633 100644 --- a/crates/ide-db/src/tests/line_index.rs +++ b/crates/ide-db/src/tests/line_index.rs @@ -28,16 +28,11 @@ fn test_every_chars() { let got_lin_col = line_index.line_col(got_offset); assert_eq!(got_lin_col, lin_col); - for enc in [WideEncoding::Utf16, WideEncoding::Utf32] { + for (enc, col) in [(WideEncoding::Utf16, col_utf16), (WideEncoding::Utf32, col_utf32)] { let wide_lin_col = line_index.to_wide(enc, lin_col); let got_lin_col = line_index.to_utf8(enc, wide_lin_col); assert_eq!(got_lin_col, lin_col); - - let want_col = match enc { - WideEncoding::Utf16 => col_utf16, - WideEncoding::Utf32 => col_utf32, - }; - assert_eq!(wide_lin_col.col, want_col) + assert_eq!(wide_lin_col.col, col) } if c == '\n' { diff --git a/crates/rust-analyzer/src/caps.rs b/crates/rust-analyzer/src/caps.rs index 3628670ac98..ab06b96814a 100644 --- a/crates/rust-analyzer/src/caps.rs +++ b/crates/rust-analyzer/src/caps.rs @@ -23,13 +23,14 @@ use crate::semantic_tokens; pub fn server_capabilities(config: &Config) -> ServerCapabilities { ServerCapabilities { - position_encoding: Some(match negotiated_encoding(config.caps()) { - PositionEncoding::Utf8 => PositionEncodingKind::UTF8, + position_encoding: match negotiated_encoding(config.caps()) { + PositionEncoding::Utf8 => Some(PositionEncodingKind::UTF8), PositionEncoding::Wide(wide) => match wide { - WideEncoding::Utf16 => PositionEncodingKind::UTF16, - WideEncoding::Utf32 => PositionEncodingKind::UTF32, + WideEncoding::Utf16 => Some(PositionEncodingKind::UTF16), + WideEncoding::Utf32 => Some(PositionEncodingKind::UTF32), + _ => None, }, - }), + }, text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions { open_close: Some(true), change: Some(TextDocumentSyncKind::INCREMENTAL), diff --git a/crates/rust-analyzer/src/diagnostics/to_proto.rs b/crates/rust-analyzer/src/diagnostics/to_proto.rs index 415fa4e02f2..c8b2c4edb83 100644 --- a/crates/rust-analyzer/src/diagnostics/to_proto.rs +++ b/crates/rust-analyzer/src/diagnostics/to_proto.rs @@ -3,7 +3,6 @@ use std::collections::HashMap; use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan}; -use ide_db::line_index::WideEncoding; use itertools::Itertools; use stdx::format_to; use vfs::{AbsPath, AbsPathBuf}; @@ -94,17 +93,16 @@ fn position( }; } let mut char_offset = 0; - let len_func = match position_encoding { - PositionEncoding::Utf8 => char::len_utf8, - PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16, - PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1, - }; for c in line.text.chars() { char_offset += 1; if char_offset > column_offset { break; } - true_column_offset += len_func(c) - 1; + let len = match position_encoding { + PositionEncoding::Utf8 => c.len_utf8(), + PositionEncoding::Wide(w) => w.measure(&c.to_string()), + }; + true_column_offset += len - 1; } } diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 61f907f0604..40815bdcf19 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -20,6 +20,7 @@ pub struct LineCol { /// A kind of wide character encoding. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[non_exhaustive] pub enum WideEncoding { /// UTF-16. Utf16, @@ -27,6 +28,16 @@ pub enum WideEncoding { Utf32, } +impl WideEncoding { + /// Returns the number of units it takes to encode `text` in this encoding. + pub fn measure(&self, text: &str) -> usize { + match self { + WideEncoding::Utf16 => text.encode_utf16().count(), + WideEncoding::Utf32 => text.chars().count(), + } + } +} + /// Line/Column information in legacy encodings. // // Deliberately not a generic type and different from `LineCol`. From 594a41e7bfd0692c03e0a26f78a24388edd4332b Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:51:25 -0700 Subject: [PATCH 17/40] Use pre.1, tweak comment --- Cargo.lock | 2 +- Cargo.toml | 8 ++++++-- lib/line-index/Cargo.toml | 4 ++-- lib/line-index/src/lib.rs | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f20cba98655..4b22a58e568 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -919,7 +919,7 @@ version = "0.0.0" [[package]] name = "line-index" -version = "0.1.0" +version = "0.1.0-pre.1" dependencies = [ "nohash-hasher", "text-size", diff --git a/Cargo.toml b/Cargo.toml index 123b06e0ccc..c7b0228e331 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,10 +74,14 @@ toolchain = { path = "./crates/toolchain", version = "0.0.0" } tt = { path = "./crates/tt", version = "0.0.0" } vfs-notify = { path = "./crates/vfs-notify", version = "0.0.0" } vfs = { path = "./crates/vfs", version = "0.0.0" } -line-index = { version = "0.1.0", path = "./lib/line-index" } +line-index = { version = "0.1.0-pre.1", path = "./lib/line-index" } # non-local crates -smallvec = { version = "1.10.0", features = ["const_new", "union", "const_generics"] } +smallvec = { version = "1.10.0", features = [ + "const_new", + "union", + "const_generics", +] } smol_str = "0.2.0" nohash-hasher = "0.2.0" text-size = "1.1.0" diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml index b08a55854e0..019ad3a53ba 100644 --- a/lib/line-index/Cargo.toml +++ b/lib/line-index/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "line-index" -version = "0.1.0" -description = "Maps flat `TextSize` offsets into `(line, column)` representation." +version = "0.1.0-pre.1" +description = "Maps flat `TextSize` offsets to/from `(line, column)` representation." license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index" edition = "2021" diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 40815bdcf19..463227900d3 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -79,8 +79,8 @@ impl WideChar { } } -/// Maps flat [`TextSize`] offsets into `(line, column)` representation. -#[derive(Clone, Debug, PartialEq, Eq)] +/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation. +#[derive(Debug, Clone, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line, zero-based. newlines: Box<[TextSize]>, From 0bb21533c68bc551ef2ec70a3b70bb99b4d2e786 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:52:11 -0700 Subject: [PATCH 18/40] Tweak comments, swap --- lib/line-index/src/lib.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 463227900d3..91b7faa78ad 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -9,8 +9,8 @@ use nohash_hasher::IntMap; pub use text_size::{TextRange, TextSize}; -/// Line/Column information in native, utf8 format. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +/// `(line, column)` information in the native, UTF-8 encoding. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct LineCol { /// Zero-based. pub line: u32, @@ -19,7 +19,7 @@ pub struct LineCol { } /// A kind of wide character encoding. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum WideEncoding { /// UTF-16. @@ -29,7 +29,7 @@ pub enum WideEncoding { } impl WideEncoding { - /// Returns the number of units it takes to encode `text` in this encoding. + /// Returns the number of code units it takes to encode `text` in this encoding. pub fn measure(&self, text: &str) -> usize { match self { WideEncoding::Utf16 => text.encode_utf16().count(), @@ -38,10 +38,12 @@ impl WideEncoding { } } -/// Line/Column information in legacy encodings. +/// `(line, column)` information in wide encodings. +/// +/// See [`WideEncoding`] for the kinds of wide encodings available. // // Deliberately not a generic type and different from `LineCol`. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct WideLineCol { /// Zero-based. pub line: u32, @@ -49,11 +51,11 @@ pub struct WideLineCol { pub col: u32, } -#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] struct WideChar { - /// Start offset of a character inside a line, zero-based + /// Start offset of a character inside a line, zero-based. start: TextSize, - /// End offset of a character inside a line, zero-based + /// End offset of a character inside a line, zero-based. end: TextSize, } From fcbe73ec1caad761eb6cf1a641fa667ac920076b Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:52:32 -0700 Subject: [PATCH 19/40] Refactor position --- .../rust-analyzer/src/diagnostics/to_proto.rs | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/crates/rust-analyzer/src/diagnostics/to_proto.rs b/crates/rust-analyzer/src/diagnostics/to_proto.rs index c8b2c4edb83..e1d1130ff1b 100644 --- a/crates/rust-analyzer/src/diagnostics/to_proto.rs +++ b/crates/rust-analyzer/src/diagnostics/to_proto.rs @@ -79,36 +79,33 @@ fn position( position_encoding: &PositionEncoding, span: &DiagnosticSpan, line_offset: usize, - column_offset: usize, + column_offset_utf32: usize, ) -> lsp_types::Position { let line_index = line_offset - span.line_start; - let mut true_column_offset = column_offset; - if let Some(line) = span.text.get(line_index) { - if line.text.chars().count() == line.text.len() { - // all one byte utf-8 char - return lsp_types::Position { - line: (line_offset as u32).saturating_sub(1), - character: (column_offset as u32).saturating_sub(1), - }; - } - let mut char_offset = 0; - for c in line.text.chars() { - char_offset += 1; - if char_offset > column_offset { - break; + let column_offset_encoded = match span.text.get(line_index) { + // Fast path. + Some(line) if line.text.is_ascii() => column_offset_utf32, + Some(line) => { + let line_prefix_len = line + .text + .char_indices() + .take(column_offset_utf32) + .last() + .map(|(pos, c)| pos + c.len_utf8()) + .unwrap_or(0); + let line_prefix = &line.text[..line_prefix_len]; + match position_encoding { + PositionEncoding::Utf8 => line_prefix.len(), + PositionEncoding::Wide(enc) => enc.measure(line_prefix), } - let len = match position_encoding { - PositionEncoding::Utf8 => c.len_utf8(), - PositionEncoding::Wide(w) => w.measure(&c.to_string()), - }; - true_column_offset += len - 1; } - } + None => column_offset_utf32, + }; lsp_types::Position { line: (line_offset as u32).saturating_sub(1), - character: (true_column_offset as u32).saturating_sub(1), + character: (column_offset_encoded as u32).saturating_sub(1), } } From 9de213c4fe10a001d1f55c76f157c1d9173694bc Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:56:30 -0700 Subject: [PATCH 20/40] Swap, tweak comments --- lib/line-index/src/lib.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 91b7faa78ad..39ae99fbd69 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -93,29 +93,30 @@ pub struct LineIndex { impl LineIndex { /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { - let mut line_wide_chars = IntMap::default(); - let mut wide_chars = Vec::new(); - let mut newlines = Vec::with_capacity(16); + let mut line_wide_chars = IntMap::default(); + + let mut wide_chars = Vec::new(); + let mut cur_row = TextSize::from(0); + let mut cur_col = TextSize::from(0); + let mut line = 0; + newlines.push(TextSize::from(0)); - let mut cur_row = 0.into(); - let mut cur_col = 0.into(); - let mut line = 0; for c in text.chars() { let c_len = TextSize::of(c); cur_row += c_len; if c == '\n' { newlines.push(cur_row); - // Save any utf-16 characters seen in the previous line + // Save any wide characters seen in the previous line if !wide_chars.is_empty() { - line_wide_chars - .insert(line, std::mem::take(&mut wide_chars).into_boxed_slice()); + let cs = std::mem::take(&mut wide_chars).into_boxed_slice(); + line_wide_chars.insert(line, cs); } // Prepare for processing the next line - cur_col = 0.into(); + cur_col = TextSize::from(0); line += 1; continue; } @@ -127,7 +128,7 @@ impl LineIndex { cur_col += c_len; } - // Save any utf-16 characters seen in the last line + // Save any wide characters seen in the last line if !wide_chars.is_empty() { line_wide_chars.insert(line, wide_chars.into_boxed_slice()); } @@ -136,6 +137,10 @@ impl LineIndex { } /// Transforms the `TextSize` into a `LineCol`. + /// + /// # Panics + /// + /// If the offset is invalid. pub fn line_col(&self, offset: TextSize) -> LineCol { let line = self.newlines.partition_point(|&it| it <= offset) - 1; let line_start_offset = self.newlines[line]; From 02e8bb0c6e3854485e72f896f889673a4f3ad762 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:57:57 -0700 Subject: [PATCH 21/40] Return Option --- crates/ide-db/src/tests/line_index.rs | 4 ++-- crates/rust-analyzer/src/from_proto.rs | 5 ++++- crates/rust-analyzer/src/to_proto.rs | 2 +- lib/line-index/src/lib.rs | 8 ++++---- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/ide-db/src/tests/line_index.rs b/crates/ide-db/src/tests/line_index.rs index c41b0de5633..6b49bb2631c 100644 --- a/crates/ide-db/src/tests/line_index.rs +++ b/crates/ide-db/src/tests/line_index.rs @@ -29,8 +29,8 @@ fn test_every_chars() { assert_eq!(got_lin_col, lin_col); for (enc, col) in [(WideEncoding::Utf16, col_utf16), (WideEncoding::Utf32, col_utf32)] { - let wide_lin_col = line_index.to_wide(enc, lin_col); - let got_lin_col = line_index.to_utf8(enc, wide_lin_col); + let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap(); + let got_lin_col = line_index.to_utf8(enc, wide_lin_col).unwrap(); assert_eq!(got_lin_col, lin_col); assert_eq!(wide_lin_col.col, col) } diff --git a/crates/rust-analyzer/src/from_proto.rs b/crates/rust-analyzer/src/from_proto.rs index 44891fad1a9..cd74a5500d0 100644 --- a/crates/rust-analyzer/src/from_proto.rs +++ b/crates/rust-analyzer/src/from_proto.rs @@ -31,7 +31,10 @@ pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> R PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character }, PositionEncoding::Wide(enc) => { let line_col = WideLineCol { line: position.line, col: position.character }; - line_index.index.to_utf8(enc, line_col) + line_index + .index + .to_utf8(enc, line_col) + .ok_or_else(|| format_err!("Invalid wide col offset"))? } }; let text_size = diff --git a/crates/rust-analyzer/src/to_proto.rs b/crates/rust-analyzer/src/to_proto.rs index 60292d2ad18..b9d853e202a 100644 --- a/crates/rust-analyzer/src/to_proto.rs +++ b/crates/rust-analyzer/src/to_proto.rs @@ -32,7 +32,7 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P match line_index.encoding { PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col), PositionEncoding::Wide(enc) => { - let line_col = line_index.index.to_wide(enc, line_col); + let line_col = line_index.index.to_wide(enc, line_col).unwrap(); lsp_types::Position::new(line_col.line, line_col.col) } } diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 39ae99fbd69..3c10fbe20c5 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -156,15 +156,15 @@ impl LineIndex { } /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. - pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol { + pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); - WideLineCol { line: line_col.line, col: col as u32 } + Some(WideLineCol { line: line_col.line, col: col as u32 }) } /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. - pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { + pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option { let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); - LineCol { line: line_col.line, col: col.into() } + Some(LineCol { line: line_col.line, col: col.into() }) } /// Returns an iterator over the ranges for the lines. From 84a6cb3bfc98ec9692eb82a8e2016d69d95b3209 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 00:59:56 -0700 Subject: [PATCH 22/40] Inline --- lib/line-index/src/lib.rs | 61 +++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 3c10fbe20c5..eecc1edb135 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -157,13 +157,36 @@ impl LineIndex { /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { - let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); - Some(WideLineCol { line: line_col.line, col: col as u32 }) + let col: TextSize = line_col.col.into(); + let mut res: usize = col.into(); + if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { + for c in wide_chars.iter() { + if c.end <= col { + res -= usize::from(c.len()) - c.wide_len(enc); + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } + Some(WideLineCol { line: line_col.line, col: res as u32 }) } /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option { - let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); + let mut col = line_col.col; + if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { + for c in wide_chars.iter() { + if col > u32::from(c.start) { + col += u32::from(c.len()) - c.wide_len(enc) as u32; + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } Some(LineCol { line: line_col.line, col: col.into() }) } @@ -180,36 +203,4 @@ impl LineIndex { .map(|(lo, hi)| TextRange::new(lo, hi)) .filter(|it| !it.is_empty()) } - - fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize { - let mut res: usize = col.into(); - if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars.iter() { - if c.end <= col { - res -= usize::from(c.len()) - c.wide_len(enc); - } else { - // From here on, all utf16 characters come *after* the character we are mapping, - // so we don't need to take them into account - break; - } - } - } - res - } - - fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize { - if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars.iter() { - if col > u32::from(c.start) { - col += u32::from(c.len()) - c.wide_len(enc) as u32; - } else { - // From here on, all utf16 characters come *after* the character we are mapping, - // so we don't need to take them into account - break; - } - } - } - - col.into() - } } From 8012acc90e6d6ba0e59bb0513dc419a3aa4739bb Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:02:37 -0700 Subject: [PATCH 23/40] Use a from --- lib/line-index/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index eecc1edb135..26287212e31 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -161,7 +161,7 @@ impl LineIndex { let mut res: usize = col.into(); if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { for c in wide_chars.iter() { - if c.end <= col { + if u32::from(c.end) <= line_col.col { res -= usize::from(c.len()) - c.wide_len(enc); } else { // From here on, all utf16 characters come *after* the character we are mapping, From d683e220214f182ec788f86d3e005827a8a2648c Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:03:18 -0700 Subject: [PATCH 24/40] Use u32 more --- lib/line-index/src/lib.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 26287212e31..527ba08717a 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -66,7 +66,7 @@ impl WideChar { } /// Returns the length in UTF-16 or UTF-32 code units. - fn wide_len(&self, enc: WideEncoding) -> usize { + fn wide_len(&self, enc: WideEncoding) -> u32 { match enc { WideEncoding::Utf16 => { if self.len() == TextSize::from(4) { @@ -75,7 +75,6 @@ impl WideChar { 1 } } - WideEncoding::Utf32 => 1, } } @@ -157,12 +156,11 @@ impl LineIndex { /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { - let col: TextSize = line_col.col.into(); - let mut res: usize = col.into(); + let mut col = line_col.col; if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { for c in wide_chars.iter() { if u32::from(c.end) <= line_col.col { - res -= usize::from(c.len()) - c.wide_len(enc); + col -= u32::from(c.len()) - c.wide_len(enc); } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -170,7 +168,7 @@ impl LineIndex { } } } - Some(WideLineCol { line: line_col.line, col: res as u32 }) + Some(WideLineCol { line: line_col.line, col }) } /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. From 902b3438c97067c4fddf4eb7e429b8fd82cfe5b7 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:04:41 -0700 Subject: [PATCH 25/40] Use try_line_col --- lib/line-index/src/lib.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 527ba08717a..c0e526a8e0f 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -141,10 +141,15 @@ impl LineIndex { /// /// If the offset is invalid. pub fn line_col(&self, offset: TextSize) -> LineCol { - let line = self.newlines.partition_point(|&it| it <= offset) - 1; - let line_start_offset = self.newlines[line]; + self.try_line_col(offset).expect("invalid offset") + } + + /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid. + pub fn try_line_col(&self, offset: TextSize) -> Option { + let line = self.newlines.partition_point(|&it| it <= offset).checked_sub(1)?; + let line_start_offset = self.newlines.get(line)?; let col = offset - line_start_offset; - LineCol { line: line as u32, col: col.into() } + Some(LineCol { line: line as u32, col: col.into() }) } /// Transforms the `LineCol` into a `TextSize`. From 0ad2450396ea72bff341c65dbf24745e1e185aad Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:05:28 -0700 Subject: [PATCH 26/40] Check for inside multibyte --- lib/line-index/src/lib.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index c0e526a8e0f..2494975f9fb 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -144,12 +144,20 @@ impl LineIndex { self.try_line_col(offset).expect("invalid offset") } - /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid. + /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid, + /// e.g. if it points to the middle of a multi-byte character. pub fn try_line_col(&self, offset: TextSize) -> Option { let line = self.newlines.partition_point(|&it| it <= offset).checked_sub(1)?; let line_start_offset = self.newlines.get(line)?; let col = offset - line_start_offset; - Some(LineCol { line: line as u32, col: col.into() }) + let ret = LineCol { line: line as u32, col: col.into() }; + self.line_wide_chars + .get(&ret.line) + .into_iter() + .flat_map(|it| it.iter()) + .find(|it| it.start < col && col < it.end) + .is_none() + .then_some(ret) } /// Transforms the `LineCol` into a `TextSize`. From 65004877358fc5e9b285d9a7683baf62ec541f73 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:37:25 -0700 Subject: [PATCH 27/40] Rename --- lib/line-index/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 2494975f9fb..ad6a79c4095 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -148,8 +148,8 @@ impl LineIndex { /// e.g. if it points to the middle of a multi-byte character. pub fn try_line_col(&self, offset: TextSize) -> Option { let line = self.newlines.partition_point(|&it| it <= offset).checked_sub(1)?; - let line_start_offset = self.newlines.get(line)?; - let col = offset - line_start_offset; + let start = self.newlines.get(line)?; + let col = offset - start; let ret = LineCol { line: line as u32, col: col.into() }; self.line_wide_chars .get(&ret.line) From e8a93306f0068adacc6fe2e811e261a5795f68e8 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 01:46:33 -0700 Subject: [PATCH 28/40] Remove 0 TextSize at front --- lib/line-index/src/lib.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index ad6a79c4095..214fc215f66 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -83,7 +83,7 @@ impl WideChar { /// Maps flat [`TextSize`] offsets to/from `(line, column)` representation. #[derive(Debug, Clone, PartialEq, Eq)] pub struct LineIndex { - /// Offset the beginning of each line, zero-based. + /// Offset the beginning of each line (except the first, which always has offset 0). newlines: Box<[TextSize]>, /// List of non-ASCII characters on each line. line_wide_chars: IntMap>, @@ -100,8 +100,6 @@ impl LineIndex { let mut cur_col = TextSize::from(0); let mut line = 0; - newlines.push(TextSize::from(0)); - for c in text.chars() { let c_len = TextSize::of(c); cur_row += c_len; @@ -147,8 +145,8 @@ impl LineIndex { /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid, /// e.g. if it points to the middle of a multi-byte character. pub fn try_line_col(&self, offset: TextSize) -> Option { - let line = self.newlines.partition_point(|&it| it <= offset).checked_sub(1)?; - let start = self.newlines.get(line)?; + let line = self.newlines.partition_point(|&it| it <= offset); + let start = self.start_offset(line)?; let col = offset - start; let ret = LineCol { line: line as u32, col: col.into() }; self.line_wide_chars @@ -162,9 +160,14 @@ impl LineIndex { /// Transforms the `LineCol` into a `TextSize`. pub fn offset(&self, line_col: LineCol) -> Option { - self.newlines - .get(line_col.line as usize) - .map(|offset| offset + TextSize::from(line_col.col)) + self.start_offset(line_col.line as usize).map(|start| start + TextSize::from(line_col.col)) + } + + fn start_offset(&self, line: usize) -> Option { + match line.checked_sub(1) { + None => Some(TextSize::from(0)), + Some(it) => self.newlines.get(it).copied(), + } } /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. From 343976fe56d65aae4d4c6c4ac9d0ceb25b07c036 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 02:08:47 -0700 Subject: [PATCH 29/40] Return None for too-large offset --- lib/line-index/src/lib.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 214fc215f66..6318cbde50b 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -84,6 +84,8 @@ impl WideChar { #[derive(Debug, Clone, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line (except the first, which always has offset 0). + /// + /// Invariant: Always non-empty and the last element holds the length of the original text. newlines: Box<[TextSize]>, /// List of non-ASCII characters on each line. line_wide_chars: IntMap>, @@ -125,6 +127,8 @@ impl LineIndex { cur_col += c_len; } + newlines.push(TextSize::of(text)); + // Save any wide characters seen in the last line if !wide_chars.is_empty() { line_wide_chars.insert(line, wide_chars.into_boxed_slice()); @@ -143,8 +147,12 @@ impl LineIndex { } /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid, - /// e.g. if it points to the middle of a multi-byte character. + /// e.g. if it extends past the end of the text or points to the middle of a multi-byte + /// character. pub fn try_line_col(&self, offset: TextSize) -> Option { + if offset > *self.newlines.last().unwrap() { + return None; + } let line = self.newlines.partition_point(|&it| it <= offset); let start = self.start_offset(line)?; let col = offset - start; From 1bc6bca478872347faa32371663b3aff7b00624f Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:05:03 -0700 Subject: [PATCH 30/40] Use checked --- lib/line-index/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 6318cbde50b..622738ce8fb 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -184,7 +184,7 @@ impl LineIndex { if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { for c in wide_chars.iter() { if u32::from(c.end) <= line_col.col { - col -= u32::from(c.len()) - c.wide_len(enc); + col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))?; } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -201,7 +201,7 @@ impl LineIndex { if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { for c in wide_chars.iter() { if col > u32::from(c.start) { - col += u32::from(c.len()) - c.wide_len(enc) as u32; + col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))?; } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account From e4d053e794ff48f773debaaf6df4966d711597c1 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:05:38 -0700 Subject: [PATCH 31/40] Rm into --- lib/line-index/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 622738ce8fb..c3f352b5d19 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -209,7 +209,7 @@ impl LineIndex { } } } - Some(LineCol { line: line_col.line, col: col.into() }) + Some(LineCol { line: line_col.line, col }) } /// Returns an iterator over the ranges for the lines. From 7cdca727a8ecf583c4f1fef2ac49410434656c55 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:06:23 -0700 Subject: [PATCH 32/40] Use all not instead of find is none --- lib/line-index/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index c3f352b5d19..e70bf68befa 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -161,8 +161,7 @@ impl LineIndex { .get(&ret.line) .into_iter() .flat_map(|it| it.iter()) - .find(|it| it.start < col && col < it.end) - .is_none() + .all(|it| !(it.start < col && col < it.end)) .then_some(ret) } From 7a7f90cf28445b6f3d3545c51ae312f003e19064 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:06:51 -0700 Subject: [PATCH 33/40] Use de morgan --- lib/line-index/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index e70bf68befa..97a24bcf6fc 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -161,7 +161,7 @@ impl LineIndex { .get(&ret.line) .into_iter() .flat_map(|it| it.iter()) - .all(|it| !(it.start < col && col < it.end)) + .all(|it| col <= it.start || it.end <= col) .then_some(ret) } From 510050ecdc5d7eede55533246e713fe847038af8 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:09:34 -0700 Subject: [PATCH 34/40] Add more types --- lib/line-index/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 97a24bcf6fc..370bbf68f38 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -94,13 +94,13 @@ pub struct LineIndex { impl LineIndex { /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { - let mut newlines = Vec::with_capacity(16); - let mut line_wide_chars = IntMap::default(); + let mut newlines = Vec::::with_capacity(16); + let mut line_wide_chars = IntMap::>::default(); - let mut wide_chars = Vec::new(); + let mut wide_chars = Vec::::new(); let mut cur_row = TextSize::from(0); let mut cur_col = TextSize::from(0); - let mut line = 0; + let mut line = 0u32; for c in text.chars() { let c_len = TextSize::of(c); From cc2936b93ebeadceb1dfd0b3aa6194d95a5cb5a6 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:10:35 -0700 Subject: [PATCH 35/40] Use size field --- lib/line-index/src/lib.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 370bbf68f38..b29717e0a7b 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -84,11 +84,11 @@ impl WideChar { #[derive(Debug, Clone, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line (except the first, which always has offset 0). - /// - /// Invariant: Always non-empty and the last element holds the length of the original text. newlines: Box<[TextSize]>, /// List of non-ASCII characters on each line. line_wide_chars: IntMap>, + /// The size of the entire text. + size: TextSize, } impl LineIndex { @@ -127,14 +127,16 @@ impl LineIndex { cur_col += c_len; } - newlines.push(TextSize::of(text)); - // Save any wide characters seen in the last line if !wide_chars.is_empty() { line_wide_chars.insert(line, wide_chars.into_boxed_slice()); } - LineIndex { newlines: newlines.into_boxed_slice(), line_wide_chars } + LineIndex { + newlines: newlines.into_boxed_slice(), + line_wide_chars, + size: TextSize::of(text), + } } /// Transforms the `TextSize` into a `LineCol`. @@ -150,7 +152,7 @@ impl LineIndex { /// e.g. if it extends past the end of the text or points to the middle of a multi-byte /// character. pub fn try_line_col(&self, offset: TextSize) -> Option { - if offset > *self.newlines.last().unwrap() { + if offset > self.size { return None; } let line = self.newlines.partition_point(|&it| it <= offset); From ed498b6eff1b627507b763f714f9dae8279f3c7f Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:11:18 -0700 Subject: [PATCH 36/40] Rm out of bounds --- lib/line-index/tests/it.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/line-index/tests/it.rs b/lib/line-index/tests/it.rs index fcaf0e4a8c9..ce1c0bc6f14 100644 --- a/lib/line-index/tests/it.rs +++ b/lib/line-index/tests/it.rs @@ -12,7 +12,6 @@ fn test_line_index() { (08, 1, 2), (10, 1, 4), (11, 1, 5), - (12, 1, 6), ]; let index = LineIndex::new(text); From 1cf74802ab111085a2137a2188892ef5d8641b0b Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:14:02 -0700 Subject: [PATCH 37/40] Improve docs --- lib/line-index/src/lib.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index b29717e0a7b..2c71e8e7632 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -143,14 +143,15 @@ impl LineIndex { /// /// # Panics /// - /// If the offset is invalid. + /// If the offset is invalid. See [`Self::try_line_col`]. pub fn line_col(&self, offset: TextSize) -> LineCol { self.try_line_col(offset).expect("invalid offset") } - /// Transforms the `TextSize` into a `LineCol`, or returns `None` if the `offset` was invalid, - /// e.g. if it extends past the end of the text or points to the middle of a multi-byte - /// character. + /// Transforms the `TextSize` into a `LineCol`. + /// + /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or + /// points to the middle of a multi-byte character. pub fn try_line_col(&self, offset: TextSize) -> Option { if offset > self.size { return None; From 369e430b05534e694183b6faa8e4e0e540973158 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:42:15 -0700 Subject: [PATCH 38/40] Improve doc --- lib/line-index/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 2c71e8e7632..23f9d989c0c 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -214,7 +214,9 @@ impl LineIndex { Some(LineCol { line: line_col.line, col }) } - /// Returns an iterator over the ranges for the lines. + /// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1, + /// x2), ..., [xn, end) where all the xi, which are positions of newlines, are inside the range + /// [start, end). pub fn lines(&self, range: TextRange) -> impl Iterator + '_ { let lo = self.newlines.partition_point(|&it| it < range.start()); let hi = self.newlines.partition_point(|&it| it <= range.end()); From 9d4d45215a95c92f05189e51131c60c1d15702d4 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:44:09 -0700 Subject: [PATCH 39/40] Rename --- lib/line-index/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index 23f9d989c0c..aac5b156499 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -87,8 +87,8 @@ pub struct LineIndex { newlines: Box<[TextSize]>, /// List of non-ASCII characters on each line. line_wide_chars: IntMap>, - /// The size of the entire text. - size: TextSize, + /// The length of the entire text. + len: TextSize, } impl LineIndex { @@ -135,7 +135,7 @@ impl LineIndex { LineIndex { newlines: newlines.into_boxed_slice(), line_wide_chars, - size: TextSize::of(text), + len: TextSize::of(text), } } @@ -153,7 +153,7 @@ impl LineIndex { /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or /// points to the middle of a multi-byte character. pub fn try_line_col(&self, offset: TextSize) -> Option { - if offset > self.size { + if offset > self.len { return None; } let line = self.newlines.partition_point(|&it| it <= offset); From 60056b884536c77df8b82834d253b2bc784c02e4 Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Sat, 6 May 2023 15:44:37 -0700 Subject: [PATCH 40/40] Expose len --- lib/line-index/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs index aac5b156499..ad67d3f246e 100644 --- a/lib/line-index/src/lib.rs +++ b/lib/line-index/src/lib.rs @@ -229,4 +229,9 @@ impl LineIndex { .map(|(lo, hi)| TextRange::new(lo, hi)) .filter(|it| !it.is_empty()) } + + /// Returns the length of the original text. + pub fn len(&self) -> TextSize { + self.len + } }