Auto merge of #14733 - azdavis:master, r=matklad
Make line-index a lib, use nohash_hasher. These seem like they are not specific to rust-analyzer and could be pulled out to their own libraries. So I did. See https://github.com/azdavis/millet/issues/31
This commit is contained in:
commit
260e996140
21
Cargo.lock
generated
21
Cargo.lock
generated
@ -587,6 +587,7 @@ dependencies = [
|
||||
"itertools",
|
||||
"la-arena",
|
||||
"limit",
|
||||
"nohash-hasher",
|
||||
"once_cell",
|
||||
"profile",
|
||||
"project-model",
|
||||
@ -650,6 +651,7 @@ dependencies = [
|
||||
"ide-diagnostics",
|
||||
"ide-ssr",
|
||||
"itertools",
|
||||
"nohash-hasher",
|
||||
"oorandom",
|
||||
"profile",
|
||||
"pulldown-cmark",
|
||||
@ -717,7 +719,9 @@ dependencies = [
|
||||
"indexmap",
|
||||
"itertools",
|
||||
"limit",
|
||||
"line-index",
|
||||
"memchr",
|
||||
"nohash-hasher",
|
||||
"once_cell",
|
||||
"oorandom",
|
||||
"parser",
|
||||
@ -763,6 +767,7 @@ dependencies = [
|
||||
"hir",
|
||||
"ide-db",
|
||||
"itertools",
|
||||
"nohash-hasher",
|
||||
"parser",
|
||||
"stdx",
|
||||
"syntax",
|
||||
@ -912,6 +917,14 @@ dependencies = [
|
||||
name = "limit"
|
||||
version = "0.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "line-index"
|
||||
version = "0.1.0-pre.1"
|
||||
dependencies = [
|
||||
"nohash-hasher",
|
||||
"text-size",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.9"
|
||||
@ -1054,6 +1067,12 @@ dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nohash-hasher"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
|
||||
|
||||
[[package]]
|
||||
name = "notify"
|
||||
version = "5.1.0"
|
||||
@ -1470,6 +1489,7 @@ dependencies = [
|
||||
"mbe",
|
||||
"mimalloc",
|
||||
"mio",
|
||||
"nohash-hasher",
|
||||
"num_cpus",
|
||||
"oorandom",
|
||||
"parking_lot 0.12.1",
|
||||
@ -2050,6 +2070,7 @@ version = "0.0.0"
|
||||
dependencies = [
|
||||
"fst",
|
||||
"indexmap",
|
||||
"nohash-hasher",
|
||||
"paths",
|
||||
"rustc-hash",
|
||||
"stdx",
|
||||
|
@ -74,10 +74,17 @@ toolchain = { path = "./crates/toolchain", version = "0.0.0" }
|
||||
tt = { path = "./crates/tt", version = "0.0.0" }
|
||||
vfs-notify = { path = "./crates/vfs-notify", version = "0.0.0" }
|
||||
vfs = { path = "./crates/vfs", version = "0.0.0" }
|
||||
line-index = { version = "0.1.0-pre.1", path = "./lib/line-index" }
|
||||
|
||||
# non-local crates
|
||||
smallvec = { version = "1.10.0", features = ["const_new", "union", "const_generics"] }
|
||||
smallvec = { version = "1.10.0", features = [
|
||||
"const_new",
|
||||
"union",
|
||||
"const_generics",
|
||||
] }
|
||||
smol_str = "0.2.0"
|
||||
nohash-hasher = "0.2.0"
|
||||
text-size = "1.1.0"
|
||||
# the following crates are pinned to prevent us from pulling in syn 2 until all our dependencies have moved
|
||||
serde = { version = "=1.0.156", features = ["derive"] }
|
||||
serde_json = "1.0.94"
|
||||
|
@ -29,6 +29,7 @@ chalk-derive = "0.89.0"
|
||||
la-arena = { version = "0.3.0", path = "../../lib/la-arena" }
|
||||
once_cell = "1.17.0"
|
||||
triomphe.workspace = true
|
||||
nohash-hasher.workspace = true
|
||||
typed-arena = "2.0.1"
|
||||
rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false }
|
||||
|
||||
|
@ -8,8 +8,8 @@ use base_db::{
|
||||
};
|
||||
use hir_def::{db::DefDatabase, ModuleId};
|
||||
use hir_expand::db::ExpandDatabase;
|
||||
use nohash_hasher::IntMap;
|
||||
use rustc_hash::FxHashSet;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use syntax::TextRange;
|
||||
use test_utils::extract_annotations;
|
||||
use triomphe::Arc;
|
||||
@ -102,7 +102,7 @@ impl TestDB {
|
||||
self.module_for_file_opt(file_id).unwrap()
|
||||
}
|
||||
|
||||
pub(crate) fn extract_annotations(&self) -> NoHashHashMap<FileId, Vec<(TextRange, String)>> {
|
||||
pub(crate) fn extract_annotations(&self) -> IntMap<FileId, Vec<(TextRange, String)>> {
|
||||
let mut files = Vec::new();
|
||||
let crate_graph = self.crate_graph();
|
||||
for krate in crate_graph.iter() {
|
||||
|
@ -24,6 +24,7 @@ arrayvec = "0.7.2"
|
||||
indexmap = "1.9.1"
|
||||
memchr = "2.5.0"
|
||||
triomphe.workspace = true
|
||||
nohash-hasher.workspace = true
|
||||
|
||||
# local deps
|
||||
base-db.workspace = true
|
||||
@ -37,6 +38,8 @@ text-edit.workspace = true
|
||||
# something from some `hir-xxx` subpackage, reexport the API via `hir`.
|
||||
hir.workspace = true
|
||||
|
||||
line-index.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
expect-test = "1.4.0"
|
||||
oorandom = "11.1.3"
|
||||
|
@ -13,7 +13,6 @@ pub mod famous_defs;
|
||||
pub mod helpers;
|
||||
pub mod items_locator;
|
||||
pub mod label;
|
||||
pub mod line_index;
|
||||
pub mod path_transform;
|
||||
pub mod rename;
|
||||
pub mod rust_doc;
|
||||
@ -55,6 +54,8 @@ use triomphe::Arc;
|
||||
use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase};
|
||||
pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
|
||||
|
||||
pub use ::line_index;
|
||||
|
||||
/// `base_db` is normally also needed in places where `ide_db` is used, so this re-export is for convenience.
|
||||
pub use base_db;
|
||||
|
||||
@ -414,4 +415,5 @@ impl SnippetCap {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
mod sourcegen_lints;
|
||||
mod line_index;
|
||||
}
|
||||
|
@ -1,317 +0,0 @@
|
||||
//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
|
||||
//! representation.
|
||||
use std::{iter, mem};
|
||||
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use syntax::{TextRange, TextSize};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct LineIndex {
|
||||
/// Offset the beginning of each line, zero-based.
|
||||
pub(crate) newlines: Vec<TextSize>,
|
||||
/// List of non-ASCII characters on each line.
|
||||
pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,
|
||||
}
|
||||
|
||||
/// Line/Column information in native, utf8 format.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct LineCol {
|
||||
/// Zero-based
|
||||
pub line: u32,
|
||||
/// Zero-based utf8 offset
|
||||
pub col: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum WideEncoding {
|
||||
Utf16,
|
||||
Utf32,
|
||||
}
|
||||
|
||||
/// Line/Column information in legacy encodings.
|
||||
///
|
||||
/// Deliberately not a generic type and different from `LineCol`.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct WideLineCol {
|
||||
/// Zero-based
|
||||
pub line: u32,
|
||||
/// Zero-based
|
||||
pub col: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||
pub(crate) struct WideChar {
|
||||
/// Start offset of a character inside a line, zero-based
|
||||
pub(crate) start: TextSize,
|
||||
/// End offset of a character inside a line, zero-based
|
||||
pub(crate) end: TextSize,
|
||||
}
|
||||
|
||||
impl WideChar {
|
||||
/// Returns the length in 8-bit UTF-8 code units.
|
||||
fn len(&self) -> TextSize {
|
||||
self.end - self.start
|
||||
}
|
||||
|
||||
/// Returns the length in UTF-16 or UTF-32 code units.
|
||||
fn wide_len(&self, enc: WideEncoding) -> usize {
|
||||
match enc {
|
||||
WideEncoding::Utf16 => {
|
||||
if self.len() == TextSize::from(4) {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
|
||||
WideEncoding::Utf32 => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LineIndex {
|
||||
pub fn new(text: &str) -> LineIndex {
|
||||
let mut line_wide_chars = NoHashHashMap::default();
|
||||
let mut wide_chars = Vec::new();
|
||||
|
||||
let mut newlines = Vec::with_capacity(16);
|
||||
newlines.push(TextSize::from(0));
|
||||
|
||||
let mut curr_row = 0.into();
|
||||
let mut curr_col = 0.into();
|
||||
let mut line = 0;
|
||||
for c in text.chars() {
|
||||
let c_len = TextSize::of(c);
|
||||
curr_row += c_len;
|
||||
if c == '\n' {
|
||||
newlines.push(curr_row);
|
||||
|
||||
// Save any utf-16 characters seen in the previous line
|
||||
if !wide_chars.is_empty() {
|
||||
line_wide_chars.insert(line, mem::take(&mut wide_chars));
|
||||
}
|
||||
|
||||
// Prepare for processing the next line
|
||||
curr_col = 0.into();
|
||||
line += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if !c.is_ascii() {
|
||||
wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });
|
||||
}
|
||||
|
||||
curr_col += c_len;
|
||||
}
|
||||
|
||||
// Save any utf-16 characters seen in the last line
|
||||
if !wide_chars.is_empty() {
|
||||
line_wide_chars.insert(line, wide_chars);
|
||||
}
|
||||
|
||||
newlines.shrink_to_fit();
|
||||
line_wide_chars.shrink_to_fit();
|
||||
|
||||
LineIndex { newlines, line_wide_chars }
|
||||
}
|
||||
|
||||
pub fn line_col(&self, offset: TextSize) -> LineCol {
|
||||
let line = self.newlines.partition_point(|&it| it <= offset) - 1;
|
||||
let line_start_offset = self.newlines[line];
|
||||
let col = offset - line_start_offset;
|
||||
LineCol { line: line as u32, col: col.into() }
|
||||
}
|
||||
|
||||
pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
|
||||
self.newlines
|
||||
.get(line_col.line as usize)
|
||||
.map(|offset| offset + TextSize::from(line_col.col))
|
||||
}
|
||||
|
||||
pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
|
||||
let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
|
||||
WideLineCol { line: line_col.line, col: col as u32 }
|
||||
}
|
||||
|
||||
pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
|
||||
let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
|
||||
LineCol { line: line_col.line, col: col.into() }
|
||||
}
|
||||
|
||||
pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
|
||||
let lo = self.newlines.partition_point(|&it| it < range.start());
|
||||
let hi = self.newlines.partition_point(|&it| it <= range.end());
|
||||
let all = iter::once(range.start())
|
||||
.chain(self.newlines[lo..hi].iter().copied())
|
||||
.chain(iter::once(range.end()));
|
||||
|
||||
all.clone()
|
||||
.zip(all.skip(1))
|
||||
.map(|(lo, hi)| TextRange::new(lo, hi))
|
||||
.filter(|it| !it.is_empty())
|
||||
}
|
||||
|
||||
fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {
|
||||
let mut res: usize = col.into();
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||
for c in wide_chars {
|
||||
if c.end <= col {
|
||||
res -= usize::from(c.len()) - c.wide_len(enc);
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||
for c in wide_chars {
|
||||
if col > u32::from(c.start) {
|
||||
col += u32::from(c.len()) - c.wide_len(enc) as u32;
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
col.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use test_utils::skip_slow_tests;
|
||||
|
||||
use super::WideEncoding::{Utf16, Utf32};
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_line_index() {
|
||||
let text = "hello\nworld";
|
||||
let table = [
|
||||
(00, 0, 0),
|
||||
(01, 0, 1),
|
||||
(05, 0, 5),
|
||||
(06, 1, 0),
|
||||
(07, 1, 1),
|
||||
(08, 1, 2),
|
||||
(10, 1, 4),
|
||||
(11, 1, 5),
|
||||
(12, 1, 6),
|
||||
];
|
||||
|
||||
let index = LineIndex::new(text);
|
||||
for (offset, line, col) in table {
|
||||
assert_eq!(index.line_col(offset.into()), LineCol { line, col });
|
||||
}
|
||||
|
||||
let text = "\nhello\nworld";
|
||||
let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
|
||||
let index = LineIndex::new(text);
|
||||
for (offset, line, col) in table {
|
||||
assert_eq!(index.line_col(offset.into()), LineCol { line, col });
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_char_len() {
|
||||
assert_eq!('メ'.len_utf8(), 3);
|
||||
assert_eq!('メ'.len_utf16(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_index() {
|
||||
let col_index = LineIndex::new(
|
||||
"
|
||||
const C: char = 'x';
|
||||
",
|
||||
);
|
||||
assert_eq!(col_index.line_wide_chars.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_every_chars() {
|
||||
if skip_slow_tests() {
|
||||
return;
|
||||
}
|
||||
|
||||
let text: String = {
|
||||
let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
|
||||
chars.extend("\n".repeat(chars.len() / 16).chars());
|
||||
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||
stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
|
||||
chars.into_iter().collect()
|
||||
};
|
||||
assert!(text.contains('💩')); // Sanity check.
|
||||
|
||||
let line_index = LineIndex::new(&text);
|
||||
|
||||
let mut lin_col = LineCol { line: 0, col: 0 };
|
||||
let mut col_utf16 = 0;
|
||||
let mut col_utf32 = 0;
|
||||
for (offset, c) in text.char_indices() {
|
||||
let got_offset = line_index.offset(lin_col).unwrap();
|
||||
assert_eq!(usize::from(got_offset), offset);
|
||||
|
||||
let got_lin_col = line_index.line_col(got_offset);
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
|
||||
for enc in [Utf16, Utf32] {
|
||||
let wide_lin_col = line_index.to_wide(enc, lin_col);
|
||||
let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
|
||||
let want_col = match enc {
|
||||
Utf16 => col_utf16,
|
||||
Utf32 => col_utf32,
|
||||
};
|
||||
assert_eq!(wide_lin_col.col, want_col)
|
||||
}
|
||||
|
||||
if c == '\n' {
|
||||
lin_col.line += 1;
|
||||
lin_col.col = 0;
|
||||
col_utf16 = 0;
|
||||
col_utf32 = 0;
|
||||
} else {
|
||||
lin_col.col += c.len_utf8() as u32;
|
||||
col_utf16 += c.len_utf16() as u32;
|
||||
col_utf32 += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_splitlines() {
|
||||
fn r(lo: u32, hi: u32) -> TextRange {
|
||||
TextRange::new(lo.into(), hi.into())
|
||||
}
|
||||
|
||||
let text = "a\nbb\nccc\n";
|
||||
let line_index = LineIndex::new(text);
|
||||
|
||||
let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
|
||||
let expected = vec![r(0, 2), r(2, 5), r(5, 9)];
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let text = "";
|
||||
let line_index = LineIndex::new(text);
|
||||
|
||||
let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
|
||||
let expected = vec![];
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let text = "\n";
|
||||
let line_index = LineIndex::new(text);
|
||||
|
||||
let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
|
||||
let expected = vec![r(0, 1)];
|
||||
assert_eq!(actual, expected)
|
||||
}
|
||||
}
|
@ -11,9 +11,9 @@ use hir::{
|
||||
AsAssocItem, DefWithBody, HasAttrs, HasSource, InFile, ModuleSource, Semantics, Visibility,
|
||||
};
|
||||
use memchr::memmem::Finder;
|
||||
use nohash_hasher::IntMap;
|
||||
use once_cell::unsync::Lazy;
|
||||
use parser::SyntaxKind;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use syntax::{ast, match_ast, AstNode, TextRange, TextSize};
|
||||
use triomphe::Arc;
|
||||
|
||||
@ -25,7 +25,7 @@ use crate::{
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct UsageSearchResult {
|
||||
pub references: NoHashHashMap<FileId, Vec<FileReference>>,
|
||||
pub references: IntMap<FileId, Vec<FileReference>>,
|
||||
}
|
||||
|
||||
impl UsageSearchResult {
|
||||
@ -50,7 +50,7 @@ impl UsageSearchResult {
|
||||
|
||||
impl IntoIterator for UsageSearchResult {
|
||||
type Item = (FileId, Vec<FileReference>);
|
||||
type IntoIter = <NoHashHashMap<FileId, Vec<FileReference>> as IntoIterator>::IntoIter;
|
||||
type IntoIter = <IntMap<FileId, Vec<FileReference>> as IntoIterator>::IntoIter;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.references.into_iter()
|
||||
@ -84,17 +84,17 @@ pub enum ReferenceCategory {
|
||||
/// e.g. for things like local variables.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SearchScope {
|
||||
entries: NoHashHashMap<FileId, Option<TextRange>>,
|
||||
entries: IntMap<FileId, Option<TextRange>>,
|
||||
}
|
||||
|
||||
impl SearchScope {
|
||||
fn new(entries: NoHashHashMap<FileId, Option<TextRange>>) -> SearchScope {
|
||||
fn new(entries: IntMap<FileId, Option<TextRange>>) -> SearchScope {
|
||||
SearchScope { entries }
|
||||
}
|
||||
|
||||
/// Build a search scope spanning the entire crate graph of files.
|
||||
fn crate_graph(db: &RootDatabase) -> SearchScope {
|
||||
let mut entries = NoHashHashMap::default();
|
||||
let mut entries = IntMap::default();
|
||||
|
||||
let graph = db.crate_graph();
|
||||
for krate in graph.iter() {
|
||||
@ -108,7 +108,7 @@ impl SearchScope {
|
||||
|
||||
/// Build a search scope spanning all the reverse dependencies of the given crate.
|
||||
fn reverse_dependencies(db: &RootDatabase, of: hir::Crate) -> SearchScope {
|
||||
let mut entries = NoHashHashMap::default();
|
||||
let mut entries = IntMap::default();
|
||||
for rev_dep in of.transitive_reverse_dependencies(db) {
|
||||
let root_file = rev_dep.root_file(db);
|
||||
let source_root_id = db.file_source_root(root_file);
|
||||
@ -128,7 +128,7 @@ impl SearchScope {
|
||||
|
||||
/// Build a search scope spanning the given module and all its submodules.
|
||||
fn module_and_children(db: &RootDatabase, module: hir::Module) -> SearchScope {
|
||||
let mut entries = NoHashHashMap::default();
|
||||
let mut entries = IntMap::default();
|
||||
|
||||
let (file_id, range) = {
|
||||
let InFile { file_id, value } = module.definition_source(db);
|
||||
@ -161,7 +161,7 @@ impl SearchScope {
|
||||
|
||||
/// Build an empty search scope.
|
||||
pub fn empty() -> SearchScope {
|
||||
SearchScope::new(NoHashHashMap::default())
|
||||
SearchScope::new(IntMap::default())
|
||||
}
|
||||
|
||||
/// Build a empty search scope spanning the given file.
|
||||
|
@ -5,16 +5,16 @@
|
||||
|
||||
use std::{collections::hash_map::Entry, iter, mem};
|
||||
|
||||
use crate::SnippetCap;
|
||||
use base_db::{AnchoredPathBuf, FileId};
|
||||
use stdx::{hash::NoHashHashMap, never};
|
||||
use nohash_hasher::IntMap;
|
||||
use stdx::never;
|
||||
use syntax::{algo, ast, ted, AstNode, SyntaxNode, SyntaxNodePtr, TextRange, TextSize};
|
||||
use text_edit::{TextEdit, TextEditBuilder};
|
||||
|
||||
use crate::SnippetCap;
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct SourceChange {
|
||||
pub source_file_edits: NoHashHashMap<FileId, TextEdit>,
|
||||
pub source_file_edits: IntMap<FileId, TextEdit>,
|
||||
pub file_system_edits: Vec<FileSystemEdit>,
|
||||
pub is_snippet: bool,
|
||||
}
|
||||
@ -23,7 +23,7 @@ impl SourceChange {
|
||||
/// Creates a new SourceChange with the given label
|
||||
/// from the edits.
|
||||
pub fn from_edits(
|
||||
source_file_edits: NoHashHashMap<FileId, TextEdit>,
|
||||
source_file_edits: IntMap<FileId, TextEdit>,
|
||||
file_system_edits: Vec<FileSystemEdit>,
|
||||
) -> Self {
|
||||
SourceChange { source_file_edits, file_system_edits, is_snippet: false }
|
||||
@ -77,8 +77,8 @@ impl Extend<FileSystemEdit> for SourceChange {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NoHashHashMap<FileId, TextEdit>> for SourceChange {
|
||||
fn from(source_file_edits: NoHashHashMap<FileId, TextEdit>) -> SourceChange {
|
||||
impl From<IntMap<FileId, TextEdit>> for SourceChange {
|
||||
fn from(source_file_edits: IntMap<FileId, TextEdit>) -> SourceChange {
|
||||
SourceChange { source_file_edits, file_system_edits: Vec::new(), is_snippet: false }
|
||||
}
|
||||
}
|
||||
|
49
crates/ide-db/src/tests/line_index.rs
Normal file
49
crates/ide-db/src/tests/line_index.rs
Normal file
@ -0,0 +1,49 @@
|
||||
use line_index::{LineCol, LineIndex, WideEncoding};
|
||||
use test_utils::skip_slow_tests;
|
||||
|
||||
#[test]
|
||||
fn test_every_chars() {
|
||||
if skip_slow_tests() {
|
||||
return;
|
||||
}
|
||||
|
||||
let text: String = {
|
||||
let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
|
||||
chars.extend("\n".repeat(chars.len() / 16).chars());
|
||||
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||
stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
|
||||
chars.into_iter().collect()
|
||||
};
|
||||
assert!(text.contains('💩')); // Sanity check.
|
||||
|
||||
let line_index = LineIndex::new(&text);
|
||||
|
||||
let mut lin_col = LineCol { line: 0, col: 0 };
|
||||
let mut col_utf16 = 0;
|
||||
let mut col_utf32 = 0;
|
||||
for (offset, c) in text.char_indices() {
|
||||
let got_offset = line_index.offset(lin_col).unwrap();
|
||||
assert_eq!(usize::from(got_offset), offset);
|
||||
|
||||
let got_lin_col = line_index.line_col(got_offset);
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
|
||||
for (enc, col) in [(WideEncoding::Utf16, col_utf16), (WideEncoding::Utf32, col_utf32)] {
|
||||
let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap();
|
||||
let got_lin_col = line_index.to_utf8(enc, wide_lin_col).unwrap();
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
assert_eq!(wide_lin_col.col, col)
|
||||
}
|
||||
|
||||
if c == '\n' {
|
||||
lin_col.line += 1;
|
||||
lin_col.col = 0;
|
||||
col_utf16 = 0;
|
||||
col_utf32 = 0;
|
||||
} else {
|
||||
lin_col.col += c.len_utf8() as u32;
|
||||
col_utf16 += c.len_utf16() as u32;
|
||||
col_utf32 += 1;
|
||||
}
|
||||
}
|
||||
}
|
@ -16,6 +16,7 @@ doctest = false
|
||||
cov-mark = "2.0.0-pre.1"
|
||||
itertools = "0.10.5"
|
||||
triomphe.workspace = true
|
||||
nohash-hasher.workspace = true
|
||||
|
||||
# local deps
|
||||
hir.workspace = true
|
||||
|
@ -87,8 +87,8 @@ pub use crate::{errors::SsrError, from_comment::ssr_from_comment, matching::Matc
|
||||
use crate::{errors::bail, matching::MatchFailureReason};
|
||||
use hir::Semantics;
|
||||
use ide_db::base_db::{FileId, FilePosition, FileRange};
|
||||
use nohash_hasher::IntMap;
|
||||
use resolving::ResolvedRule;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use syntax::{ast, AstNode, SyntaxNode, TextRange};
|
||||
use text_edit::TextEdit;
|
||||
|
||||
@ -168,9 +168,9 @@ impl<'db> MatchFinder<'db> {
|
||||
}
|
||||
|
||||
/// Finds matches for all added rules and returns edits for all found matches.
|
||||
pub fn edits(&self) -> NoHashHashMap<FileId, TextEdit> {
|
||||
pub fn edits(&self) -> IntMap<FileId, TextEdit> {
|
||||
use ide_db::base_db::SourceDatabaseExt;
|
||||
let mut matches_by_file = NoHashHashMap::default();
|
||||
let mut matches_by_file = IntMap::default();
|
||||
for m in self.matches().matches {
|
||||
matches_by_file
|
||||
.entry(m.range.file_id)
|
||||
|
@ -24,6 +24,7 @@ url = "2.3.1"
|
||||
dot = "0.1.4"
|
||||
smallvec.workspace = true
|
||||
triomphe.workspace = true
|
||||
nohash-hasher.workspace = true
|
||||
|
||||
# local deps
|
||||
cfg.workspace = true
|
||||
|
@ -17,7 +17,7 @@ use ide_db::{
|
||||
RootDatabase,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use nohash_hasher::IntMap;
|
||||
use syntax::{
|
||||
algo::find_node_at_offset,
|
||||
ast::{self, HasName},
|
||||
@ -31,7 +31,7 @@ use crate::{FilePosition, NavigationTarget, TryToNav};
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ReferenceSearchResult {
|
||||
pub declaration: Option<Declaration>,
|
||||
pub references: NoHashHashMap<FileId, Vec<(TextRange, Option<ReferenceCategory>)>>,
|
||||
pub references: IntMap<FileId, Vec<(TextRange, Option<ReferenceCategory>)>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -46,6 +46,7 @@ tracing-subscriber = { version = "0.3.16", default-features = false, features =
|
||||
tracing-log = "0.1.3"
|
||||
tracing-tree = "0.2.1"
|
||||
triomphe.workspace = true
|
||||
nohash-hasher.workspace = true
|
||||
always-assert = "0.1.2"
|
||||
|
||||
# These dependencies are unused, but we pin them to a version here to restrict them for our transitive dependencies
|
||||
@ -95,7 +96,4 @@ mbe.workspace = true
|
||||
[features]
|
||||
jemalloc = ["jemallocator", "profile/jemalloc"]
|
||||
force-always-assert = ["always-assert/force"]
|
||||
in-rust-tree = [
|
||||
"ide/in-rust-tree",
|
||||
"syntax/in-rust-tree",
|
||||
]
|
||||
in-rust-tree = ["ide/in-rust-tree", "syntax/in-rust-tree"]
|
||||
|
@ -23,13 +23,14 @@ use crate::semantic_tokens;
|
||||
|
||||
pub fn server_capabilities(config: &Config) -> ServerCapabilities {
|
||||
ServerCapabilities {
|
||||
position_encoding: Some(match negotiated_encoding(config.caps()) {
|
||||
PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
|
||||
position_encoding: match negotiated_encoding(config.caps()) {
|
||||
PositionEncoding::Utf8 => Some(PositionEncodingKind::UTF8),
|
||||
PositionEncoding::Wide(wide) => match wide {
|
||||
WideEncoding::Utf16 => PositionEncodingKind::UTF16,
|
||||
WideEncoding::Utf32 => PositionEncodingKind::UTF32,
|
||||
WideEncoding::Utf16 => Some(PositionEncodingKind::UTF16),
|
||||
WideEncoding::Utf32 => Some(PositionEncodingKind::UTF32),
|
||||
_ => None,
|
||||
},
|
||||
}),
|
||||
},
|
||||
text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {
|
||||
open_close: Some(true),
|
||||
change: Some(TextDocumentSyncKind::INCREMENTAL),
|
||||
|
@ -5,12 +5,12 @@ use std::mem;
|
||||
|
||||
use ide::FileId;
|
||||
use ide_db::FxHashMap;
|
||||
use stdx::hash::{NoHashHashMap, NoHashHashSet};
|
||||
use nohash_hasher::{IntMap, IntSet};
|
||||
use triomphe::Arc;
|
||||
|
||||
use crate::lsp_ext;
|
||||
|
||||
pub(crate) type CheckFixes = Arc<NoHashHashMap<usize, NoHashHashMap<FileId, Vec<Fix>>>>;
|
||||
pub(crate) type CheckFixes = Arc<IntMap<usize, IntMap<FileId, Vec<Fix>>>>;
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct DiagnosticsMapConfig {
|
||||
@ -21,12 +21,12 @@ pub struct DiagnosticsMapConfig {
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub(crate) struct DiagnosticCollection {
|
||||
// FIXME: should be NoHashHashMap<FileId, Vec<ra_id::Diagnostic>>
|
||||
pub(crate) native: NoHashHashMap<FileId, Vec<lsp_types::Diagnostic>>,
|
||||
// FIXME: should be IntMap<FileId, Vec<ra_id::Diagnostic>>
|
||||
pub(crate) native: IntMap<FileId, Vec<lsp_types::Diagnostic>>,
|
||||
// FIXME: should be Vec<flycheck::Diagnostic>
|
||||
pub(crate) check: NoHashHashMap<usize, NoHashHashMap<FileId, Vec<lsp_types::Diagnostic>>>,
|
||||
pub(crate) check: IntMap<usize, IntMap<FileId, Vec<lsp_types::Diagnostic>>>,
|
||||
pub(crate) check_fixes: CheckFixes,
|
||||
changes: NoHashHashSet<FileId>,
|
||||
changes: IntSet<FileId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@ -106,7 +106,7 @@ impl DiagnosticCollection {
|
||||
native.chain(check)
|
||||
}
|
||||
|
||||
pub(crate) fn take_changes(&mut self) -> Option<NoHashHashSet<FileId>> {
|
||||
pub(crate) fn take_changes(&mut self) -> Option<IntSet<FileId>> {
|
||||
if self.changes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
@ -3,7 +3,6 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use itertools::Itertools;
|
||||
use stdx::format_to;
|
||||
use vfs::{AbsPath, AbsPathBuf};
|
||||
@ -80,37 +79,33 @@ fn position(
|
||||
position_encoding: &PositionEncoding,
|
||||
span: &DiagnosticSpan,
|
||||
line_offset: usize,
|
||||
column_offset: usize,
|
||||
column_offset_utf32: usize,
|
||||
) -> lsp_types::Position {
|
||||
let line_index = line_offset - span.line_start;
|
||||
|
||||
let mut true_column_offset = column_offset;
|
||||
if let Some(line) = span.text.get(line_index) {
|
||||
if line.text.chars().count() == line.text.len() {
|
||||
// all one byte utf-8 char
|
||||
return lsp_types::Position {
|
||||
line: (line_offset as u32).saturating_sub(1),
|
||||
character: (column_offset as u32).saturating_sub(1),
|
||||
};
|
||||
}
|
||||
let mut char_offset = 0;
|
||||
let len_func = match position_encoding {
|
||||
PositionEncoding::Utf8 => char::len_utf8,
|
||||
PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16,
|
||||
PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1,
|
||||
};
|
||||
for c in line.text.chars() {
|
||||
char_offset += 1;
|
||||
if char_offset > column_offset {
|
||||
break;
|
||||
let column_offset_encoded = match span.text.get(line_index) {
|
||||
// Fast path.
|
||||
Some(line) if line.text.is_ascii() => column_offset_utf32,
|
||||
Some(line) => {
|
||||
let line_prefix_len = line
|
||||
.text
|
||||
.char_indices()
|
||||
.take(column_offset_utf32)
|
||||
.last()
|
||||
.map(|(pos, c)| pos + c.len_utf8())
|
||||
.unwrap_or(0);
|
||||
let line_prefix = &line.text[..line_prefix_len];
|
||||
match position_encoding {
|
||||
PositionEncoding::Utf8 => line_prefix.len(),
|
||||
PositionEncoding::Wide(enc) => enc.measure(line_prefix),
|
||||
}
|
||||
true_column_offset += len_func(c) - 1;
|
||||
}
|
||||
}
|
||||
None => column_offset_utf32,
|
||||
};
|
||||
|
||||
lsp_types::Position {
|
||||
line: (line_offset as u32).saturating_sub(1),
|
||||
character: (true_column_offset as u32).saturating_sub(1),
|
||||
character: (column_offset_encoded as u32).saturating_sub(1),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -31,7 +31,10 @@ pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> R
|
||||
PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },
|
||||
PositionEncoding::Wide(enc) => {
|
||||
let line_col = WideLineCol { line: position.line, col: position.character };
|
||||
line_index.index.to_utf8(enc, line_col)
|
||||
line_index
|
||||
.index
|
||||
.to_utf8(enc, line_col)
|
||||
.ok_or_else(|| format_err!("Invalid wide col offset"))?
|
||||
}
|
||||
};
|
||||
let text_size =
|
||||
|
@ -10,11 +10,11 @@ use flycheck::FlycheckHandle;
|
||||
use ide::{Analysis, AnalysisHost, Cancellable, Change, FileId};
|
||||
use ide_db::base_db::{CrateId, FileLoader, ProcMacroPaths, SourceDatabase};
|
||||
use lsp_types::{SemanticTokens, Url};
|
||||
use nohash_hasher::IntMap;
|
||||
use parking_lot::{Mutex, RwLock};
|
||||
use proc_macro_api::ProcMacroServer;
|
||||
use project_model::{CargoWorkspace, ProjectWorkspace, Target, WorkspaceBuildScripts};
|
||||
use rustc_hash::FxHashMap;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
use triomphe::Arc;
|
||||
use vfs::AnchoredPathBuf;
|
||||
|
||||
@ -70,7 +70,7 @@ pub(crate) struct GlobalState {
|
||||
pub(crate) flycheck_sender: Sender<flycheck::Message>,
|
||||
pub(crate) flycheck_receiver: Receiver<flycheck::Message>,
|
||||
|
||||
pub(crate) vfs: Arc<RwLock<(vfs::Vfs, NoHashHashMap<FileId, LineEndings>)>>,
|
||||
pub(crate) vfs: Arc<RwLock<(vfs::Vfs, IntMap<FileId, LineEndings>)>>,
|
||||
pub(crate) vfs_config_version: u32,
|
||||
pub(crate) vfs_progress_config_version: u32,
|
||||
pub(crate) vfs_progress_n_total: usize,
|
||||
@ -117,7 +117,7 @@ pub(crate) struct GlobalStateSnapshot {
|
||||
pub(crate) check_fixes: CheckFixes,
|
||||
mem_docs: MemDocs,
|
||||
pub(crate) semantic_tokens_cache: Arc<Mutex<FxHashMap<Url, SemanticTokens>>>,
|
||||
vfs: Arc<RwLock<(vfs::Vfs, NoHashHashMap<FileId, LineEndings>)>>,
|
||||
vfs: Arc<RwLock<(vfs::Vfs, IntMap<FileId, LineEndings>)>>,
|
||||
pub(crate) workspaces: Arc<Vec<ProjectWorkspace>>,
|
||||
// used to signal semantic highlighting to fall back to syntax based highlighting until proc-macros have been loaded
|
||||
pub(crate) proc_macros_loaded: bool,
|
||||
@ -170,7 +170,7 @@ impl GlobalState {
|
||||
flycheck_sender,
|
||||
flycheck_receiver,
|
||||
|
||||
vfs: Arc::new(RwLock::new((vfs::Vfs::default(), NoHashHashMap::default()))),
|
||||
vfs: Arc::new(RwLock::new((vfs::Vfs::default(), IntMap::default()))),
|
||||
vfs_config_version: 0,
|
||||
vfs_progress_config_version: 0,
|
||||
vfs_progress_n_total: 0,
|
||||
|
@ -32,7 +32,7 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P
|
||||
match line_index.encoding {
|
||||
PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),
|
||||
PositionEncoding::Wide(enc) => {
|
||||
let line_col = line_index.index.to_wide(enc, line_col);
|
||||
let line_col = line_index.index.to_wide(enc, line_col).unwrap();
|
||||
lsp_types::Position::new(line_col.line, line_col.col)
|
||||
}
|
||||
}
|
||||
|
@ -1,80 +0,0 @@
|
||||
//! A non-hashing [`Hasher`] implementation.
|
||||
use std::{
|
||||
hash::{BuildHasher, Hasher},
|
||||
marker::PhantomData,
|
||||
};
|
||||
|
||||
pub type NoHashHashMap<K, V> = std::collections::HashMap<K, V, NoHashHasherBuilder<K>>;
|
||||
pub type NoHashHashSet<K> = std::collections::HashSet<K, NoHashHasherBuilder<K>>;
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub struct NoHashHasherBuilder<T>(PhantomData<T>);
|
||||
|
||||
impl<T> Default for NoHashHasherBuilder<T> {
|
||||
fn default() -> Self {
|
||||
Self(Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait NoHashHashable {}
|
||||
impl NoHashHashable for usize {}
|
||||
impl NoHashHashable for u32 {}
|
||||
|
||||
pub struct NoHashHasher(u64);
|
||||
|
||||
impl<T: NoHashHashable> BuildHasher for NoHashHasherBuilder<T> {
|
||||
type Hasher = NoHashHasher;
|
||||
fn build_hasher(&self) -> Self::Hasher {
|
||||
NoHashHasher(0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Hasher for NoHashHasher {
|
||||
fn finish(&self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
fn write(&mut self, _: &[u8]) {
|
||||
unimplemented!("NoHashHasher should only be used for hashing primitive integers")
|
||||
}
|
||||
|
||||
fn write_u8(&mut self, i: u8) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_u16(&mut self, i: u16) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_u32(&mut self, i: u32) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_u64(&mut self, i: u64) {
|
||||
self.0 = i;
|
||||
}
|
||||
|
||||
fn write_usize(&mut self, i: usize) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_i8(&mut self, i: i8) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_i16(&mut self, i: i16) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_i32(&mut self, i: i32) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_i64(&mut self, i: i64) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
|
||||
fn write_isize(&mut self, i: isize) {
|
||||
self.0 = i as u64;
|
||||
}
|
||||
}
|
@ -7,7 +7,6 @@ use std::process::Command;
|
||||
use std::{cmp::Ordering, ops, time::Instant};
|
||||
|
||||
mod macros;
|
||||
pub mod hash;
|
||||
pub mod process;
|
||||
pub mod panic_context;
|
||||
pub mod non_empty_vec;
|
||||
|
@ -14,7 +14,7 @@ doctest = false
|
||||
[dependencies]
|
||||
# Avoid adding deps here, this crate is widely used in tests it should compile fast!
|
||||
dissimilar = "1.0.4"
|
||||
text-size = "1.1.0"
|
||||
text-size.workspace = true
|
||||
rustc-hash = "1.1.0"
|
||||
|
||||
stdx.workspace = true
|
||||
|
@ -13,4 +13,4 @@ doctest = false
|
||||
|
||||
[dependencies]
|
||||
itertools = "0.10.5"
|
||||
text-size = "1.1.0"
|
||||
text-size.workspace = true
|
||||
|
@ -15,6 +15,7 @@ doctest = false
|
||||
rustc-hash = "1.1.0"
|
||||
fst = "0.4.7"
|
||||
indexmap = "1.9.1"
|
||||
nohash-hasher.workspace = true
|
||||
|
||||
paths.workspace = true
|
||||
stdx.workspace = true
|
||||
|
@ -5,8 +5,8 @@
|
||||
use std::fmt;
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use nohash_hasher::IntMap;
|
||||
use rustc_hash::FxHashMap;
|
||||
use stdx::hash::NoHashHashMap;
|
||||
|
||||
use crate::{AnchoredPath, FileId, Vfs, VfsPath};
|
||||
|
||||
@ -14,7 +14,7 @@ use crate::{AnchoredPath, FileId, Vfs, VfsPath};
|
||||
#[derive(Default, Clone, Eq, PartialEq)]
|
||||
pub struct FileSet {
|
||||
files: FxHashMap<VfsPath, FileId>,
|
||||
paths: NoHashHashMap<FileId, VfsPath>,
|
||||
paths: IntMap<FileId, VfsPath>,
|
||||
}
|
||||
|
||||
impl FileSet {
|
||||
|
@ -62,7 +62,8 @@ pub use paths::{AbsPath, AbsPathBuf};
|
||||
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||
pub struct FileId(pub u32);
|
||||
|
||||
impl stdx::hash::NoHashHashable for FileId {}
|
||||
/// safe because `FileId` is a newtype of `u32`
|
||||
impl nohash_hasher::IsEnabled for FileId {}
|
||||
|
||||
/// Storage for all files read by rust-analyzer.
|
||||
///
|
||||
|
11
lib/line-index/Cargo.toml
Normal file
11
lib/line-index/Cargo.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "line-index"
|
||||
version = "0.1.0-pre.1"
|
||||
description = "Maps flat `TextSize` offsets to/from `(line, column)` representation."
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
text-size.workspace = true
|
||||
nohash-hasher.workspace = true
|
237
lib/line-index/src/lib.rs
Normal file
237
lib/line-index/src/lib.rs
Normal file
@ -0,0 +1,237 @@
|
||||
//! See [`LineIndex`].
|
||||
|
||||
#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use nohash_hasher::IntMap;
|
||||
|
||||
pub use text_size::{TextRange, TextSize};
|
||||
|
||||
/// A `(line, column)` position expressed in the native UTF-8 encoding.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Line number, counted from zero.
    pub line: u32,
    /// Offset within the line in UTF-8 code units (bytes), counted from zero.
    pub col: u32,
}
|
||||
|
||||
/// The wide (non-UTF-8) character encodings a column can be expressed in.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum WideEncoding {
    /// UTF-16: columns count 16-bit code units.
    Utf16,
    /// UTF-32: columns count 32-bit code units, i.e. one per `char`.
    Utf32,
}

impl WideEncoding {
    /// Counts how many code units of this encoding are needed to represent `text`.
    pub fn measure(&self, text: &str) -> usize {
        match *self {
            // Each `char` contributes one or two UTF-16 code units.
            WideEncoding::Utf16 => text.chars().map(char::len_utf16).sum(),
            // Each `char` is exactly one UTF-32 code unit.
            WideEncoding::Utf32 => text.chars().count(),
        }
    }
}
|
||||
|
||||
/// A `(line, column)` position whose column is measured in a wide encoding.
///
/// The available wide encodings are listed in [`WideEncoding`].
//
// Deliberately not a generic type and different from `LineCol`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct WideLineCol {
    /// Line number, counted from zero.
    pub line: u32,
    /// Column within the line, counted from zero.
    pub col: u32,
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
struct WideChar {
|
||||
/// Start offset of a character inside a line, zero-based.
|
||||
start: TextSize,
|
||||
/// End offset of a character inside a line, zero-based.
|
||||
end: TextSize,
|
||||
}
|
||||
|
||||
impl WideChar {
|
||||
/// Returns the length in 8-bit UTF-8 code units.
|
||||
fn len(&self) -> TextSize {
|
||||
self.end - self.start
|
||||
}
|
||||
|
||||
/// Returns the length in UTF-16 or UTF-32 code units.
|
||||
fn wide_len(&self, enc: WideEncoding) -> u32 {
|
||||
match enc {
|
||||
WideEncoding::Utf16 => {
|
||||
if self.len() == TextSize::from(4) {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
WideEncoding::Utf32 => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct LineIndex {
|
||||
/// Offset the beginning of each line (except the first, which always has offset 0).
|
||||
newlines: Box<[TextSize]>,
|
||||
/// List of non-ASCII characters on each line.
|
||||
line_wide_chars: IntMap<u32, Box<[WideChar]>>,
|
||||
/// The length of the entire text.
|
||||
len: TextSize,
|
||||
}
|
||||
|
||||
impl LineIndex {
|
||||
/// Returns a `LineIndex` for the `text`.
|
||||
pub fn new(text: &str) -> LineIndex {
|
||||
let mut newlines = Vec::<TextSize>::with_capacity(16);
|
||||
let mut line_wide_chars = IntMap::<u32, Box<[WideChar]>>::default();
|
||||
|
||||
let mut wide_chars = Vec::<WideChar>::new();
|
||||
let mut cur_row = TextSize::from(0);
|
||||
let mut cur_col = TextSize::from(0);
|
||||
let mut line = 0u32;
|
||||
|
||||
for c in text.chars() {
|
||||
let c_len = TextSize::of(c);
|
||||
cur_row += c_len;
|
||||
if c == '\n' {
|
||||
newlines.push(cur_row);
|
||||
|
||||
// Save any wide characters seen in the previous line
|
||||
if !wide_chars.is_empty() {
|
||||
let cs = std::mem::take(&mut wide_chars).into_boxed_slice();
|
||||
line_wide_chars.insert(line, cs);
|
||||
}
|
||||
|
||||
// Prepare for processing the next line
|
||||
cur_col = TextSize::from(0);
|
||||
line += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if !c.is_ascii() {
|
||||
wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len });
|
||||
}
|
||||
|
||||
cur_col += c_len;
|
||||
}
|
||||
|
||||
// Save any wide characters seen in the last line
|
||||
if !wide_chars.is_empty() {
|
||||
line_wide_chars.insert(line, wide_chars.into_boxed_slice());
|
||||
}
|
||||
|
||||
LineIndex {
|
||||
newlines: newlines.into_boxed_slice(),
|
||||
line_wide_chars,
|
||||
len: TextSize::of(text),
|
||||
}
|
||||
}
|
||||
|
||||
/// Transforms the `TextSize` into a `LineCol`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the offset is invalid. See [`Self::try_line_col`].
|
||||
pub fn line_col(&self, offset: TextSize) -> LineCol {
|
||||
self.try_line_col(offset).expect("invalid offset")
|
||||
}
|
||||
|
||||
/// Transforms the `TextSize` into a `LineCol`.
|
||||
///
|
||||
/// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or
|
||||
/// points to the middle of a multi-byte character.
|
||||
pub fn try_line_col(&self, offset: TextSize) -> Option<LineCol> {
|
||||
if offset > self.len {
|
||||
return None;
|
||||
}
|
||||
let line = self.newlines.partition_point(|&it| it <= offset);
|
||||
let start = self.start_offset(line)?;
|
||||
let col = offset - start;
|
||||
let ret = LineCol { line: line as u32, col: col.into() };
|
||||
self.line_wide_chars
|
||||
.get(&ret.line)
|
||||
.into_iter()
|
||||
.flat_map(|it| it.iter())
|
||||
.all(|it| col <= it.start || it.end <= col)
|
||||
.then_some(ret)
|
||||
}
|
||||
|
||||
/// Transforms the `LineCol` into a `TextSize`.
|
||||
pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
|
||||
self.start_offset(line_col.line as usize).map(|start| start + TextSize::from(line_col.col))
|
||||
}
|
||||
|
||||
fn start_offset(&self, line: usize) -> Option<TextSize> {
|
||||
match line.checked_sub(1) {
|
||||
None => Some(TextSize::from(0)),
|
||||
Some(it) => self.newlines.get(it).copied(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`.
|
||||
pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option<WideLineCol> {
|
||||
let mut col = line_col.col;
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {
|
||||
for c in wide_chars.iter() {
|
||||
if u32::from(c.end) <= line_col.col {
|
||||
col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))?;
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(WideLineCol { line: line_col.line, col })
|
||||
}
|
||||
|
||||
/// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`.
|
||||
pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option<LineCol> {
|
||||
let mut col = line_col.col;
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {
|
||||
for c in wide_chars.iter() {
|
||||
if col > u32::from(c.start) {
|
||||
col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))?;
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(LineCol { line: line_col.line, col })
|
||||
}
|
||||
|
||||
/// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1,
|
||||
/// x2), ..., [xn, end) where all the xi, which are positions of newlines, are inside the range
|
||||
/// [start, end).
|
||||
pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
|
||||
let lo = self.newlines.partition_point(|&it| it < range.start());
|
||||
let hi = self.newlines.partition_point(|&it| it <= range.end());
|
||||
let all = std::iter::once(range.start())
|
||||
.chain(self.newlines[lo..hi].iter().copied())
|
||||
.chain(std::iter::once(range.end()));
|
||||
|
||||
all.clone()
|
||||
.zip(all.skip(1))
|
||||
.map(|(lo, hi)| TextRange::new(lo, hi))
|
||||
.filter(|it| !it.is_empty())
|
||||
}
|
||||
|
||||
/// Returns the length of the original text.
|
||||
pub fn len(&self) -> TextSize {
|
||||
self.len
|
||||
}
|
||||
}
|
11
lib/line-index/src/tests.rs
Normal file
11
lib/line-index/src/tests.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use super::LineIndex;
|
||||
|
||||
/// Text made only of ASCII characters must not record any wide characters.
#[test]
fn test_empty_index() {
    let ascii_only = "
const C: char = 'x';
";
    let index = LineIndex::new(ascii_only);
    assert_eq!(index.line_wide_chars.len(), 0);
}
|
62
lib/line-index/tests/it.rs
Normal file
62
lib/line-index/tests/it.rs
Normal file
@ -0,0 +1,62 @@
|
||||
use line_index::{LineCol, LineIndex, TextRange};
|
||||
|
||||
/// Checks `line_col` against hand-written `(offset, line, col)` tables.
#[test]
fn test_line_index() {
    // Builds an index for `text` and verifies every `(offset, line, col)` row.
    fn check(text: &str, rows: &[(u32, u32, u32)]) {
        let index = LineIndex::new(text);
        for &(offset, line, col) in rows {
            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
        }
    }

    check(
        "hello\nworld",
        &[
            (0, 0, 0),
            (1, 0, 1),
            (5, 0, 5),
            (6, 1, 0),
            (7, 1, 1),
            (8, 1, 2),
            (10, 1, 4),
            (11, 1, 5),
        ],
    );

    // A leading newline makes the first line empty.
    check("\nhello\nworld", &[(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]);
}
|
||||
|
||||
/// Sanity check: 'メ' takes three UTF-8 code units but only one UTF-16 code unit.
#[test]
fn test_char_len() {
    let sample = 'メ';
    assert_eq!(sample.len_utf8(), 3);
    assert_eq!(sample.len_utf16(), 1);
}
|
||||
|
||||
/// Checks that `lines` splits a range after each newline and drops empty pieces.
#[test]
fn test_splitlines() {
    fn r(lo: u32, hi: u32) -> TextRange {
        TextRange::new(lo.into(), hi.into())
    }

    // Indexes `text` and collects the pieces `lines` produces for `range`.
    fn split(text: &str, range: TextRange) -> Vec<TextRange> {
        let line_index = LineIndex::new(text);
        let pieces: Vec<TextRange> = line_index.lines(range).collect();
        pieces
    }

    // Every piece keeps its trailing newline.
    assert_eq!(split("a\nbb\nccc\n", r(0, 9)), vec![r(0, 2), r(2, 5), r(5, 9)]);

    // Empty text yields no pieces at all.
    assert_eq!(split("", r(0, 0)), Vec::<TextRange>::new());

    // A lone newline is a single piece.
    assert_eq!(split("\n", r(0, 1)), vec![r(0, 1)]);
}
|
Loading…
x
Reference in New Issue
Block a user