2021-02-12 16:28:48 -06:00
|
|
|
//! Enhances `ide::LineIndex` with additional info required to convert offsets
//! into LSP positions.
//!
//! We maintain the invariant that all internal strings use `\n` as line separator.
//! This module does line ending conversion and detection (so that we can
//! convert back to `\r\n` on the way out).
|
|
|
|
|
2023-02-13 18:56:28 -06:00
|
|
|
use ide_db::line_index::WideEncoding;
|
2024-01-18 03:03:29 -06:00
|
|
|
use memchr::memmem;
|
2023-05-02 09:12:22 -05:00
|
|
|
use triomphe::Arc;
|
2023-02-13 18:56:28 -06:00
|
|
|
|
|
|
|
#[derive(Clone, Copy)]
|
2022-10-25 06:43:26 -05:00
|
|
|
pub enum PositionEncoding {
|
2021-02-12 15:55:27 -06:00
|
|
|
Utf8,
|
2023-02-13 18:56:28 -06:00
|
|
|
Wide(WideEncoding),
|
2021-02-12 15:55:27 -06:00
|
|
|
}
|
|
|
|
|
2021-02-12 15:44:28 -06:00
|
|
|
pub(crate) struct LineIndex {
|
|
|
|
pub(crate) index: Arc<ide::LineIndex>,
|
|
|
|
pub(crate) endings: LineEndings,
|
2022-10-25 06:43:26 -05:00
|
|
|
pub(crate) encoding: PositionEncoding,
|
2021-02-12 15:44:28 -06:00
|
|
|
}
|
|
|
|
|
2020-06-11 04:04:09 -05:00
|
|
|
/// Line-ending style of a piece of text, as reported by
/// `LineEndings::normalize`.
///
/// The variant order matters: the derived `PartialOrd`/`Ord` make
/// `Unix < Dos`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum LineEndings {
    /// The text contained no `\r\n` pair.
    Unix,
    /// The text contained at least one `\r\n` pair.
    Dos,
}
|
|
|
|
|
|
|
|
impl LineEndings {
|
|
|
|
/// Replaces `\r\n` with `\n` in-place in `src`.
|
|
|
|
pub(crate) fn normalize(src: String) -> (String, LineEndings) {
|
|
|
|
// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
|
|
|
|
// While we *can* call `as_mut_vec` and do surgery on the live string
|
|
|
|
// directly, let's rather steal the contents of `src`. This makes the code
|
|
|
|
// safe even if a panic occurs.
|
|
|
|
|
|
|
|
let mut buf = src.into_bytes();
|
|
|
|
let mut gap_len = 0;
|
|
|
|
let mut tail = buf.as_mut_slice();
|
2022-11-05 05:00:17 -05:00
|
|
|
let mut crlf_seen = false;
|
|
|
|
|
2024-01-18 03:03:29 -06:00
|
|
|
let finder = memmem::Finder::new(b"\r\n");
|
2022-11-05 05:00:17 -05:00
|
|
|
|
2020-06-11 04:04:09 -05:00
|
|
|
loop {
|
2024-01-18 03:03:29 -06:00
|
|
|
let idx = match finder.find(&tail[gap_len..]) {
|
2022-11-05 05:00:17 -05:00
|
|
|
None if crlf_seen => tail.len(),
|
2022-11-07 04:53:33 -06:00
|
|
|
// SAFETY: buf is unchanged and therefore still contains utf8 data
|
2022-11-05 05:00:17 -05:00
|
|
|
None => return (unsafe { String::from_utf8_unchecked(buf) }, LineEndings::Unix),
|
|
|
|
Some(idx) => {
|
|
|
|
crlf_seen = true;
|
|
|
|
idx + gap_len
|
|
|
|
}
|
2020-06-11 04:04:09 -05:00
|
|
|
};
|
|
|
|
tail.copy_within(gap_len..idx, 0);
|
|
|
|
tail = &mut tail[idx - gap_len..];
|
|
|
|
if tail.len() == gap_len {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
gap_len += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Account for removed `\r`.
|
|
|
|
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
|
|
|
|
let src = unsafe {
|
2022-11-05 05:00:17 -05:00
|
|
|
let new_len = buf.len() - gap_len;
|
2020-06-11 04:04:09 -05:00
|
|
|
buf.set_len(new_len);
|
|
|
|
String::from_utf8_unchecked(buf)
|
|
|
|
};
|
2022-11-05 05:00:17 -05:00
|
|
|
(src, LineEndings::Dos)
|
|
|
|
}
|
|
|
|
}
|
2020-06-11 04:04:09 -05:00
|
|
|
|
2022-11-05 05:00:17 -05:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Text that only uses `\n` is returned unchanged and reported as `Unix`.
    #[test]
    fn unix() {
        let src = "a\nb\nc\n\n\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }

    /// Every `\r\n` is collapsed to `\n` and the text is reported as `Dos`.
    #[test]
    fn dos() {
        let src = "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "\na\n\nb\nc\n\n\n\n");
    }

    /// A mix of `\r\n` and bare `\n` is reported as `Dos`; bare `\n` is kept.
    #[test]
    fn mixed() {
        let src = "a\r\nb\r\nc\r\n\n\r\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "a\nb\nc\n\n\n\n");
    }

    /// Text without any line break is returned unchanged and reported as `Unix`.
    #[test]
    fn none() {
        let src = "abc";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }
}
|