//! We maintain the invariant that all internal strings use `\n` as the line
//! separator. This module does line ending conversion and detection (so that
//! we can convert back to `\r\n` on the way out).

/// Line-ending convention detected while normalizing a piece of text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum LineEndings {
    /// The text contained no `\r\n` sequence.
    Unix,
    /// The text contained at least one `\r\n` sequence.
    Dos,
}

impl LineEndings {
    /// Replaces `\r\n` with `\n` in-place in `src`.
    ///
    /// Returns the normalized text together with the detected convention:
    /// [`LineEndings::Dos`] if at least one `\r\n` pair was rewritten and
    /// [`LineEndings::Unix`] otherwise. A `\r` that is not immediately followed
    /// by `\n` is kept as-is and does not, on its own, make the text `Dos`.
    pub(crate) fn normalize(src: String) -> (String, LineEndings) {
        /// Returns the index of the `\r` of the first `\r\n` pair in `bytes`.
        fn find_crlf(bytes: &[u8]) -> Option<usize> {
            bytes.windows(2).position(|pair| pair == b"\r\n")
        }

        // Only a real `\r\n` pair makes the text DOS. Merely containing a `\r`
        // byte is not enough: reporting `Dos` for such text would be wrong
        // because nothing was actually converted.
        let first_crlf = match find_crlf(src.as_bytes()) {
            Some(idx) => idx,
            None => return (src, LineEndings::Unix),
        };

        // We drop the `\r` of every `\r\n` in-place by compacting the buffer.
        // Rather than doing surgery on the live string, steal its bytes: if a
        // panic occurs midway, no `String` with broken contents is observable.
        let mut buf = src.into_bytes();

        // `write` is the end of the compacted prefix. `read` is the start of
        // the not-yet-processed suffix; it always points just past a dropped
        // `\r`, i.e. at the `\n` of the pair, so `write < read` holds.
        let mut write = first_crlf;
        let mut read = first_crlf + 1;
        loop {
            let next = find_crlf(&buf[read..]).map_or(buf.len(), |idx| read + idx);
            buf.copy_within(read..next, write);
            write += next - read;
            if next == buf.len() {
                break;
            }
            // Skip the `\r` found at `next`; its `\n` is copied next round.
            read = next + 1;
        }
        // Cut off the stale bytes left behind by the compaction.
        buf.truncate(write);

        // SAFETY: `buf` held valid UTF-8 and we only removed `\r` (0x0D) bytes.
        // An ASCII byte is never part of a multi-byte UTF-8 sequence, so every
        // remaining sequence is intact and the result is still valid UTF-8.
        let src = unsafe { String::from_utf8_unchecked(buf) };
        (src, LineEndings::Dos)
    }
}
|