// rust/lib/line-index/src/tests.rs

use crate::{LineCol, LineIndex, TextSize, WideChar, WideEncoding, WideLineCol};

/// Declares a `#[test]` function named `$test_name` that builds a `LineIndex` from `$text`
/// and compares two of its tables against hand-written expectations.
///
/// * `lines` — the expected contents of `LineIndex::newlines`, written as `u32` offsets.
/// * `multi_byte_chars` — the expected contents of `LineIndex::line_wide_chars`, written as
///   `(line, (start, end))` tuples, one tuple per wide character.
///
/// NOTE(review): the second comparison is order-sensitive, so it relies on
/// `line_wide_chars.iter()` yielding lines in the order they are listed in
/// `multi_byte_chars` — confirm this holds for the container type used.
macro_rules! test {
    (
        case: $test_name:ident,
        text: $text:expr,
        lines: $lines:expr,
        multi_byte_chars: $multi_byte_chars:expr,
    ) => {
        #[test]
        fn $test_name() {
            let index = LineIndex::new($text);

            // Lift the raw `u32` offsets into `TextSize`. The fully qualified `From<u32>`
            // path keeps type inference working when the expectation is an empty `vec![]`.
            let want_newlines = $lines
                .into_iter()
                .map(<TextSize as From<u32>>::from)
                .collect::<Vec<TextSize>>();

            assert_eq!(&index.newlines[..], want_newlines.as_slice());

            // Expand each `(line, (start, end))` tuple into the `(line, WideChar)` pair that
            // the index is expected to hold.
            let want_wide_chars = $multi_byte_chars
                .into_iter()
                .map(|(line, (start, end)): (u32, (u32, u32))| {
                    let wide = WideChar { start: TextSize::from(start), end: TextSize::from(end) };
                    (line, wide)
                })
                .collect::<Vec<_>>();

            // Flatten the per-line table into one `(line, WideChar)` pair per wide character.
            let got_wide_chars: Vec<_> = index
                .line_wide_chars
                .iter()
                .flat_map(|(line, chars)| chars.iter().map(move |wide| (*line, *wide)))
                .collect();

            assert_eq!(got_wide_chars, want_wide_chars);
        }
    };
}

// Empty input: neither line offsets nor wide characters are expected.
test!(
    case: empty_text,
    text: "",
    lines: vec![],
    multi_byte_chars: vec![],
);

// The only `\n` sits at byte 1, so the single expected entry is 2: the offset just past it,
// i.e. where the next line starts.
test!(
    case: newlines_short,
    text: "a\nc",
    lines: vec![2],
    multi_byte_chars: vec![],
);

// ASCII-only text with `\n` at bytes 9 and 25; the expected entries are the offsets just past
// each of them (10 and 26).
test!(
    case: newlines_long,
    text: "012345678\nabcdef012345678\na",
    lines: vec![10, 26],
    multi_byte_chars: vec![],
);

// `β` takes two bytes in UTF-8 (bytes 5..7 of line 0) and the `\n` follows at byte 10, so the
// next line starts at 11.
// NOTE(review): "chunk" in the case name presumably refers to how `LineIndex::new` scans the
// text in blocks — confirm against the implementation.
test!(
    case: newline_and_multi_byte_char_in_same_chunk,
    text: "01234β789\nbcdef0123456789abcdef",
    lines: vec![11],
    multi_byte_chars: vec![(0, (5, 7))],
);

// `\u{07}` (BEL) is a single-byte control character, so no wide char is expected even though
// it is not printable. The `\n` is at byte 10, so the next line starts at 11.
test!(
    case: newline_and_control_char_in_same_chunk,
    text: "01234\u{07}6789\nbcdef0123456789abcdef",
    lines: vec![11],
    multi_byte_chars: vec![],
);

// Single line, no newline: the two-byte `β` occupies bytes 1..3 of line 0.
test!(
    case: multi_byte_char_short,
    text: "aβc",
    lines: vec![],
    multi_byte_chars: vec![(0, (1, 3))],
);

// Two two-byte characters on the same line: `Δ` at bytes 13..15 and the trailing `β` at
// bytes 22..24. Both are expected under line 0, in text order.
test!(
    case: multi_byte_char_long,
    text: "0123456789abcΔf012345β",
    lines: vec![],
    multi_byte_chars: vec![(0, (13, 15)), (0, (22, 24))],
);

// `Δ` occupies bytes 15..17, so it straddles byte offset 16, and more ASCII text follows it.
// NOTE(review): offset 16 is presumably the chunk boundary the case name refers to — confirm
// the chunk size against `LineIndex::new`.
test!(
    case: multi_byte_char_across_chunk_boundary,
    text: "0123456789abcdeΔ123456789abcdef01234",
    lines: vec![],
    multi_byte_chars: vec![(0, (15, 17))],
);

// Same straddling `Δ` at bytes 15..17 as the previous case, but only four ASCII bytes follow
// it, so the text ends shortly after byte offset 16.
// NOTE(review): presumably this exercises a partial final chunk — confirm against
// `LineIndex::new`.
test!(
    case: multi_byte_char_across_chunk_boundary_tail,
    text: "0123456789abcdeΔ....",
    lines: vec![],
    multi_byte_chars: vec![(0, (15, 17))],
);

// Three lines, starting at bytes 0, 7 and 27. `\t` and `\u{07}` are single-byte, so only the
// two `Δ` are expected as wide chars.
// The expected ranges are relative to the start of their own line: the first `Δ` is at
// absolute bytes 13..15 but is listed as (1, (6, 8)) because line 1 starts at byte 7; the
// second is at absolute bytes 29..31 and is listed as (2, (2, 4)).
test!(
    case: multi_byte_with_new_lines,
    text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
    lines: vec![7, 27],
    multi_byte_chars: vec![(1, (6, 8)), (2, (2, 4))],
);

// The `\n` is the final byte (offset 10). An entry is still expected for the empty line after
// it; its offset, 11, equals the length of the text.
test!(
    case: trailing_newline,
    text: "0123456789\n",
    lines: vec![11],
    multi_byte_chars: vec![],
);

// The text is exactly 16 bytes long with the `\n` as its last byte (offset 15), so the expected
// entry, 16, equals the length of the text.
// NOTE(review): 16 bytes is presumably exactly one chunk — confirm against `LineIndex::new`.
test!(
    case: trailing_newline_chunk_boundary,
    text: "0123456789abcde\n",
    lines: vec![16],
    multi_byte_chars: vec![],
);

/// `try_line_col` must map a byte offset that lies after a multi-byte character to the
/// matching UTF-8 line/column pair.
#[test]
fn test_try_line_col() {
    // Five empty lines; line 5 begins with `宽` (three bytes in UTF-8) followed by ASCII digits.
    let source = "\n\n\n\n\n宽3456";
    assert_eq!(&source[5..8], "宽");
    assert_eq!(&source[11..12], "6");

    // Byte 11 is the `6`; line 5 starts at byte 5, hence the UTF-8 column 11 - 5 = 6.
    let offset_of_6 = TextSize::from(11);
    let expected = LineCol { line: 5, col: 6 };

    let index = LineIndex::new(source);
    assert_eq!(index.try_line_col(offset_of_6), Some(expected));
}

/// `to_wide` must translate a UTF-8 column into a UTF-16 column when a multi-byte character
/// precedes the position on the same line.
#[test]
fn test_to_wide() {
    // Five empty lines; line 5 begins with `宽` (three bytes in UTF-8) followed by ASCII digits.
    let source = "\n\n\n\n\n宽3456";
    assert_eq!(&source[5..8], "宽");
    assert_eq!(&source[11..12], "6");

    let index = LineIndex::new(source);

    // Byte 11 is the `6`; its UTF-8 column on line 5 is 11 - 5 = 6.
    let utf8_pos = index.try_line_col(TextSize::from(11));
    assert_eq!(utf8_pos, Some(LineCol { line: 5, col: 6 }));

    // `宽` is a single UTF-16 code unit, so the same position is column 1 + 3 = 4 in UTF-16.
    let utf16_pos = index.to_wide(WideEncoding::Utf16, utf8_pos.unwrap());
    assert_eq!(utf16_pos, Some(WideLineCol { line: 5, col: 4 }));
}

/// Round-trips every character position of a large, randomly shuffled text through
/// `LineIndex`.
///
/// The text holds each `char` in `'\0'..char::MAX` once, plus one extra `'\n'` for every 16 of
/// those chars, in random order. For the start of every character the test checks that:
///
/// * `offset` maps the tracked `LineCol` to the character's byte offset,
/// * `line_col` maps that offset back to the same `LineCol`,
/// * `to_wide` followed by `to_utf8` round-trips for both UTF-16 and UTF-32, and the wide
///   column equals the independently tracked one.
///
/// The shuffle seed is printed so that a failing run can be reproduced by hard-coding it; the
/// test harness only shows captured output for failing tests.
#[test]
fn test_every_chars() {
    let text: String = {
        // Half-open range: every scalar value except `char::MAX` itself.
        let mut chars: Vec<char> = ((0 as char)..char::MAX).collect();
        let extra_newlines = chars.len() / 16;
        chars.extend("\n".repeat(extra_newlines).chars());

        // `RandomState` serves as a dependency-free source of entropy for the seed.
        // NOTE(review): the `allow` is presumably needed because the project's clippy
        // configuration disallows this type — confirm.
        let seed = std::hash::Hasher::finish(&std::hash::BuildHasher::build_hasher(
            #[allow(clippy::disallowed_types)]
            &std::collections::hash_map::RandomState::new(),
        ));
        // Report the seed: without it a failure on one particular ordering is irreproducible.
        println!("test_every_chars: shuffle seed = {}", seed);

        let mut rng = oorandom::Rand32::new(seed);
        // Uniform index in `0..bound` (exclusive upper bound).
        let mut rand_index = |bound: usize| rng.rand_range(0..bound as u32) as usize;

        // Fisher–Yates shuffle. The candidate range must include `last` itself (hence
        // `last + 1`); drawing from `0..last` would only ever produce single-cycle
        // permutations and never leave an element in place.
        for last in (1..chars.len()).rev() {
            let pick = rand_index(last + 1);
            chars.swap(last, pick);
        }
        chars.into_iter().collect()
    };
    assert!(text.contains('💩')); // Sanity check.

    let line_index = LineIndex::new(&text);

    // Position of the current character, tracked independently of `LineIndex` in UTF-8 bytes,
    // UTF-16 code units and UTF-32 code points.
    let mut lin_col = LineCol { line: 0, col: 0 };
    let mut col_utf16 = 0;
    let mut col_utf32 = 0;
    for (offset, c) in text.char_indices() {
        let got_offset = line_index.offset(lin_col).unwrap();
        assert_eq!(usize::from(got_offset), offset);

        let got_lin_col = line_index.line_col(got_offset);
        assert_eq!(got_lin_col, lin_col);

        for (enc, col) in [(WideEncoding::Utf16, col_utf16), (WideEncoding::Utf32, col_utf32)] {
            let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap();
            let got_lin_col = line_index.to_utf8(enc, wide_lin_col).unwrap();
            assert_eq!(got_lin_col, lin_col);
            assert_eq!(wide_lin_col.col, col);
        }

        // Advance the tracked position past `c`.
        if c == '\n' {
            lin_col.line += 1;
            lin_col.col = 0;
            col_utf16 = 0;
            col_utf32 = 0;
        } else {
            lin_col.col += c.len_utf8() as u32;
            col_utf16 += c.len_utf16() as u32;
            col_utf32 += 1;
        }
    }
}