2019-05-06 03:53:40 -05:00
|
|
|
use std::str::Chars;
|
|
|
|
|
2019-10-26 11:12:58 -05:00
|
|
|
/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via the `first` and `second` methods,
/// and the position can be shifted forward via the `bump` method.
pub struct Cursor<'a> {
    /// Byte length of the not-yet-consumed input at the moment the cursor was
    /// created or `reset_pos_within_token` was last called. Serves as the
    /// baseline that `pos_within_token` measures from.
    len_remaining: usize,
    /// Iterator over chars. Slightly faster than a &str.
    chars: Chars<'a>,
    /// Last character returned by `bump`. Only present in debug builds.
    #[cfg(debug_assertions)]
    prev: char,
}

/// Sentinel returned by the peeking methods when there is no character at the
/// requested position (and by `prev` in release builds).
///
/// The input itself may contain a literal `'\0'`, so seeing this value is not
/// proof of end of input; use `Cursor::is_eof` for that.
pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
    /// Creates a cursor positioned at the start of `input`.
    pub fn new(input: &'a str) -> Self {
        Cursor {
            len_remaining: input.len(),
            chars: input.chars(),
            #[cfg(debug_assertions)]
            prev: EOF_CHAR,
        }
    }

    /// Returns the last eaten symbol (or `'\0'` in release builds).
    /// (For debug assertions only.)
    ///
    /// Before the first successful `bump` this is `EOF_CHAR` in every build.
    pub(crate) fn prev(&self) -> char {
        #[cfg(debug_assertions)]
        {
            self.prev
        }

        #[cfg(not(debug_assertions))]
        {
            EOF_CHAR
        }
    }

    /// Peeks the next symbol from the input stream without consuming it.
    /// If requested position doesn't exist, `EOF_CHAR` is returned.
    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
    /// it should be checked with `is_eof` method.
    pub(crate) fn first(&self) -> char {
        // `.next()` optimizes better than `.nth(0)`
        self.chars.clone().next().unwrap_or(EOF_CHAR)
    }

    /// Peeks the second symbol from the input stream without consuming it.
    /// If fewer than two characters remain, `EOF_CHAR` is returned.
    pub(crate) fn second(&self) -> char {
        // `.next()` optimizes better than `.nth(1)`
        let mut iter = self.chars.clone();
        iter.next();
        iter.next().unwrap_or(EOF_CHAR)
    }

    /// Checks if there is nothing more to consume.
    pub(crate) fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Returns the number of *bytes* (not characters) consumed since the
    /// cursor was created or since the last `reset_pos_within_token` call.
    pub(crate) fn pos_within_token(&self) -> u32 {
        // Both operands are UTF-8 byte lengths. The `as` cast narrows silently,
        // so a span longer than `u32::MAX` bytes would wrap rather than panic.
        (self.len_remaining - self.chars.as_str().len()) as u32
    }

    /// Resets the number of bytes consumed to 0.
    pub(crate) fn reset_pos_within_token(&mut self) {
        self.len_remaining = self.chars.as_str().len();
    }

    /// Moves to the next character.
    ///
    /// Returns the consumed character, or `None` if the input is exhausted
    /// (in which case the cursor is left untouched).
    pub(crate) fn bump(&mut self) -> Option<char> {
        let c = self.chars.next()?;

        #[cfg(debug_assertions)]
        {
            self.prev = c;
        }

        Some(c)
    }

    /// Eats symbols while predicate returns true or until the end of file is reached.
    ///
    /// The predicate is evaluated before the end-of-input check, so at end of
    /// input it is invoked one final time with `EOF_CHAR`.
    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // It was tried making optimized version of this for eg. line comments, but
        // LLVM can inline all of this and compile it down to fast iteration over bytes.
        while predicate(self.first()) && !self.is_eof() {
            self.bump();
        }
    }
}
|