rust/crates/syntax/src/ast/token_ext.rs

//! There are many AstNodes, but only a few tokens, so we hand-write them here.

use std::{
    borrow::Cow,
    convert::{TryFrom, TryInto},
};

use rustc_lexer::unescape::{unescape_literal, Mode};

use crate::{
    ast::{AstToken, Comment, RawString, String, Whitespace},
    TextRange, TextSize,
};

impl Comment {
    pub fn kind(&self) -> CommentKind {
        kind_by_prefix(self.text())
    }

    pub fn prefix(&self) -> &'static str {
        for (prefix, k) in COMMENT_PREFIX_TO_KIND.iter() {
            if *k == self.kind() && self.text().starts_with(prefix) {
                return prefix;
            }
        }
        unreachable!()
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    pub shape: CommentShape,
    pub doc: Option<CommentPlacement>,
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    Line,
    Block,
}

impl CommentShape {
    pub fn is_line(self) -> bool {
        self == CommentShape::Line
    }

    pub fn is_block(self) -> bool {
        self == CommentShape::Block
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    Inner,
    Outer,
}

const COMMENT_PREFIX_TO_KIND: &[(&str, CommentKind)] = {
    use {CommentPlacement::*, CommentShape::*};
    &[
        ("////", CommentKind { shape: Line, doc: None }),
        ("///", CommentKind { shape: Line, doc: Some(Outer) }),
        ("//!", CommentKind { shape: Line, doc: Some(Inner) }),
        ("/**", CommentKind { shape: Block, doc: Some(Outer) }),
        ("/*!", CommentKind { shape: Block, doc: Some(Inner) }),
        ("//", CommentKind { shape: Line, doc: None }),
        ("/*", CommentKind { shape: Block, doc: None }),
    ]
};

fn kind_by_prefix(text: &str) -> CommentKind {
    if text == "/**/" {
        return CommentKind { shape: CommentShape::Block, doc: None };
    }
    for (prefix, kind) in COMMENT_PREFIX_TO_KIND.iter() {
        if text.starts_with(prefix) {
            return *kind;
        }
    }
    panic!("bad comment text: {:?}", text)
}

impl Whitespace {
    pub fn spans_multiple_lines(&self) -> bool {
        let text = self.text();
        text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
    }
}

pub struct QuoteOffsets {
    pub quotes: (TextRange, TextRange),
    pub contents: TextRange,
}

impl QuoteOffsets {
    fn new(literal: &str) -> Option<QuoteOffsets> {
        let left_quote = literal.find('"')?;
        let right_quote = literal.rfind('"')?;
        if left_quote == right_quote {
            // `literal` only contains one quote
            return None;
        }

        let start = TextSize::from(0);
        let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
        let right_quote = TextSize::try_from(right_quote).unwrap();
        let end = TextSize::of(literal);

        let res = QuoteOffsets {
            quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
            contents: TextRange::new(left_quote, right_quote),
        };
        Some(res)
    }
}

pub trait HasQuotes: AstToken {
    fn quote_offsets(&self) -> Option<QuoteOffsets> {
        let text = self.text().as_str();
        let offsets = QuoteOffsets::new(text)?;
        let o = self.syntax().text_range().start();
        let offsets = QuoteOffsets {
            quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
            contents: offsets.contents + o,
        };
        Some(offsets)
    }
    fn open_quote_text_range(&self) -> Option<TextRange> {
        self.quote_offsets().map(|it| it.quotes.0)
    }

    fn close_quote_text_range(&self) -> Option<TextRange> {
        self.quote_offsets().map(|it| it.quotes.1)
    }

    fn text_range_between_quotes(&self) -> Option<TextRange> {
        self.quote_offsets().map(|it| it.contents)
    }
}

impl HasQuotes for String {}
impl HasQuotes for RawString {}

pub trait HasStringValue: HasQuotes {
    fn value(&self) -> Option<Cow<'_, str>>;
}

impl HasStringValue for String {
    fn value(&self) -> Option<Cow<'_, str>> {
        let text = self.text().as_str();
        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];

        let mut buf = std::string::String::with_capacity(text.len());
        let mut has_error = false;
        unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char {
            Ok(c) => buf.push(c),
            Err(_) => has_error = true,
        });

        if has_error {
            return None;
        }
        // FIXME: don't actually allocate for borrowed case
        let res = if buf == text { Cow::Borrowed(text) } else { Cow::Owned(buf) };
        Some(res)
    }
}

impl HasStringValue for RawString {
    fn value(&self) -> Option<Cow<'_, str>> {
        let text = self.text().as_str();
        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
        Some(Cow::Borrowed(text))
    }
}

impl RawString {
    pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
        let contents_range = self.text_range_between_quotes()?;
        assert!(TextRange::up_to(contents_range.len()).contains_range(range));
        Some(range + contents_range.start())
    }
}

#[derive(Debug)]
pub enum FormatSpecifier {
    Open,
    Close,
    Integer,
    Identifier,
    Colon,
    Fill,
    Align,
    Sign,
    NumberSign,
    Zero,
    DollarSign,
    Dot,
    Asterisk,
    QuestionMark,
}

pub trait HasFormatSpecifier: AstToken {
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;

    fn lex_format_specifier<F>(&self, mut callback: F)
    where
        F: FnMut(TextRange, FormatSpecifier),
    {
        let char_ranges = if let Some(char_ranges) = self.char_ranges() {
            char_ranges
        } else {
            return;
        };
        let mut chars = char_ranges.iter().peekable();

        while let Some((range, first_char)) = chars.next() {
            match first_char {
                Ok('{') => {
                    // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
                    if let Some((_, Ok('{'))) = chars.peek() {
                        // Escaped format specifier, `{{`
                        chars.next();
                        continue;
                    }

                    callback(*range, FormatSpecifier::Open);

                    // check for integer/identifier
                    match chars
                        .peek()
                        .and_then(|next| next.1.as_ref().ok())
                        .copied()
                        .unwrap_or_default()
                    {
                        '0'..='9' => {
                            // integer
                            read_integer(&mut chars, &mut callback);
                        }
                        c if c == '_' || c.is_alphabetic() => {
                            // identifier
                            read_identifier(&mut chars, &mut callback);
                        }
                        _ => {}
                    }

                    if let Some((_, Ok(':'))) = chars.peek() {
                        skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);

                        // check for fill/align
                        let mut cloned = chars.clone().take(2);
                        let first = cloned
                            .next()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default();
                        let second = cloned
                            .next()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default();
                        match second {
                            '<' | '^' | '>' => {
                                // alignment specifier, first char specifies fillment
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Fill,
                                    &mut callback,
                                );
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Align,
                                    &mut callback,
                                );
                            }
                            _ => match first {
                                '<' | '^' | '>' => {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::Align,
                                        &mut callback,
                                    );
                                }
                                _ => {}
                            },
                        }

                        // check for sign
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '+' | '-' => {
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::Sign,
                                    &mut callback,
                                );
                            }
                            _ => {}
                        }

                        // check for `#`
                        if let Some((_, Ok('#'))) = chars.peek() {
                            skip_char_and_emit(
                                &mut chars,
                                FormatSpecifier::NumberSign,
                                &mut callback,
                            );
                        }

                        // check for `0`
                        let mut cloned = chars.clone().take(2);
                        let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
                        let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();

                        if first == Some('0') && second != Some('$') {
                            skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
                        }

                        // width
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '0'..='9' => {
                                read_integer(&mut chars, &mut callback);
                                if let Some((_, Ok('$'))) = chars.peek() {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    );
                                }
                            }
                            c if c == '_' || c.is_alphabetic() => {
                                read_identifier(&mut chars, &mut callback);
                                // can be either width (indicated by dollar sign, or type in which case
                                // the next sign has to be `}`)
                                let next =
                                    chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
                                match next {
                                    Some('$') => skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    ),
                                    Some('}') => {
                                        skip_char_and_emit(
                                            &mut chars,
                                            FormatSpecifier::Close,
                                            &mut callback,
                                        );
                                        continue;
                                    }
                                    _ => continue,
                                };
                            }
                            _ => {}
                        }

                        // precision
                        if let Some((_, Ok('.'))) = chars.peek() {
                            skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);

                            match chars
                                .peek()
                                .and_then(|next| next.1.as_ref().ok())
                                .copied()
                                .unwrap_or_default()
                            {
                                '*' => {
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::Asterisk,
                                        &mut callback,
                                    );
                                }
                                '0'..='9' => {
                                    read_integer(&mut chars, &mut callback);
                                    if let Some((_, Ok('$'))) = chars.peek() {
                                        skip_char_and_emit(
                                            &mut chars,
                                            FormatSpecifier::DollarSign,
                                            &mut callback,
                                        );
                                    }
                                }
                                c if c == '_' || c.is_alphabetic() => {
                                    read_identifier(&mut chars, &mut callback);
                                    if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
                                        != Some('$')
                                    {
                                        continue;
                                    }
                                    skip_char_and_emit(
                                        &mut chars,
                                        FormatSpecifier::DollarSign,
                                        &mut callback,
                                    );
                                }
                                _ => {
                                    continue;
                                }
                            }
                        }

                        // type
                        match chars
                            .peek()
                            .and_then(|next| next.1.as_ref().ok())
                            .copied()
                            .unwrap_or_default()
                        {
                            '?' => {
                                skip_char_and_emit(
                                    &mut chars,
                                    FormatSpecifier::QuestionMark,
                                    &mut callback,
                                );
                            }
                            c if c == '_' || c.is_alphabetic() => {
                                read_identifier(&mut chars, &mut callback);
                            }
                            _ => {}
                        }
                    }

                    if let Some((_, Ok('}'))) = chars.peek() {
                        skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
                    } else {
                        continue;
                    }
                }
                _ => {
                    while let Some((_, Ok(next_char))) = chars.peek() {
                        match next_char {
                            '{' => break,
                            _ => {}
                        }
                        chars.next();
                    }
                }
            };
        }

        fn skip_char_and_emit<'a, I, F>(
            chars: &mut std::iter::Peekable<I>,
            emit: FormatSpecifier,
            callback: &mut F,
        ) where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (range, _) = chars.next().unwrap();
            callback(*range, emit);
        }

        fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
        where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (mut range, c) = chars.next().unwrap();
            assert!(c.as_ref().unwrap().is_ascii_digit());
            while let Some((r, Ok(next_char))) = chars.peek() {
                if next_char.is_ascii_digit() {
                    chars.next();
                    range = range.cover(*r);
                } else {
                    break;
                }
            }
            callback(range, FormatSpecifier::Integer);
        }

        fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
        where
            I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
            F: FnMut(TextRange, FormatSpecifier),
        {
            let (mut range, c) = chars.next().unwrap();
            assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
            while let Some((r, Ok(next_char))) = chars.peek() {
                if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
                    chars.next();
                    range = range.cover(*r);
                } else {
                    break;
                }
            }
            callback(range, FormatSpecifier::Identifier);
        }
    }
}

impl HasFormatSpecifier for String {
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
        let text = self.text().as_str();
        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
        let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();

        let mut res = Vec::with_capacity(text.len());
        unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
            res.push((
                TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
                    + offset,
                unescaped_char,
            ))
        });

        Some(res)
    }
}

impl HasFormatSpecifier for RawString {
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
        let text = self.text().as_str();
        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
        let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();

        let mut res = Vec::with_capacity(text.len());
        for (idx, c) in text.char_indices() {
            res.push((TextRange::at(idx.try_into().unwrap(), TextSize::of(c)) + offset, Ok(c)));
        }
        Some(res)
    }
}