From 445052f6d426043b543033f3fa4594fc1a09d7fa Mon Sep 17 00:00:00 2001 From: Leander Tentrup Date: Wed, 22 Apr 2020 15:28:35 +0200 Subject: [PATCH] Adapt format specifier highlighting to support escaped squences and unicode identifiers --- .../src/snapshots/highlight_strings.html | 5 + crates/ra_ide/src/syntax_highlighting.rs | 63 ++-- .../ra_ide/src/syntax_highlighting/tests.rs | 5 + crates/ra_syntax/src/ast/tokens.rs | 278 ++++++++++-------- 4 files changed, 208 insertions(+), 143 deletions(-) diff --git a/crates/ra_ide/src/snapshots/highlight_strings.html b/crates/ra_ide/src/snapshots/highlight_strings.html index d70627da03f..433f2e0c5e9 100644 --- a/crates/ra_ide/src/snapshots/highlight_strings.html +++ b/crates/ra_ide/src/snapshots/highlight_strings.html @@ -74,4 +74,9 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); println!("Hello {{}}"); println!("{{ Hello"); + + println!(r"Hello, {}!", "world"); + + println!("{\x41}", A = 92); + println!("{ничоси}", ничоси = 92); } \ No newline at end of file diff --git a/crates/ra_ide/src/syntax_highlighting.rs b/crates/ra_ide/src/syntax_highlighting.rs index 8ee3a78c64d..b5fd3390f20 100644 --- a/crates/ra_ide/src/syntax_highlighting.rs +++ b/crates/ra_ide/src/syntax_highlighting.rs @@ -245,28 +245,29 @@ pub(crate) fn highlight( stack.push(); if is_format_string { string.lex_format_specifier(|piece_range, kind| { - let highlight = match kind { - FormatSpecifier::Open - | FormatSpecifier::Close - | FormatSpecifier::Colon - | FormatSpecifier::Fill - | FormatSpecifier::Align - | FormatSpecifier::Sign - | FormatSpecifier::NumberSign - | FormatSpecifier::DollarSign - | FormatSpecifier::Dot - | FormatSpecifier::Asterisk - | FormatSpecifier::QuestionMark => HighlightTag::Attribute, - FormatSpecifier::Integer | FormatSpecifier::Zero => { - HighlightTag::NumericLiteral - } - FormatSpecifier::Identifier => HighlightTag::Local, - }; - stack.add(HighlightedRange { - range: piece_range + range.start(), - highlight: highlight.into(), - binding_hash: None, - }); + if let Some(highlight) = highlight_format_specifier(kind) { + stack.add(HighlightedRange { + range: piece_range + range.start(), + highlight: highlight.into(), + binding_hash: None, + }); + } + }); + } + stack.pop(); + } else if let Some(string) = + element_to_highlight.as_token().cloned().and_then(ast::RawString::cast) + { + stack.push(); + if is_format_string { + string.lex_format_specifier(|piece_range, kind| { + if let Some(highlight) = highlight_format_specifier(kind) { + stack.add(HighlightedRange { + range: piece_range + range.start(), + highlight: highlight.into(), + binding_hash: None, + }); + } }); } stack.pop(); @@ -277,6 +278,24 @@ pub(crate) fn highlight( stack.flattened() } +fn highlight_format_specifier(kind: FormatSpecifier) -> Option { + Some(match kind { + FormatSpecifier::Open + | FormatSpecifier::Close + | FormatSpecifier::Colon + | FormatSpecifier::Fill + | FormatSpecifier::Align + | FormatSpecifier::Sign + | FormatSpecifier::NumberSign + | FormatSpecifier::DollarSign + | FormatSpecifier::Dot + | FormatSpecifier::Asterisk + | FormatSpecifier::QuestionMark => HighlightTag::Attribute, + FormatSpecifier::Integer | FormatSpecifier::Zero => HighlightTag::NumericLiteral, + FormatSpecifier::Identifier => HighlightTag::Local, + }) +} + fn macro_call_range(macro_call: &ast::MacroCall) -> Option { let path = macro_call.path()?; let name_ref = path.segment()?.name_ref()?; diff --git a/crates/ra_ide/src/syntax_highlighting/tests.rs b/crates/ra_ide/src/syntax_highlighting/tests.rs index f198767ce1f..a9aae957f01 100644 --- a/crates/ra_ide/src/syntax_highlighting/tests.rs +++ b/crates/ra_ide/src/syntax_highlighting/tests.rs @@ -223,6 +223,11 @@ fn main() { println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); println!("Hello {{}}"); println!("{{ Hello"); + + println!(r"Hello, {}!", "world"); + + println!("{\x41}", A = 92); + println!("{ничоси}", ничоси = 92); }"# .trim(), ); diff --git a/crates/ra_syntax/src/ast/tokens.rs b/crates/ra_syntax/src/ast/tokens.rs index 3e5c56b192d..aa34b682d92 100644 --- a/crates/ra_syntax/src/ast/tokens.rs +++ b/crates/ra_syntax/src/ast/tokens.rs @@ -192,68 +192,76 @@ pub enum FormatSpecifier { } pub trait HasFormatSpecifier: AstToken { + fn char_ranges( + &self, + ) -> Option)>>; + fn lex_format_specifier(&self, mut callback: F) where F: FnMut(TextRange, FormatSpecifier), { - let src = self.text().as_str(); - let initial_len = src.len(); - let mut chars = src.chars(); + let char_ranges = if let Some(char_ranges) = self.char_ranges() { + char_ranges + } else { + return; + }; + let mut chars = char_ranges.iter().peekable(); - while let Some(first_char) = chars.next() { + while let Some((range, first_char)) = chars.next() { match first_char { - '{' => { + Ok('{') => { // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax - if chars.clone().next() == Some('{') { + if let Some((_, Ok('{'))) = chars.peek() { // Escaped format specifier, `{{` chars.next(); continue; } - let start = initial_len - chars.as_str().len() - first_char.len_utf8(); - let end = initial_len - chars.as_str().len(); - callback( - TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), - FormatSpecifier::Open, - ); + callback(*range, FormatSpecifier::Open); // check for integer/identifier - match chars.clone().next().unwrap_or_default() { + match chars + .peek() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default() + { '0'..='9' => { // integer - read_integer(&mut chars, initial_len, &mut callback); + read_integer(&mut chars, &mut callback); } - 'a'..='z' | 'A'..='Z' | '_' => { + c if c == '_' || c.is_alphabetic() => { // identifier - read_identifier(&mut chars, initial_len, &mut callback); + read_identifier(&mut chars, &mut callback); } _ => {} } - if chars.clone().next() == Some(':') { - skip_char_and_emit( - &mut chars, - initial_len, - FormatSpecifier::Colon, - &mut callback, - ); + if let Some((_, Ok(':'))) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback); // check for fill/align let mut cloned = chars.clone().take(2); - let first = cloned.next().unwrap_or_default(); - let second = cloned.next().unwrap_or_default(); + let first = cloned + .next() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default(); + let second = cloned + .next() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default(); match second { '<' | '^' | '>' => { // alignment specifier, first char specifies fillment skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::Fill, &mut callback, ); skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::Align, &mut callback, ); @@ -262,7 +270,6 @@ fn lex_format_specifier(&self, mut callback: F) '<' | '^' | '>' => { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::Align, &mut callback, ); @@ -272,11 +279,15 @@ fn lex_format_specifier(&self, mut callback: F) } // check for sign - match chars.clone().next().unwrap_or_default() { + match chars + .peek() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default() + { '+' | '-' => { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::Sign, &mut callback, ); @@ -285,10 +296,9 @@ fn lex_format_specifier(&self, mut callback: F) } // check for `#` - if let Some('#') = chars.clone().next() { + if let Some((_, Ok('#'))) = chars.peek() { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::NumberSign, &mut callback, ); @@ -296,39 +306,39 @@ fn lex_format_specifier(&self, mut callback: F) // check for `0` let mut cloned = chars.clone().take(2); - let first = cloned.next(); - let second = cloned.next(); + let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); + let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); if first == Some('0') && second != Some('$') { - skip_char_and_emit( - &mut chars, - initial_len, - FormatSpecifier::Zero, - &mut callback, - ); + skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback); } // width - match chars.clone().next().unwrap_or_default() { + match chars + .peek() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default() + { '0'..='9' => { - read_integer(&mut chars, initial_len, &mut callback); - if chars.clone().next() == Some('$') { + read_integer(&mut chars, &mut callback); + if let Some((_, Ok('$'))) = chars.peek() { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::DollarSign, &mut callback, ); } } - 'a'..='z' | 'A'..='Z' | '_' => { - read_identifier(&mut chars, initial_len, &mut callback); - if chars.clone().next() != Some('$') { + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); + if chars.peek().and_then(|next| next.1.as_ref().ok()).copied() + != Some('$') + { continue; } skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::DollarSign, &mut callback, ); @@ -337,42 +347,41 @@ fn lex_format_specifier(&self, mut callback: F) } // precision - if chars.clone().next() == Some('.') { - skip_char_and_emit( - &mut chars, - initial_len, - FormatSpecifier::Dot, - &mut callback, - ); + if let Some((_, Ok('.'))) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback); - match chars.clone().next().unwrap_or_default() { + match chars + .peek() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default() + { '*' => { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::Asterisk, &mut callback, ); } '0'..='9' => { - read_integer(&mut chars, initial_len, &mut callback); - if chars.clone().next() == Some('$') { + read_integer(&mut chars, &mut callback); + if let Some((_, Ok('$'))) = chars.peek() { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::DollarSign, &mut callback, ); } } - 'a'..='z' | 'A'..='Z' | '_' => { - read_identifier(&mut chars, initial_len, &mut callback); - if chars.clone().next() != Some('$') { + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); + if chars.peek().and_then(|next| next.1.as_ref().ok()).copied() + != Some('$') + { continue; } skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::DollarSign, &mut callback, ); @@ -384,25 +393,29 @@ fn lex_format_specifier(&self, mut callback: F) } // type - match chars.clone().next().unwrap_or_default() { + match chars + .peek() + .and_then(|next| next.1.as_ref().ok()) + .copied() + .unwrap_or_default() + { '?' => { skip_char_and_emit( &mut chars, - initial_len, FormatSpecifier::QuestionMark, &mut callback, ); } - 'a'..='z' | 'A'..='Z' | '_' => { - read_identifier(&mut chars, initial_len, &mut callback); + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); } _ => {} } } let mut cloned = chars.clone().take(2); - let first = cloned.next(); - let second = cloned.next(); + let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); + let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); if first != Some('}') { continue; } @@ -410,15 +423,10 @@ fn lex_format_specifier(&self, mut callback: F) // Escaped format end specifier, `}}` continue; } - skip_char_and_emit( - &mut chars, - initial_len, - FormatSpecifier::Close, - &mut callback, - ); + skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback); } _ => { - while let Some(next_char) = chars.clone().next() { + while let Some((_, Ok(next_char))) = chars.peek() { match next_char { '{' => break, _ => {} @@ -429,69 +437,97 @@ fn lex_format_specifier(&self, mut callback: F) }; } - fn skip_char_and_emit( - chars: &mut std::str::Chars, - initial_len: usize, + fn skip_char_and_emit<'a, I, F>( + chars: &mut std::iter::Peekable, emit: FormatSpecifier, callback: &mut F, ) where + I: Iterator)>, F: FnMut(TextRange, FormatSpecifier), { - let start = initial_len - chars.as_str().len(); - chars.next(); - let end = initial_len - chars.as_str().len(); - callback( - TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), - emit, - ); + let (range, _) = chars.next().unwrap(); + callback(*range, emit); } - fn read_integer(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F) + fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable, callback: &mut F) where + I: Iterator)>, F: FnMut(TextRange, FormatSpecifier), { - let start = initial_len - chars.as_str().len(); - chars.next(); - while let Some(next_char) = chars.clone().next() { - match next_char { - '0'..='9' => { - chars.next(); - } - _ => { - break; - } + let (mut range, c) = chars.next().unwrap(); + assert!(c.as_ref().unwrap().is_ascii_digit()); + while let Some((r, Ok(next_char))) = chars.peek() { + if next_char.is_ascii_digit() { + chars.next(); + range = range.extend_to(r); + } else { + break; } } - let end = initial_len - chars.as_str().len(); - callback( - TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), - FormatSpecifier::Integer, - ); + callback(range, FormatSpecifier::Integer); } - fn read_identifier(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F) + + fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable, callback: &mut F) where + I: Iterator)>, F: FnMut(TextRange, FormatSpecifier), { - let start = initial_len - chars.as_str().len(); - chars.next(); - while let Some(next_char) = chars.clone().next() { - match next_char { - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { - chars.next(); - } - _ => { - break; - } + let (mut range, c) = chars.next().unwrap(); + assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_'); + while let Some((r, Ok(next_char))) = chars.peek() { + if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() { + chars.next(); + range = range.extend_to(r); + } else { + break; } } - let end = initial_len - chars.as_str().len(); - callback( - TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), - FormatSpecifier::Identifier, - ); + callback(range, FormatSpecifier::Identifier); } } } -impl HasFormatSpecifier for String {} -impl HasFormatSpecifier for RawString {} +impl HasFormatSpecifier for String { + fn char_ranges( + &self, + ) -> Option)>> { + let text = self.text().as_str(); + let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; + let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); + + let mut res = Vec::with_capacity(text.len()); + rustc_lexer::unescape::unescape_str(text, &mut |range, unescaped_char| { + res.push(( + TextRange::from_to( + TextUnit::from_usize(range.start), + TextUnit::from_usize(range.end), + ) + offset, + unescaped_char, + )) + }); + + Some(res) + } +} + +impl HasFormatSpecifier for RawString { + fn char_ranges( + &self, + ) -> Option)>> { + let text = self.text().as_str(); + let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; + let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); + + let mut res = Vec::with_capacity(text.len()); + for (idx, c) in text.char_indices() { + res.push(( + TextRange::from_to( + TextUnit::from_usize(idx), + TextUnit::from_usize(idx + c.len_utf8()), + ) + offset, + Ok(c), + )); + } + Some(res) + } +}