From de1f766820d58bd87a94e9f055fbf269a3946e1f Mon Sep 17 00:00:00 2001 From: oxalica Date: Mon, 17 Jul 2023 22:28:57 +0800 Subject: [PATCH] Fix highlighting of byte escape sequences Currently non-UTF8 escape sequences in byte strings and any escape sequences in byte literals are ignored. --- crates/ide/src/syntax_highlighting.rs | 10 ++++++++- crates/ide/src/syntax_highlighting/escape.rs | 22 ++++++++++++++++++- .../test_data/highlight_strings.html | 6 +++-- crates/ide/src/syntax_highlighting/tests.rs | 6 +++-- crates/syntax/src/ast/token_ext.rs | 14 ++++++++---- 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/crates/ide/src/syntax_highlighting.rs b/crates/ide/src/syntax_highlighting.rs index 577bd2bc1f8..ae97236409e 100644 --- a/crates/ide/src/syntax_highlighting.rs +++ b/crates/ide/src/syntax_highlighting.rs @@ -24,7 +24,7 @@ use crate::{ syntax_highlighting::{ - escape::{highlight_escape_char, highlight_escape_string}, + escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string}, format::highlight_format_string, highlights::Highlights, macro_::MacroHighlighter, @@ -471,6 +471,14 @@ fn item(&self) -> &ast::Item { }; highlight_escape_char(hl, &char, range.start()) + } else if ast::Byte::can_cast(token.kind()) + && ast::Byte::can_cast(descended_token.kind()) + { + let Some(byte) = ast::Byte::cast(token) else { + continue; + }; + + highlight_escape_byte(hl, &byte, range.start()) } } diff --git a/crates/ide/src/syntax_highlighting/escape.rs b/crates/ide/src/syntax_highlighting/escape.rs index 211e3588095..2c63a69bdcb 100644 --- a/crates/ide/src/syntax_highlighting/escape.rs +++ b/crates/ide/src/syntax_highlighting/escape.rs @@ -1,7 +1,7 @@ //! Syntax highlighting for escape sequences use crate::syntax_highlighting::highlights::Highlights; use crate::{HlRange, HlTag}; -use syntax::ast::{Char, IsString}; +use syntax::ast::{Byte, Char, IsString}; use syntax::{AstToken, TextRange, TextSize}; pub(super) fn highlight_escape_string( @@ -43,3 +43,23 @@ pub(super) fn highlight_escape_char(stack: &mut Highlights, char: &Char, start: TextRange::new(start + TextSize::from(1), start + TextSize::from(text.len() as u32 + 1)); stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None }) } + +pub(super) fn highlight_escape_byte(stack: &mut Highlights, byte: &Byte, start: TextSize) { + if byte.value().is_none() { + return; + } + + let text = byte.text(); + if !text.starts_with("b'") || !text.ends_with('\'') { + return; + } + + let text = &text[2..text.len() - 1]; + if !text.starts_with('\\') { + return; + } + + let range = + TextRange::new(start + TextSize::from(2), start + TextSize::from(text.len() as u32 + 2)); + stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None }) +} diff --git a/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html b/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html index f4f164aa1de..061329d2397 100644 --- a/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html +++ b/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html @@ -105,6 +105,8 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd let a = '\x65'; let a = '\x00'; + let a = b'\xFF'; + println!("Hello {{Hello}}"); // from https://doc.rust-lang.org/std/fmt/index.html println!("Hello"); // => "Hello" @@ -159,8 +161,8 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd println!("Hello\nWorld"); println!("\u{48}\x65\x6C\x6C\x6F World"); - let _ = "\x28\x28\x00\x63\n"; - let _ = b"\x28\x28\x00\x63\n"; + let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences + let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes let backslash = r"\\"; println!("{\x41}", A = 92); diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs index 1ee451a06d0..80a49bcaa3b 100644 --- a/crates/ide/src/syntax_highlighting/tests.rs +++ b/crates/ide/src/syntax_highlighting/tests.rs @@ -451,6 +451,8 @@ fn main() { let a = '\x65'; let a = '\x00'; + let a = b'\xFF'; + println!("Hello {{Hello}}"); // from https://doc.rust-lang.org/std/fmt/index.html println!("Hello"); // => "Hello" @@ -505,8 +507,8 @@ fn main() { println!("Hello\nWorld"); println!("\u{48}\x65\x6C\x6C\x6F World"); - let _ = "\x28\x28\x00\x63\n"; - let _ = b"\x28\x28\x00\x63\n"; + let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences + let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes let backslash = r"\\"; println!("{\x41}", A = 92); diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 090eb89f470..aa8c9bbc0f8 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -146,6 +146,7 @@ fn new(literal: &str) -> Option { pub trait IsString: AstToken { const RAW_PREFIX: &'static str; + const MODE: Mode; fn is_raw(&self) -> bool { self.text().starts_with(Self::RAW_PREFIX) } @@ -181,7 +182,7 @@ fn escaped_char_ranges( let text = &self.text()[text_range_no_quotes - start]; let offset = text_range_no_quotes.start() - start; - unescape_literal(text, Mode::Str, &mut |range, unescaped_char| { + unescape_literal(text, Self::MODE, &mut |range, unescaped_char| { let text_range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); cb(text_range + offset, unescaped_char); @@ -196,6 +197,7 @@ fn map_range_up(&self, range: TextRange) -> Option { impl IsString for ast::String { const RAW_PREFIX: &'static str = "r"; + const MODE: Mode = Mode::Str; } impl ast::String { @@ -213,7 +215,7 @@ pub fn value(&self) -> Option> { let mut buf = String::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( + unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -239,6 +241,7 @@ pub fn value(&self) -> Option> { impl IsString for ast::ByteString { const RAW_PREFIX: &'static str = "br"; + const MODE: Mode = Mode::ByteStr; } impl ast::ByteString { @@ -256,7 +259,7 @@ pub fn value(&self) -> Option> { let mut buf: Vec = Vec::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match ( + unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -282,6 +285,9 @@ pub fn value(&self) -> Option> { impl IsString for ast::CString { const RAW_PREFIX: &'static str = "cr"; + // XXX: `Mode::CStr` is not supported by `unescape_literal` of ra-ap-rustc_lexer yet. + // Here we pretend it to be a byte string. + const MODE: Mode = Mode::ByteStr; } impl ast::CString { @@ -299,7 +305,7 @@ pub fn value(&self) -> Option> { let mut buf = String::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( + unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) {