Optimize bidi character detection.
This commit is contained in:
parent
473eaa42e9
commit
2d9f0e2c50
@ -137,12 +137,45 @@ impl<'a> StringReader<'a> {
|
|||||||
// Opening delimiter of the length 2 is not included into the comment text.
|
// Opening delimiter of the length 2 is not included into the comment text.
|
||||||
let content_start = start + BytePos(2);
|
let content_start = start + BytePos(2);
|
||||||
let content = self.str_from(content_start);
|
let content = self.str_from(content_start);
|
||||||
|
|
||||||
|
// Char - UTF-8
|
||||||
|
// U+202A - E2 80 AA
|
||||||
|
// U+202B - E2 80 AB
|
||||||
|
// U+202C - E2 80 AC
|
||||||
|
// U+202D - E2 80 AD
|
||||||
|
// U+202E - E2 80 AE
|
||||||
|
// U+2066 - E2 81 A6
|
||||||
|
// U+2067 - E2 81 A7
|
||||||
|
// U+2068 - E2 81 A8
|
||||||
|
// U+2069 - E2 81 A9
|
||||||
|
let mut bytes = content.as_bytes();
|
||||||
|
let contains_flow_control_chars = loop {
|
||||||
|
match core::slice::memchr::memchr(0xE2, &bytes) {
|
||||||
|
Some(idx) => {
|
||||||
|
// bytes are valid UTF-8 -> E2 must be followed by two bytes
|
||||||
|
match bytes[idx + 1] {
|
||||||
|
0x80 => {
|
||||||
|
if (0xAA..=0xAE).contains(&bytes[idx + 2]) {
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
0x81 => {
|
||||||
|
if (0xA6..=0xA9).contains(&bytes[idx + 2]) {
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
bytes = &bytes[idx + 3..];
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
break false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if contains_flow_control_chars {
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
|
|
||||||
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
|
|
||||||
'\u{202C}', '\u{2069}',
|
|
||||||
];
|
|
||||||
if content.contains(UNICODE_TEXT_FLOW_CHARS) {
|
|
||||||
self.sess.buffer_lint_with_diagnostic(
|
self.sess.buffer_lint_with_diagnostic(
|
||||||
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
||||||
span,
|
span,
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#![feature(crate_visibility_modifier)]
|
#![feature(crate_visibility_modifier)]
|
||||||
#![feature(if_let_guard)]
|
#![feature(if_let_guard)]
|
||||||
#![feature(box_patterns)]
|
#![feature(box_patterns)]
|
||||||
|
#![feature(slice_internals)]
|
||||||
#![recursion_limit = "256"]
|
#![recursion_limit = "256"]
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user