Auto merge of #88834 - the8472:char-count, r=joshtriplett

Optimize `str::from_utf8()` validation when the slice contains multibyte chars, and `str.chars().count()` in all cases

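The change shows small but consistent improvements across several x86 target feature levels. The continuation-byte test is rewritten as a signed comparison, `(byte as i8) < -64`, which is equivalent to the masked check because continuation bytes `0x80..=0xBF` reinterpret as `-128..=-65` when cast to `i8`. I also tried to optimize counting with `slice.as_chunks`, but that yielded less consistent results: bigger improvements at some optimization levels, smaller ones at others.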

```
old, -O2, x86-64
test str::str_char_count_emoji                                  ... bench:       1,924 ns/iter (+/- 26)
test str::str_char_count_lorem                                  ... bench:         879 ns/iter (+/- 12)
test str::str_char_count_lorem_short                            ... bench:           5 ns/iter (+/- 0)

new, -O2, x86-64
test str::str_char_count_emoji                                  ... bench:       1,878 ns/iter (+/- 21)
test str::str_char_count_lorem                                  ... bench:         851 ns/iter (+/- 11)
test str::str_char_count_lorem_short                            ... bench:           4 ns/iter (+/- 0)

old, -O2, x86-64-v2
test str::str_char_count_emoji                                  ... bench:       1,477 ns/iter (+/- 46)
test str::str_char_count_lorem                                  ... bench:         675 ns/iter (+/- 15)
test str::str_char_count_lorem_short                            ... bench:           5 ns/iter (+/- 0)

new, -O2, x86-64-v2
test str::str_char_count_emoji                                  ... bench:       1,323 ns/iter (+/- 39)
test str::str_char_count_lorem                                  ... bench:         593 ns/iter (+/- 18)
test str::str_char_count_lorem_short                            ... bench:           4 ns/iter (+/- 0)

old, -O2, x86-64-v3
test str::str_char_count_emoji                                  ... bench:         748 ns/iter (+/- 7)
test str::str_char_count_lorem                                  ... bench:         348 ns/iter (+/- 2)
test str::str_char_count_lorem_short                            ... bench:           5 ns/iter (+/- 0)

new, -O2, x86-64-v3
test str::str_char_count_emoji                                  ... bench:         650 ns/iter (+/- 4)
test str::str_char_count_lorem                                  ... bench:         301 ns/iter (+/- 1)
test str::str_char_count_lorem_short                            ... bench:           5 ns/iter (+/- 0)
```

And for validation of a string containing multibyte chars:

```
old, -O2, x86-64
test str::str_validate_emoji                                    ... bench:       4,606 ns/iter (+/- 64)

new, -O2, x86-64
test str::str_validate_emoji                                    ... bench:       3,837 ns/iter (+/- 60)
```
This commit is contained in:
bors 2021-10-04 12:49:57 +00:00
commit 175b8db73b
3 changed files with 39 additions and 7 deletions

View File

@ -15,3 +15,4 @@ mod num;
mod ops;
mod pattern;
mod slice;
mod str;

File diff suppressed because one or more lines are too long

View File

@ -22,7 +22,7 @@ fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
/// bits `10`).
#[inline]
pub(super) fn utf8_is_cont_byte(byte: u8) -> bool {
(byte & !CONT_MASK) == TAG_CONT_U8
(byte as i8) < -64
}
#[inline]
@ -163,7 +163,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
// %xF4 %x80-8F 2( UTF8-tail )
match w {
2 => {
if next!() & !CONT_MASK != TAG_CONT_U8 {
if next!() as i8 >= -64 {
err!(Some(1))
}
}
@ -175,7 +175,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
| (0xEE..=0xEF, 0x80..=0xBF) => {}
_ => err!(Some(1)),
}
if next!() & !CONT_MASK != TAG_CONT_U8 {
if next!() as i8 >= -64 {
err!(Some(2))
}
}
@ -184,10 +184,10 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
(0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
_ => err!(Some(1)),
}
if next!() & !CONT_MASK != TAG_CONT_U8 {
if next!() as i8 >= -64 {
err!(Some(2))
}
if next!() & !CONT_MASK != TAG_CONT_U8 {
if next!() as i8 >= -64 {
err!(Some(3))
}
}
@ -258,8 +258,6 @@ pub fn utf8_char_width(b: u8) -> usize {
/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
const TAG_CONT_U8: u8 = 0b1000_0000;
// truncate `&str` to length at most equal to `max`
// return `true` if it were truncated, and the new str.