diff --git a/library/core/benches/str/char_count.rs b/library/core/benches/str/char_count.rs index f19d0941142..25d9b2e2992 100644 --- a/library/core/benches/str/char_count.rs +++ b/library/core/benches/str/char_count.rs @@ -3,21 +3,25 @@ use test::{black_box, Bencher}; macro_rules! define_benches { ($( fn $name: ident($arg: ident: &str) $body: block )+) => { + define_benches!(mod en_tiny, en::TINY, $($name $arg $body)+); define_benches!(mod en_small, en::SMALL, $($name $arg $body)+); define_benches!(mod en_medium, en::MEDIUM, $($name $arg $body)+); define_benches!(mod en_large, en::LARGE, $($name $arg $body)+); define_benches!(mod en_huge, en::HUGE, $($name $arg $body)+); + define_benches!(mod zh_tiny, zh::TINY, $($name $arg $body)+); define_benches!(mod zh_small, zh::SMALL, $($name $arg $body)+); define_benches!(mod zh_medium, zh::MEDIUM, $($name $arg $body)+); define_benches!(mod zh_large, zh::LARGE, $($name $arg $body)+); define_benches!(mod zh_huge, zh::HUGE, $($name $arg $body)+); + define_benches!(mod ru_tiny, ru::TINY, $($name $arg $body)+); define_benches!(mod ru_small, ru::SMALL, $($name $arg $body)+); define_benches!(mod ru_medium, ru::MEDIUM, $($name $arg $body)+); define_benches!(mod ru_large, ru::LARGE, $($name $arg $body)+); define_benches!(mod ru_huge, ru::HUGE, $($name $arg $body)+); + define_benches!(mod emoji_tiny, emoji::TINY, $($name $arg $body)+); define_benches!(mod emoji_small, emoji::SMALL, $($name $arg $body)+); define_benches!(mod emoji_medium, emoji::MEDIUM, $($name $arg $body)+); define_benches!(mod emoji_large, emoji::LARGE, $($name $arg $body)+); @@ -43,12 +47,12 @@ macro_rules! define_benches { } define_benches! { - fn case00_cur_libcore(s: &str) { - cur_libcore(s) + fn case00_libcore(s: &str) { + libcore(s) } - fn case01_old_libcore(s: &str) { - old_libcore(s) + fn case01_filter_count_cont_bytes(s: &str) { + filter_count_cont_bytes(s) } fn case02_iter_increment(s: &str) { @@ -60,14 +64,16 @@ define_benches! 
{ } } -fn cur_libcore(s: &str) -> usize { +fn libcore(s: &str) -> usize { s.chars().count() } + #[inline] fn utf8_is_cont_byte(byte: u8) -> bool { (byte as i8) < -64 } -fn old_libcore(s: &str) -> usize { + +fn filter_count_cont_bytes(s: &str) -> usize { s.as_bytes().iter().filter(|&&byte| !utf8_is_cont_byte(byte)).count() } diff --git a/library/core/benches/str/corpora.rs b/library/core/benches/str/corpora.rs index 04e60f0144a..b4ac625061d 100644 --- a/library/core/benches/str/corpora.rs +++ b/library/core/benches/str/corpora.rs @@ -1,8 +1,9 @@ //! Exposes a number of modules with different kinds of strings. //! -//! Each module contains `&str` constants named `SMALL`, `MEDIUM`, `LARGE`, and -//! `HUGE`. +//! Each module contains `&str` constants named `TINY`, `SMALL`, `MEDIUM`, +//! `LARGE`, and `HUGE`. //! +//! - The `TINY` string is generally around 8 bytes. //! - The `SMALL` string is generally around 30-40 bytes. //! - The `MEDIUM` string is generally around 600-700 bytes. //! - The `LARGE` string is the `MEDIUM` string repeated 8x, and is around 5kb. @@ -27,6 +28,7 @@ macro_rules! define_consts { } pub mod en { + pub const TINY: &str = "Mary had"; pub const SMALL: &str = "Mary had a little lamb, Little lamb"; define_consts! { "Rust is blazingly fast and memory-efficient: with no runtime or garbage @@ -42,6 +44,7 @@ pub mod en { } pub mod zh { + pub const TINY: &str = "速度惊"; pub const SMALL: &str = "速度惊人且内存利用率极高"; define_consts! { "Rust 速度惊人且内存利用率极高。由于\ @@ -59,6 +62,7 @@ pub mod zh { } pub mod ru { + pub const TINY: &str = "Сотни"; pub const SMALL: &str = "Сотни компаний по"; define_consts! { "Сотни компаний по всему миру используют Rust в реальных\ @@ -72,6 +76,7 @@ pub mod ru { } pub mod emoji { + pub const TINY: &str = "😀😃"; pub const SMALL: &str = "😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘"; define_consts! 
{ "😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘😗☺😚😙🥲😋😛😜🤪😝🤑🤗🤭🤫🤔🤐🤨😐😑😶😶‍🌫️😏😒\ diff --git a/library/core/src/str/count.rs b/library/core/src/str/count.rs index 464c6889c32..a80ebac734d 100644 --- a/library/core/src/str/count.rs +++ b/library/core/src/str/count.rs @@ -17,27 +17,57 @@ //! Note: Because the term "leading byte" can sometimes be ambiguous (for //! example, it could also refer to the first byte of a slice), we'll often use //! the term "non-continuation byte" to refer to these bytes in the code. +use core::intrinsics::unlikely; +const USIZE_SIZE: usize = core::mem::size_of::<usize>(); +const UNROLL_INNER: usize = 4; + +#[inline] pub(super) fn count_chars(s: &str) -> usize { + if s.len() < USIZE_SIZE * UNROLL_INNER { + // Avoid entering the optimized implementation for strings where the + // difference is not likely to matter, or where it might even be slower. + // That said, a ton of thought was not spent on the particular threshold + // here, beyond "this value seems to make sense". + char_count_general_case(s.as_bytes()) + } else { + do_count_chars(s) + } +} + +fn do_count_chars(s: &str) -> usize { // For correctness, `CHUNK_SIZE` must be: + // // - Less than or equal to 255, otherwise we'll overflow bytes in `counts`. // - A multiple of `UNROLL_INNER`, otherwise our `break` inside the // `body.chunks(CHUNK_SIZE)` loop. // // For performance, `CHUNK_SIZE` should be: - // - Relatively cheap to `%` against. + // - Relatively cheap to `/` against (so some simple sum of powers of two). // - Large enough to avoid paying for the cost of the `sum_bytes_in_usize` // too often. const CHUNK_SIZE: usize = 192; - const UNROLL_INNER: usize = 4; - // Check the properties of `CHUNK_SIZE` / `UNROLL_INNER` that are required + // Check the properties of `CHUNK_SIZE` and `UNROLL_INNER` that are required // for correctness.
- const _: [(); 1] = [(); (CHUNK_SIZE < 256 && (CHUNK_SIZE % UNROLL_INNER) == 0) as usize]; + const _: () = assert!(CHUNK_SIZE < 256); + const _: () = assert!(CHUNK_SIZE % UNROLL_INNER == 0); + // SAFETY: transmuting `[u8]` to `[usize]` is safe except for size // differences which are handled by `align_to`. let (head, body, tail) = unsafe { s.as_bytes().align_to::<usize>() }; + // This should be quite rare, and basically exists to handle the degenerate + // cases where align_to fails (as well as miri under symbolic alignment + // mode). + // + // The `unlikely` helps discourage LLVM from inlining the body, which is + // nice, as we would rather not mark the `char_count_general_case` function + // as cold. + if unlikely(body.is_empty() || head.len() > USIZE_SIZE || tail.len() > USIZE_SIZE) { + return char_count_general_case(s.as_bytes()); + } + let mut total = char_count_general_case(head) + char_count_general_case(tail); // Split `body` into `CHUNK_SIZE` chunks to reduce the frequency with which // we call `sum_bytes_in_usize`. @@ -45,11 +75,8 @@ pub(super) fn count_chars(s: &str) -> usize { // We accumulate intermediate sums in `counts`, where each byte contains // a subset of the sum of this chunk, like a `[u8; size_of::<usize>()]`. let mut counts = 0; - let unrolled_chunks = chunk.array_chunks::<UNROLL_INNER>(); - // If there's a remainder (know can only happen for the last item in - // `chunks`, because `CHUNK_SIZE % UNROLL == 0`), then we need to - // account for that (although we don't use it to later).
- let remainder = unrolled_chunks.remainder(); + + let (unrolled_chunks, remainder) = chunk.as_chunks::<UNROLL_INNER>(); for unrolled in unrolled_chunks { for &word in unrolled { // Because `CHUNK_SIZE` is < 256, this addition can't cause the @@ -85,8 +112,8 @@ pub(super) fn count_chars(s: &str) -> usize { // true) #[inline] fn contains_non_continuation_byte(w: usize) -> usize { - let lsb = 0x0101_0101_0101_0101u64 as usize; - ((!w >> 7) | (w >> 6)) & lsb + const LSB: usize = 0x0101_0101_0101_0101u64 as usize; + ((!w >> 7) | (w >> 6)) & LSB } // Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but @@ -97,7 +124,7 @@ fn sum_bytes_in_usize(values: usize) -> usize { const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize; let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES); - pair_sum.wrapping_mul(LSB_SHORTS) >> ((core::mem::size_of::<usize>() - 2) * 8) + pair_sum.wrapping_mul(LSB_SHORTS) >> ((USIZE_SIZE - 2) * 8) } // This is the most direct implementation of the concept of "count the number of @@ -105,12 +132,5 @@ fn sum_bytes_in_usize(values: usize) -> usize { // head and tail of the input string (the first and last item in the tuple // returned by `slice::align_to`). fn char_count_general_case(s: &[u8]) -> usize { - const CONT_MASK_U8: u8 = 0b0011_1111; - const TAG_CONT_U8: u8 = 0b1000_0000; - let mut leads = 0; - for &byte in s { - let is_lead = (byte & !CONT_MASK_U8) != TAG_CONT_U8; - leads += is_lead as usize; - } - leads + s.iter().filter(|&&byte| !super::validations::utf8_is_cont_byte(byte)).count() }