From dc4a644980e919539962f2bbf267381c20441f8e Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Sun, 5 Jul 2020 15:52:13 -0700 Subject: [PATCH] Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchmarks into own file --- src/libcore/benches/ascii.rs | 78 +------------------------- src/libcore/benches/ascii/is_ascii.rs | 81 +++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 76 deletions(-) create mode 100644 src/libcore/benches/ascii/is_ascii.rs diff --git a/src/libcore/benches/ascii.rs b/src/libcore/benches/ascii.rs index 21199ecaac1..6a4d706cff6 100644 --- a/src/libcore/benches/ascii.rs +++ b/src/libcore/benches/ascii.rs @@ -1,3 +1,5 @@ +mod is_ascii; + // Lower-case ASCII 'a' is the first byte that has its highest bit set // after wrap-adding 0x1F: // @@ -59,48 +61,6 @@ macro_rules! benches { )+ } }; - - // For some tests the vec allocation tends to dominate, so it can be avoided. - (@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => { - benches!(@ro mod short_readonly SHORT $($name $arg $body)+); - benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+); - benches!(@ro mod long_readonly LONG $($name $arg $body)+); - // Add another `MEDIUM` bench, but trim the ends so that we can (try to) - // benchmark a case where the function has to handle misalignment. 
- mod medium_unaligned { - use super::*; - $( - #[bench] - fn $name(bencher: &mut Bencher) { - bencher.bytes = MEDIUM.len() as u64 - 2; - let mut vec = MEDIUM.as_bytes().to_vec(); - bencher.iter(|| { - black_box(&mut vec); - let $arg = black_box(&vec[1..(vec.len() - 1)]); - black_box($body) - }) - } - )+ - } - }; - (@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => { - mod $mod_name { - use super::*; - - $( - #[bench] - fn $name(bencher: &mut Bencher) { - bencher.bytes = $input.len() as u64; - let mut vec = $input.as_bytes().to_vec(); - bencher.iter(|| { - black_box(&mut vec); - let $arg = black_box(&vec[..]); - black_box($body) - }) - } - )+ - } - }; } use test::black_box; @@ -287,40 +247,6 @@ benches! { is_ascii_control, } -benches! { - @readonly - fn is_ascii_slice_libcore(bytes: &[u8]) { - bytes.is_ascii() - } - - fn is_ascii_slice_iter_all(bytes: &[u8]) { - bytes.iter().all(|b| b.is_ascii()) - } - - fn is_ascii_slice_align_to(bytes: &[u8]) { - is_ascii_align_to_impl(bytes) - } -} - -// Separate since it's easier to debug errors if they don't go through macro -// expansion first. -fn is_ascii_align_to_impl(bytes: &[u8]) -> bool { - if bytes.len() < core::mem::size_of::<usize>() { - return bytes.iter().all(|b| b.is_ascii()); - } - // SAFETY: transmuting a sequence of `u8` to `usize` is always fine - let (head, body, tail) = unsafe { bytes.align_to::<usize>() }; - head.iter().all(|b| b.is_ascii()) - && body.iter().all(|w| !contains_nonascii(*w)) - && tail.iter().all(|b| b.is_ascii()) -} - -#[inline] -fn contains_nonascii(v: usize) -> bool { - const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize; - (NONASCII_MASK & v) != 0 -} - macro_rules! 
repeat { ($s: expr) => { concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s) diff --git a/src/libcore/benches/ascii/is_ascii.rs b/src/libcore/benches/ascii/is_ascii.rs new file mode 100644 index 00000000000..45ba2f8fe82 --- /dev/null +++ b/src/libcore/benches/ascii/is_ascii.rs @@ -0,0 +1,81 @@ +use super::{LONG, MEDIUM, SHORT}; +use test::black_box; +use test::Bencher; + +macro_rules! benches { + ($( fn $name: ident($arg: ident: &[u8]) $body: block )+) => { + benches!(mod short SHORT[..] $($name $arg $body)+); + benches!(mod medium MEDIUM[..] $($name $arg $body)+); + benches!(mod long LONG[..] $($name $arg $body)+); + + benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+); + benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+); + benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+); + }; + + (mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => { + mod $mod_name { + use super::*; + $( + #[bench] + fn $name(bencher: &mut Bencher) { + bencher.bytes = $input[$range].len() as u64; + let mut vec = $input.as_bytes().to_vec(); + bencher.iter(|| { + black_box(&mut vec); + let $arg = black_box(&vec[$range]); + black_box($body) + }) + } + )+ + } + }; +} + +benches! { + fn case00_libcore(bytes: &[u8]) { + bytes.is_ascii() + } + + fn case01_iter_all(bytes: &[u8]) { + bytes.iter().all(|b| b.is_ascii()) + } + + fn case02_align_to(bytes: &[u8]) { + is_ascii_align_to(bytes) + } + + fn case03_align_to_unrolled(bytes: &[u8]) { + is_ascii_align_to_unrolled(bytes) + } +} + +// These are separate since it's easier to debug errors if they don't go through +// macro expansion first. 
+fn is_ascii_align_to(bytes: &[u8]) -> bool { + if bytes.len() < core::mem::size_of::<usize>() { + return bytes.iter().all(|b| b.is_ascii()); + } + // SAFETY: transmuting a sequence of `u8` to `usize` is always fine + let (head, body, tail) = unsafe { bytes.align_to::<usize>() }; + head.iter().all(|b| b.is_ascii()) + && body.iter().all(|w| !contains_nonascii(*w)) + && tail.iter().all(|b| b.is_ascii()) +} + +fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool { + if bytes.len() < core::mem::size_of::<usize>() { + return bytes.iter().all(|b| b.is_ascii()); + } + // SAFETY: transmuting a sequence of `u8` to `[usize; 2]` is always fine + let (head, body, tail) = unsafe { bytes.align_to::<[usize; 2]>() }; + head.iter().all(|b| b.is_ascii()) + && body.iter().all(|w| !contains_nonascii(w[0] | w[1])) + && tail.iter().all(|b| b.is_ascii()) +} + +#[inline] +fn contains_nonascii(v: usize) -> bool { + const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize; + (NONASCII_MASK & v) != 0 +}