Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchmarks into own file

This commit is contained in:
Thom Chiovoloni 2020-07-05 15:52:13 -07:00
parent 13e380d798
commit dc4a644980
2 changed files with 83 additions and 76 deletions

View File

@ -1,3 +1,5 @@
mod is_ascii;
// Lower-case ASCII 'a' is the first byte that has its highest bit set
// after wrap-adding 0x1F:
//
@ -59,48 +61,6 @@ macro_rules! benches {
)+
}
};
// For some tests the vec allocation tends to dominate, so it can be avoided.
(@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
benches!(@ro mod short_readonly SHORT $($name $arg $body)+);
benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+);
benches!(@ro mod long_readonly LONG $($name $arg $body)+);
// Add another `MEDIUM` bench, but trim the ends so that we can (try to)
// benchmark a case where the function has to handle misalignment.
mod medium_unaligned {
use super::*;
$(
#[bench]
fn $name(bencher: &mut Bencher) {
bencher.bytes = MEDIUM.len() as u64 - 2;
let mut vec = MEDIUM.as_bytes().to_vec();
bencher.iter(|| {
black_box(&mut vec);
let $arg = black_box(&vec[1..(vec.len() - 1)]);
black_box($body)
})
}
)+
}
};
(@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
mod $mod_name {
use super::*;
$(
#[bench]
fn $name(bencher: &mut Bencher) {
bencher.bytes = $input.len() as u64;
let mut vec = $input.as_bytes().to_vec();
bencher.iter(|| {
black_box(&mut vec);
let $arg = black_box(&vec[..]);
black_box($body)
})
}
)+
}
};
}
use test::black_box;
@ -287,40 +247,6 @@ benches! {
is_ascii_control,
}
// Read-only `is_ascii` benchmark cases. The `@readonly` arm of `benches!`
// expands each case into the `short_readonly`, `medium_readonly`,
// `long_readonly` and `medium_unaligned` modules, handing the body a borrowed
// `&[u8]` instead of a freshly allocated vector.
benches! {
@readonly
// Baseline: the slice method `<[u8]>::is_ascii`.
fn is_ascii_slice_libcore(bytes: &[u8]) {
bytes.is_ascii()
}
// Byte-at-a-time check through the iterator API.
fn is_ascii_slice_iter_all(bytes: &[u8]) {
bytes.iter().all(|b| b.is_ascii())
}
// Word-at-a-time check built on `slice::align_to`; see `is_ascii_align_to_impl`.
fn is_ascii_slice_align_to(bytes: &[u8]) {
is_ascii_align_to_impl(bytes)
}
}
// Separate since it's easier to debug errors if they don't go through macro
// expansion first.
fn is_ascii_align_to_impl(bytes: &[u8]) -> bool {
if bytes.len() < core::mem::size_of::<usize>() {
return bytes.iter().all(|b| b.is_ascii());
}
// SAFETY: transmuting a sequence of `u8` to `usize` is always fine
let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
head.iter().all(|b| b.is_ascii())
&& body.iter().all(|w| !contains_nonascii(*w))
&& tail.iter().all(|b| b.is_ascii())
}
/// Reports whether any byte packed into `v` has its most significant bit set,
/// i.e. whether the word holds at least one non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // 0x80 in every byte lane; the cast truncates the constant when `usize`
    // is narrower than 64 bits.
    const HIGH_BITS: usize = 0x80808080_80808080u64 as usize;
    let flagged = v & HIGH_BITS;
    flagged != 0
}
macro_rules! repeat {
($s: expr) => {
concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)

View File

@ -0,0 +1,81 @@
use super::{LONG, MEDIUM, SHORT};
use test::black_box;
use test::Bencher;
// Generates `#[bench]` functions for a list of cases written as
// `fn name(arg: &[u8]) { body }`.
//
// The first arm is the public entry point: it expands every case once per
// input/range combination, producing the modules `short`, `medium` and `long`
// (whole input) plus `unaligned_head`, `unaligned_tail` and `unaligned_both`
// (`MEDIUM` with its first and/or last byte trimmed off, presumably so the
// slice handed to the case does not start and/or end on a word boundary).
//
// The second arm is internal: it emits one module containing one benchmark
// per case for the given input constant and index range.
macro_rules! benches {
($( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
benches!(mod short SHORT[..] $($name $arg $body)+);
benches!(mod medium MEDIUM[..] $($name $arg $body)+);
benches!(mod long LONG[..] $($name $arg $body)+);
benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+);
benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+);
benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+);
};
(mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => {
mod $mod_name {
use super::*;
$(
#[bench]
fn $name(bencher: &mut Bencher) {
// Report the length of the sliced input as the bytes handled per iteration.
bencher.bytes = $input[$range].len() as u64;
// Copy the input once, outside the timed closure, so allocation is not measured.
let mut vec = $input.as_bytes().to_vec();
bencher.iter(|| {
// Make the buffer opaque to the optimizer on every iteration so the
// result cannot be assumed from a previous run.
black_box(&mut vec);
// Bind the case's argument name to the requested sub-slice.
let $arg = black_box(&vec[$range]);
// Keep the case's result alive so the work is not optimized away.
black_box($body)
})
}
)+
}
};
}
// The `is_ascii` implementations under comparison. Each case is expanded by
// `benches!` into one benchmark per input module. The `caseNN_` prefixes
// presumably keep the report in a fixed order.
benches! {
// Baseline: the slice method `<[u8]>::is_ascii`.
fn case00_libcore(bytes: &[u8]) {
bytes.is_ascii()
}
// Byte-at-a-time check through the iterator API.
fn case01_iter_all(bytes: &[u8]) {
bytes.iter().all(|b| b.is_ascii())
}
// One `usize` word at a time via `slice::align_to`.
fn case02_align_to(bytes: &[u8]) {
is_ascii_align_to(bytes)
}
// Two `usize` words at a time via `slice::align_to::<[usize; 2]>`.
fn case03_align_to_unrolled(bytes: &[u8]) {
is_ascii_align_to_unrolled(bytes)
}
}
// These are separate since it's easier to debug errors if they don't go through
// macro expansion first.
/// Returns `true` when every byte of `bytes` is ASCII.
///
/// Inputs shorter than one `usize` are scanned byte by byte. Longer inputs are
/// split with `slice::align_to` into a byte prefix, a run of `usize` words and
/// a byte suffix; each word is tested in a single operation.
fn is_ascii_align_to(bytes: &[u8]) -> bool {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    if bytes.len() < WORD_SIZE {
        return bytes.iter().all(u8::is_ascii);
    }

    // SAFETY: every bit pattern is a valid `usize`, so viewing initialized
    // `u8`s as `usize`s is sound.
    let (prefix, words, suffix) = unsafe { bytes.align_to::<usize>() };

    // Guard clauses keep the original left-to-right short-circuit order.
    if !prefix.iter().all(u8::is_ascii) {
        return false;
    }
    if words.iter().any(|&word| contains_nonascii(word)) {
        return false;
    }
    suffix.iter().all(u8::is_ascii)
}
/// Returns `true` when every byte of `bytes` is ASCII, testing two `usize`
/// words per step.
///
/// Inputs shorter than one `usize` are scanned byte by byte. Longer inputs are
/// split with `slice::align_to` into a byte prefix, a run of `[usize; 2]`
/// pairs and a byte suffix; the two words of a pair are OR-ed together so a
/// single mask test covers both.
fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    if bytes.len() < WORD_SIZE {
        return bytes.iter().all(u8::is_ascii);
    }

    // SAFETY: every bit pattern is a valid `[usize; 2]`, so viewing
    // initialized `u8`s as pairs of `usize` is sound.
    let (prefix, pairs, suffix) = unsafe { bytes.align_to::<[usize; 2]>() };

    // Guard clauses keep the original left-to-right short-circuit order.
    if !prefix.iter().all(u8::is_ascii) {
        return false;
    }
    // A high bit set in either word survives the OR.
    if pairs.iter().any(|&[first, second]| contains_nonascii(first | second)) {
        return false;
    }
    suffix.iter().all(u8::is_ascii)
}
/// Reports whether any byte packed into `v` has its most significant bit set,
/// i.e. whether the word holds at least one non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // 0x80 in every byte lane; the cast truncates the constant when `usize`
    // is narrower than 64 bits.
    const HIGH_BITS: usize = 0x80808080_80808080u64 as usize;
    let flagged = v & HIGH_BITS;
    flagged != 0
}