Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchmarks into own file
This commit is contained in:
parent
13e380d798
commit
dc4a644980
@ -1,3 +1,5 @@
|
||||
mod is_ascii;
|
||||
|
||||
// Lower-case ASCII 'a' is the first byte that has its highest bit set
|
||||
// after wrap-adding 0x1F:
|
||||
//
|
||||
@ -59,48 +61,6 @@ macro_rules! benches {
|
||||
)+
|
||||
}
|
||||
};
|
||||
|
||||
// For some tests the vec allocation tends to dominate, so it can be avoided.
|
||||
(@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
|
||||
benches!(@ro mod short_readonly SHORT $($name $arg $body)+);
|
||||
benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+);
|
||||
benches!(@ro mod long_readonly LONG $($name $arg $body)+);
|
||||
// Add another `MEDIUM` bench, but trim the ends so that we can (try to)
|
||||
// benchmark a case where the function has to handle misalignment.
|
||||
mod medium_unaligned {
|
||||
use super::*;
|
||||
$(
|
||||
#[bench]
|
||||
fn $name(bencher: &mut Bencher) {
|
||||
bencher.bytes = MEDIUM.len() as u64 - 2;
|
||||
let mut vec = MEDIUM.as_bytes().to_vec();
|
||||
bencher.iter(|| {
|
||||
black_box(&mut vec);
|
||||
let $arg = black_box(&vec[1..(vec.len() - 1)]);
|
||||
black_box($body)
|
||||
})
|
||||
}
|
||||
)+
|
||||
}
|
||||
};
|
||||
(@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
|
||||
mod $mod_name {
|
||||
use super::*;
|
||||
|
||||
$(
|
||||
#[bench]
|
||||
fn $name(bencher: &mut Bencher) {
|
||||
bencher.bytes = $input.len() as u64;
|
||||
let mut vec = $input.as_bytes().to_vec();
|
||||
bencher.iter(|| {
|
||||
black_box(&mut vec);
|
||||
let $arg = black_box(&vec[..]);
|
||||
black_box($body)
|
||||
})
|
||||
}
|
||||
)+
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
use test::black_box;
|
||||
@ -287,40 +247,6 @@ benches! {
|
||||
is_ascii_control,
|
||||
}
|
||||
|
||||
// Read-only `is_ascii` benchmarks. Each body receives the input as `bytes`
// and evaluates to a `bool`; the `@readonly` rules of `benches!` expand every
// one of them into `#[bench]` functions for each input variant.
benches! {
    @readonly
    fn is_ascii_slice_libcore(bytes: &[u8]) {
        // The slice method itself.
        <[u8]>::is_ascii(bytes)
    }

    fn is_ascii_slice_iter_all(bytes: &[u8]) {
        // Plain byte-at-a-time scan.
        bytes.iter().all(u8::is_ascii)
    }

    fn is_ascii_slice_align_to(bytes: &[u8]) {
        // Word-at-a-time scan built on `align_to`.
        is_ascii_align_to_impl(bytes)
    }
}
|
||||
|
||||
// Separate since it's easier to debug errors if they don't go through macro
|
||||
// expansion first.
|
||||
fn is_ascii_align_to_impl(bytes: &[u8]) -> bool {
|
||||
if bytes.len() < core::mem::size_of::<usize>() {
|
||||
return bytes.iter().all(|b| b.is_ascii());
|
||||
}
|
||||
// SAFETY: transmuting a sequence of `u8` to `usize` is always fine
|
||||
let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
|
||||
head.iter().all(|b| b.is_ascii())
|
||||
&& body.iter().all(|w| !contains_nonascii(*w))
|
||||
&& tail.iter().all(|b| b.is_ascii())
|
||||
}
|
||||
|
||||
/// Returns `true` when at least one byte of the word `v` has its most
/// significant bit set, i.e. when `v` holds a non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // 0x80 replicated into every byte of a `usize`: `usize::MAX / 0xFF` is
    // 0x01 in every byte, and scaling by 0x80 moves that bit to the top of
    // each byte.
    const HIGH_BITS: usize = usize::MAX / 0xFF * 0x80;
    (v & HIGH_BITS) != 0
}
|
||||
|
||||
macro_rules! repeat {
|
||||
($s: expr) => {
|
||||
concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)
|
||||
|
81
src/libcore/benches/ascii/is_ascii.rs
Normal file
81
src/libcore/benches/ascii/is_ascii.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use super::{LONG, MEDIUM, SHORT};
|
||||
use test::black_box;
|
||||
use test::Bencher;
|
||||
|
||||
// Generates `#[bench]` functions for a list of `fn name(arg: &[u8]) { .. }`
// bodies, once per input variant.
macro_rules! benches {
    // Internal rule: emit one module named `$module` containing a `#[bench]`
    // function per body, each run against the `[$slice]` part of `$text`.
    (mod $module: ident $text: ident [$slice: expr] $($bench: ident $param: ident $code: block)+) => {
        mod $module {
            use super::*;
            $(
                #[bench]
                fn $bench(bencher: &mut Bencher) {
                    // Copy the input into an owned buffer and report the
                    // length of the sliced region as the bytes processed per
                    // iteration.
                    let mut buffer = $text.as_bytes().to_vec();
                    bencher.bytes = $text[$slice].len() as u64;
                    bencher.iter(|| {
                        // Presumably keeps the optimizer from assuming
                        // anything about the buffer contents between
                        // iterations.
                        black_box(&mut buffer);
                        let $param = black_box(&buffer[$slice]);
                        black_box($code)
                    })
                }
            )+
        }
    };

    // Entry point: expand every body for each input variant.
    ($( fn $bench: ident($param: ident: &[u8]) $code: block )+) => {
        // Whole inputs at three sizes.
        benches!(mod short SHORT[..] $($bench $param $code)+);
        benches!(mod medium MEDIUM[..] $($bench $param $code)+);
        benches!(mod long LONG[..] $($bench $param $code)+);

        // `MEDIUM` with one byte trimmed from the front, the back, or both.
        benches!(mod unaligned_head MEDIUM[1..] $($bench $param $code)+);
        benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($bench $param $code)+);
        benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($bench $param $code)+);
    };
}
|
||||
|
||||
// The `is_ascii` implementations under comparison. Every body receives the
// input as `bytes` and evaluates to a `bool`; `benches!` expands each of them
// into one `#[bench]` function per input variant.
// NOTE(review): the `caseNN_` prefixes presumably keep the results grouped in
// a fixed order in the benchmark output — confirm.
benches! {
    fn case00_libcore(bytes: &[u8]) {
        // The slice method itself.
        <[u8]>::is_ascii(bytes)
    }

    fn case01_iter_all(bytes: &[u8]) {
        // Plain byte-at-a-time scan.
        bytes.iter().all(u8::is_ascii)
    }

    fn case02_align_to(bytes: &[u8]) {
        // One `usize` per step.
        is_ascii_align_to(bytes)
    }

    fn case03_align_to_unrolled(bytes: &[u8]) {
        // Two `usize`s per step.
        is_ascii_align_to_unrolled(bytes)
    }
}
|
||||
|
||||
// These are separate since it's easier to debug errors if they don't go through
|
||||
// macro expansion first.
|
||||
fn is_ascii_align_to(bytes: &[u8]) -> bool {
|
||||
if bytes.len() < core::mem::size_of::<usize>() {
|
||||
return bytes.iter().all(|b| b.is_ascii());
|
||||
}
|
||||
// SAFETY: transmuting a sequence of `u8` to `usize` is always fine
|
||||
let (head, body, tail) = unsafe { bytes.align_to::<usize>() };
|
||||
head.iter().all(|b| b.is_ascii())
|
||||
&& body.iter().all(|w| !contains_nonascii(*w))
|
||||
&& tail.iter().all(|b| b.is_ascii())
|
||||
}
|
||||
|
||||
fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
|
||||
if bytes.len() < core::mem::size_of::<usize>() {
|
||||
return bytes.iter().all(|b| b.is_ascii());
|
||||
}
|
||||
// SAFETY: transmuting a sequence of `u8` to `[usize; 2]` is always fine
|
||||
let (head, body, tail) = unsafe { bytes.align_to::<[usize; 2]>() };
|
||||
head.iter().all(|b| b.is_ascii())
|
||||
&& body.iter().all(|w| !contains_nonascii(w[0] | w[1]))
|
||||
&& tail.iter().all(|b| b.is_ascii())
|
||||
}
|
||||
|
||||
/// Returns `true` when at least one byte of the word `v` has its most
/// significant bit set, i.e. when `v` holds a non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // 0x80 in every byte position of a `usize`. All bytes are equal, so the
    // byte order used to assemble the word does not matter.
    const HIGH_BIT_PER_BYTE: usize = usize::from_ne_bytes([0x80; core::mem::size_of::<usize>()]);
    let high_bits = v & HIGH_BIT_PER_BYTE;
    high_bits != 0
}
|
Loading…
x
Reference in New Issue
Block a user