Tune the `is_ascii` implementation used for short slices
This commit is contained in:
parent
1cfcf71e04
commit
c8c5a587ac
@ -268,6 +268,24 @@ const fn contains_nonascii(v: usize) -> bool {
|
||||
(NONASCII_MASK & v) != 0
|
||||
}
|
||||
|
||||
/// ASCII test *without* the chunk-at-a-time optimizations.
|
||||
///
|
||||
/// Returns `true` when every byte of `bytes` is ASCII; an empty slice yields
/// `true`. Bytes are examined one at a time, from the end of the slice
/// towards the front.
///
/// This is carefully structured to produce nice small code -- it's smaller in
|
||||
/// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
|
||||
/// touch it, be sure to run (and update if needed) the assembly test.
|
||||
#[unstable(feature = "str_internals", issue = "none")]
|
||||
#[doc(hidden)]
|
||||
#[inline]
|
||||
pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
|
||||
// Each iteration splits off the final byte; the pattern stops matching
// once `bytes` is empty, which ends the loop.
while let [rest @ .., last] = bytes {
|
||||
if !last.is_ascii() {
|
||||
// Leave the offending byte in `bytes` so the slice stays non-empty.
break;
|
||||
}
|
||||
// The last byte was ASCII: drop it and keep checking the remainder.
bytes = rest;
|
||||
}
|
||||
// Empty here means every byte was consumed, i.e. all of them were ASCII.
bytes.is_empty()
|
||||
}
|
||||
|
||||
/// Optimized ASCII test that will use usize-at-a-time operations instead of
|
||||
/// byte-at-a-time operations (when possible).
|
||||
///
|
||||
@ -293,16 +311,7 @@ const fn is_ascii(s: &[u8]) -> bool {
|
||||
// We also do this for architectures where `size_of::<usize>()` isn't
|
||||
// sufficient alignment for `usize`, because it's a weird edge case.
|
||||
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
|
||||
// FIXME: once iterators and closures can be used in `const fn`,
|
||||
// return s.iter().all(|b| b.is_ascii());
|
||||
let mut i = 0;
|
||||
while i < len {
|
||||
if !s[i].is_ascii() {
|
||||
return false;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
return true;
|
||||
return is_ascii_simple(s);
|
||||
}
|
||||
|
||||
// We always read the first word unaligned, which means `align_offset` is
|
||||
|
@ -44,6 +44,10 @@ mod raw;
|
||||
mod rotate;
|
||||
mod specialize;
|
||||
|
||||
#[unstable(feature = "str_internals", issue = "none")]
|
||||
#[doc(hidden)]
|
||||
pub use ascii::is_ascii_simple;
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub use iter::{Chunks, ChunksMut, Windows};
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
|
35
tests/assembly/slice-is_ascii.rs
Normal file
35
tests/assembly/slice-is_ascii.rs
Normal file
@ -0,0 +1,35 @@
|
||||
// revisions: WIN LIN
|
||||
// [WIN] only-windows
|
||||
// [LIN] only-linux
|
||||
// assembly-output: emit-asm
|
||||
// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
|
||||
// min-llvm-version: 14
|
||||
// only-x86_64
|
||||
// ignore-sgx
|
||||
// ignore-debug
|
||||
|
||||
#![feature(str_internals)]
|
||||
|
||||
// CHECK-LABEL: is_ascii_simple_demo:
|
||||
#[no_mangle]
|
||||
// Unmangled wrapper that forwards to `core::slice::is_ascii_simple`, giving
// the assembly output a stable symbol for the directives in the body to match.
pub fn is_ascii_simple_demo(bytes: &[u8]) -> bool {
|
||||
// Linux (System V): pointer is rdi; length is rsi
|
||||
// Windows: pointer is rcx; length is rdx.
|
||||
|
||||
// No register shuffling, testing or comparing is expected ahead of the
// loop head.
// CHECK-NOT: mov
|
||||
// CHECK-NOT: test
|
||||
// CHECK-NOT: cmp
|
||||
|
||||
// Expected loop: copy the length, decrement it, leave the loop on borrow
// (length was zero), otherwise compare the byte at `ptr + old_len - 1`
// with 0 and continue while its sign bit is clear.
// CHECK: .[[LOOPHEAD:.+]]:
|
||||
// CHECK-NEXT: mov [[TEMP:.+]], [[LEN:rsi|rdx]]
|
||||
// CHECK-NEXT: sub [[LEN]], 1
|
||||
// CHECK-NEXT: jb .[[LOOPEXIT:.+]]
|
||||
// CHECK-NEXT: cmp byte ptr [{{rdi|rcx}} + [[TEMP]] - 1], 0
|
||||
// CHECK-NEXT: jns .[[LOOPHEAD]]
|
||||
|
||||
// Expected exit: the result is whether the remaining length is zero.
// CHECK-NEXT: .[[LOOPEXIT]]:
|
||||
// CHECK-NEXT: test [[TEMP]], [[TEMP]]
|
||||
// CHECK-NEXT: sete al
|
||||
// CHECK-NEXT: ret
|
||||
core::slice::is_ascii_simple(bytes)
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user