Simplify const memchr.

This commit is contained in:
Markus Reiter 2022-09-09 11:48:39 +02:00
parent c84083b08e
commit db29de7745
No known key found for this signature in database
GPG Key ID: 245293B51702655B

View File

@ -2,7 +2,6 @@
// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
use crate::cmp; use crate::cmp;
use crate::intrinsics;
use crate::mem; use crate::mem;
const LO_USIZE: usize = usize::repeat_u8(0x01); const LO_USIZE: usize = usize::repeat_u8(0x01);
@ -17,19 +16,19 @@ const USIZE_BYTES: usize = mem::size_of::<usize>();
/// bytes where the borrow propagated all the way to the most significant /// bytes where the borrow propagated all the way to the most significant
/// bit." /// bit."
#[inline] #[inline]
fn contains_zero_byte(x: usize) -> bool { const fn contains_zero_byte(x: usize) -> bool {
x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
} }
#[cfg(target_pointer_width = "16")] #[cfg(target_pointer_width = "16")]
#[inline] #[inline]
fn repeat_byte(b: u8) -> usize { const fn repeat_byte(b: u8) -> usize {
(b as usize) << 8 | b as usize (b as usize) << 8 | b as usize
} }
#[cfg(not(target_pointer_width = "16"))] #[cfg(not(target_pointer_width = "16"))]
#[inline] #[inline]
fn repeat_byte(b: u8) -> usize { const fn repeat_byte(b: u8) -> usize {
(b as usize) * (usize::MAX / 255) (b as usize) * (usize::MAX / 255)
} }
@ -37,33 +36,31 @@ fn repeat_byte(b: u8) -> usize {
#[must_use] #[must_use]
#[inline] #[inline]
pub const fn memchr(x: u8, text: &[u8]) -> Option<usize> { pub const fn memchr(x: u8, text: &[u8]) -> Option<usize> {
#[inline] // Fast path for small slices.
fn rt_impl(x: u8, text: &[u8]) -> Option<usize> { if text.len() < 2 * USIZE_BYTES {
// Fast path for small slices return memchr_naive(x, text);
if text.len() < 2 * USIZE_BYTES {
return text.iter().position(|elt| *elt == x);
}
memchr_general_case(x, text)
} }
const fn const_impl(x: u8, bytes: &[u8]) -> Option<usize> { memchr_aligned(x, text)
let mut i = 0;
while i < bytes.len() {
if bytes[i] == x {
return Some(i);
}
i += 1;
}
None
}
// SAFETY: The const and runtime versions have identical behavior
unsafe { intrinsics::const_eval_select((x, text), const_impl, rt_impl) }
} }
fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> { #[inline]
const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> {
let mut i = 0;
// FIXME(const-hack): Replace with `text.iter().position(|c| *c == x)`.
while i < text.len() {
if text[i] == x {
return Some(i);
}
i += 1;
}
None
}
const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> {
// Scan for a single byte value by reading two `usize` words at a time. // Scan for a single byte value by reading two `usize` words at a time.
// //
// Split `text` in three parts // Split `text` in three parts
@ -78,7 +75,7 @@ fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
if offset > 0 { if offset > 0 {
offset = cmp::min(offset, len); offset = cmp::min(offset, len);
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) { if let Some(index) = memchr_naive(x, &text[..offset]) {
return Some(index); return Some(index);
} }
} }
@ -103,7 +100,8 @@ fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
} }
// Find the byte after the point the body loop stopped. // Find the byte after the point the body loop stopped.
text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i) // FIXME(const-hack): Use `?` instead.
if let Some(i) = memchr_naive(x, &text[offset..]) { Some(offset + i) } else { None }
} }
/// Returns the last index matching the byte `x` in `text`. /// Returns the last index matching the byte `x` in `text`.