Add a fast path for ASCII-to-ASCII replacement in `str::replace`
This commit is contained in:
parent
a772336fb3
commit
4484085b18
@ -19,7 +19,7 @@
|
|||||||
pub use core::str::SplitWhitespace;
|
pub use core::str::SplitWhitespace;
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub use core::str::pattern;
|
pub use core::str::pattern;
|
||||||
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
|
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher, Utf8Pattern};
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub use core::str::{Bytes, CharIndices, Chars, from_utf8, from_utf8_mut};
|
pub use core::str::{Bytes, CharIndices, Chars, from_utf8, from_utf8_mut};
|
||||||
#[stable(feature = "str_escape", since = "1.34.0")]
|
#[stable(feature = "str_escape", since = "1.34.0")]
|
||||||
@ -268,6 +268,18 @@ pub fn into_boxed_bytes(self: Box<str>) -> Box<[u8]> {
|
|||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn replace<P: Pattern>(&self, from: P, to: &str) -> String {
|
pub fn replace<P: Pattern>(&self, from: P, to: &str) -> String {
|
||||||
|
// Fast path for ASCII to ASCII case.
|
||||||
|
|
||||||
|
if let Some(from_byte) = match from.as_utf8_pattern() {
|
||||||
|
Some(Utf8Pattern::StringPattern([from_byte])) => Some(*from_byte),
|
||||||
|
Some(Utf8Pattern::CharPattern(c)) => c.as_ascii().map(|ascii_char| ascii_char.to_u8()),
|
||||||
|
_ => None,
|
||||||
|
} {
|
||||||
|
if let [to_byte] = to.as_bytes() {
|
||||||
|
return unsafe { replace_ascii(self.as_bytes(), from_byte, *to_byte) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
let mut last_end = 0;
|
let mut last_end = 0;
|
||||||
for (start, part) in self.match_indices(from) {
|
for (start, part) in self.match_indices(from) {
|
||||||
@ -661,3 +673,14 @@ fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
|
|||||||
|
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
|
#[cfg(not(test))]
|
||||||
|
#[cfg(not(no_global_oom_handling))]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
/// Byte-wise fast path for replacing one ASCII byte with another ASCII byte.
///
/// Builds a new `String` from `utf8_bytes` in which every byte equal to `from`
/// is swapped for `to`; all other bytes are copied unchanged.
|
||||||
|
/// The loop is a plain per-byte select, which is intended to vectorize well.
///
/// # Safety
///
/// The result is created with `String::from_utf8_unchecked`, so nothing is
/// validated here. Callers must pass valid UTF-8 in `utf8_bytes` and ASCII bytes
/// for both `from` and `to`; otherwise the returned `String` may contain
/// invalid UTF-8.
|
||||||
|
unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
|
||||||
|
// Exactly one output byte is produced per input byte, so the length is preserved.
let result: Vec<u8> = utf8_bytes.iter().map(|b| if *b == from { to } else { *b }).collect();
|
||||||
|
// SAFETY: We replaced ascii with ascii on valid utf8 strings.
// An ASCII byte never occurs inside a multi-byte UTF-8 sequence, so swapping one
// ASCII byte for another leaves every code-point boundary intact.
|
||||||
|
unsafe { String::from_utf8_unchecked(result) }
|
||||||
|
}
|
||||||
|
@ -53,7 +53,7 @@
|
|||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
use core::ops::Bound::{Excluded, Included, Unbounded};
|
use core::ops::Bound::{Excluded, Included, Unbounded};
|
||||||
use core::ops::{self, Range, RangeBounds};
|
use core::ops::{self, Range, RangeBounds};
|
||||||
use core::str::pattern::Pattern;
|
use core::str::pattern::{Pattern, Utf8Pattern};
|
||||||
use core::{fmt, hash, ptr, slice};
|
use core::{fmt, hash, ptr, slice};
|
||||||
|
|
||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
@ -2424,6 +2424,11 @@ fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
|
|||||||
{
|
{
|
||||||
self[..].strip_suffix_of(haystack)
|
self[..].strip_suffix_of(haystack)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exposes this pattern's contents as raw bytes: always returns
// `Some(Utf8Pattern::StringPattern(..))` wrapping `self.as_bytes()`.
// NOTE(review): the enclosing `impl` header is outside this hunk; the sibling
// method's `self[..]` delegation suggests a `String`-based pattern — confirm.
#[inline]
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
|
||||||
|
Some(Utf8Pattern::StringPattern(self.as_bytes()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! impl_eq {
|
macro_rules! impl_eq {
|
||||||
|
@ -160,6 +160,19 @@ fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a borrowed [`Utf8Pattern`] view of this pattern, if it has one.
///
/// Implementations return `None` when the pattern cannot be exposed as UTF-8.
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>>;
|
||||||
|
}
|
||||||
|
/// Inspectable view of a [`Pattern`], as returned by [`Pattern::as_utf8_pattern()`].
|
||||||
|
/// Lets a caller look at the contents of a [`Pattern`] through a shared reference,
|
||||||
|
/// in the cases where those contents can be represented as UTF-8.
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||||
|
pub enum Utf8Pattern<'a> {
|
||||||
|
/// Bytes of a string-like pattern; returned by the `String` and `str` patterns.
|
||||||
|
StringPattern(&'a [u8]),
|
||||||
|
/// The character itself; returned by the `char` pattern.
|
||||||
|
CharPattern(char),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Searcher
|
// Searcher
|
||||||
@ -599,6 +612,11 @@ fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
|
|||||||
{
|
{
|
||||||
self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
|
self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exposes a single-character pattern: always returns
// `Some(Utf8Pattern::CharPattern(..))` holding a copy of `*self`.
#[inline]
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
|
||||||
|
Some(Utf8Pattern::CharPattern(*self))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
@ -657,6 +675,11 @@ impl<C: MultiCharEq> Pattern for MultiCharEqPattern<C> {
|
|||||||
fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> {
|
fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> {
|
||||||
MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
|
MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always `None` for this pattern type.
// NOTE(review): presumably because the pattern wraps a `char_eq` matcher (see
// `into_searcher` just above) rather than literal text — confirm.
#[inline]
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
|
unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
|
||||||
@ -747,6 +770,11 @@ fn strip_suffix_of<$a>(self, haystack: &$a str) -> Option<&$a str>
|
|||||||
{
|
{
|
||||||
($pmap)(self).strip_suffix_of(haystack)
|
($pmap)(self).strip_suffix_of(haystack)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always `None` for the pattern impls generated by this macro.
// NOTE(review): the macro header is outside this hunk; the sibling methods
// forward through `($pmap)(self)` — confirm which types this covers.
#[inline]
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
|
||||||
|
None
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1022,6 +1050,11 @@ fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exposes this pattern's contents as raw bytes: always returns
// `Some(Utf8Pattern::StringPattern(..))` wrapping `self.as_bytes()`.
// NOTE(review): the enclosing `impl` header is outside this hunk; this looks
// like the string-slice pattern impl — confirm.
#[inline]
|
||||||
|
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
|
||||||
|
Some(Utf8Pattern::StringPattern(self.as_bytes()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user