Auto merge of #96605 - Urgau:string-retain-codegen, r=thomcc
Improve codegen of String::retain method. This pull request improves the codegen of the `String::retain` method. Using `unwrap_unchecked` helps the optimizer avoid generating a panicking path that will never be taken for a valid UTF-8 string. Using `encode_utf8` saves us from an expensive call to `memcpy`: the optimizer is unable to realize that `ch_len <= 4` when copying, whereas with `encode_utf8` it can generate much better assembly code. https://rust.godbolt.org/z/z73ohenfc
This commit is contained in:
commit
4a86c7907b
@ -1469,19 +1469,28 @@ impl String {
|
|||||||
let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 };
|
let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 };
|
||||||
|
|
||||||
while guard.idx < len {
|
while guard.idx < len {
|
||||||
let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() };
|
let ch =
|
||||||
|
// SAFETY: `guard.idx` is positive-or-zero and less than `len` so the `get_unchecked`
|
||||||
|
// is in bound. `self` is valid UTF-8 like string and the returned slice starts at
|
||||||
|
// a unicode code point so the `Chars` always returns one character.
|
||||||
|
unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap_unchecked() };
|
||||||
let ch_len = ch.len_utf8();
|
let ch_len = ch.len_utf8();
|
||||||
|
|
||||||
if !f(ch) {
|
if !f(ch) {
|
||||||
guard.del_bytes += ch_len;
|
guard.del_bytes += ch_len;
|
||||||
} else if guard.del_bytes > 0 {
|
} else if guard.del_bytes > 0 {
|
||||||
unsafe {
|
// SAFETY: `guard.idx` is in bound and `guard.del_bytes` represent the number of
|
||||||
ptr::copy(
|
// bytes that are erased from the string so the resulting `guard.idx -
|
||||||
guard.s.vec.as_ptr().add(guard.idx),
|
// guard.del_bytes` always represent a valid unicode code point.
|
||||||
guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes),
|
//
|
||||||
ch_len,
|
// `guard.del_bytes` >= `ch.len_utf8()`, so taking a slice with `ch.len_utf8()` len
|
||||||
);
|
// is safe.
|
||||||
}
|
ch.encode_utf8(unsafe {
|
||||||
|
crate::slice::from_raw_parts_mut(
|
||||||
|
guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes),
|
||||||
|
ch.len_utf8(),
|
||||||
|
)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Point idx to the next char
|
// Point idx to the next char
|
||||||
|
Loading…
x
Reference in New Issue
Block a user