Auto merge of #123878 - jwong101:inplacecollect, r=jhpratt
optimize inplace collection of Vec
This PR has the following changes:
1. Using `usize::unchecked_mul` in 79424056b0/library/alloc/src/vec/in_place_collect.rs (L262),
as LLVM does not know that the operation can't wrap, since that's the size of the original allocation.
Given the following:
```rust
pub struct Foo([usize; 3]);
pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> {
v.into_iter().map(|f| f.0).collect()
}
```
<details>
<summary>Before this commit:</summary>
```llvm
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
%me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
%me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
%me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
%me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
%me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
%_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
%0 = udiv i64 %_19.i.idx, 24
; Unnecessary calculation
%_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24
%dst_cap.i.i = udiv i64 %_16.i.i, 24
store i64 %dst_cap.i.i, ptr %_0, align 8
%1 = getelementptr inbounds i8, ptr %_0, i64 8
store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
%2 = getelementptr inbounds i8, ptr %_0, i64 16
store i64 %0, ptr %2, align 8
ret void
}
```
</details>
<details>
<summary>After:</summary>
```llvm
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
%me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
%me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
%me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
%me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
%me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
%_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
%0 = udiv i64 %_19.i.idx, 24
store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8
%1 = getelementptr inbounds i8, ptr %_0, i64 8
store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
%2 = getelementptr inbounds i8, ptr %_0, i64 16
store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14
ret void
}
```
</details>
Note that there is still one more `mul`/`udiv` pair that I couldn't get
rid of. The root cause is the same issue as https://github.com/rust-lang/rust/issues/121239: the `nuw` flag gets
stripped off of `ptr::sub_ptr`.
2.
`Iterator::try_fold` gets called on the underlying Iterator in
`SpecInPlaceCollect::collect_in_place` whenever it does not implement
`TrustedRandomAccess`. For types that impl `Drop`, LLVM currently can't
tell that the drop can never occur, when using the default
`Iterator::try_fold` implementation.
For example, given the following code from #120493
```rust
#[repr(transparent)]
struct WrappedClone {
inner: String
}
#[no_mangle]
pub fn unwrap_clone(list: Vec<WrappedClone>) -> Vec<String> {
list.into_iter().map(|s| s.inner).collect()
}
```
<details>
<summary>The asm for the `unwrap_clone` method is currently:</summary>
```asm
unwrap_clone:
push rbp
push r15
push r14
push r13
push r12
push rbx
push rax
mov rbx, rdi
mov r12, qword ptr [rsi]
mov rdi, qword ptr [rsi + 8]
mov rax, qword ptr [rsi + 16]
movabs rsi, -6148914691236517205
mov r14, r12
test rax, rax
je .LBB0_10
lea rcx, [rax + 2*rax]
lea r14, [r12 + 8*rcx]
shl rax, 3
lea rax, [rax + 2*rax]
xor ecx, ecx
.LBB0_2:
cmp qword ptr [r12 + rcx], 0
je .LBB0_4
add rcx, 24
cmp rax, rcx
jne .LBB0_2
jmp .LBB0_10
.LBB0_4:
lea rdx, [rax - 24]
lea r14, [r12 + rcx]
cmp rdx, rcx
je .LBB0_10
mov qword ptr [rsp], rdi
sub rax, rcx
add rax, -24
mul rsi
mov r15, rdx
lea rbp, [r12 + rcx]
add rbp, 32
shr r15, 4
mov r13, qword ptr [rip + __rust_dealloc@GOTPCREL]
jmp .LBB0_6
.LBB0_8:
add rbp, 24
dec r15
je .LBB0_9
.LBB0_6:
mov rsi, qword ptr [rbp]
test rsi, rsi
je .LBB0_8
mov rdi, qword ptr [rbp - 8]
mov edx, 1
call r13
jmp .LBB0_8
.LBB0_9:
mov rdi, qword ptr [rsp]
movabs rsi, -6148914691236517205
.LBB0_10:
sub r14, r12
mov rax, r14
mul rsi
shr rdx, 4
mov qword ptr [rbx], r12
mov qword ptr [rbx + 8], rdi
mov qword ptr [rbx + 16], rdx
mov rax, rbx
add rsp, 8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
```
</details>
<details>
<summary>After this PR:</summary>
```asm
unwrap_clone:
mov rax, rdi
movups xmm0, xmmword ptr [rsi]
mov rcx, qword ptr [rsi + 16]
movups xmmword ptr [rdi], xmm0
mov qword ptr [rdi + 16], rcx
ret
```
</details>
Fixes https://github.com/rust-lang/rust/issues/120493
This commit is contained in:
commit
12075f04e6
@ -160,6 +160,7 @@
|
||||
#![feature(tuple_trait)]
|
||||
#![feature(unicode_internals)]
|
||||
#![feature(unsize)]
|
||||
#![feature(unwrap_infallible)]
|
||||
#![feature(vec_pop_if)]
|
||||
// tidy-alphabetical-end
|
||||
//
|
||||
|
@ -259,7 +259,8 @@ where
|
||||
inner.cap,
|
||||
inner.buf.cast::<T>(),
|
||||
inner.end as *const T,
|
||||
inner.cap * mem::size_of::<I::Src>() / mem::size_of::<T>(),
|
||||
// SAFETY: the multiplication can not overflow, since `inner.cap * size_of::<I::SRC>()` is the size of the allocation.
|
||||
inner.cap.unchecked_mul(mem::size_of::<I::Src>()) / mem::size_of::<T>(),
|
||||
)
|
||||
};
|
||||
|
||||
@ -374,7 +375,7 @@ where
|
||||
// - it lets us thread the write pointer through its innards and get it back in the end
|
||||
let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
|
||||
let sink =
|
||||
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
|
||||
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).into_ok();
|
||||
// iteration succeeded, don't drop head
|
||||
unsafe { ManuallyDrop::new(sink).dst.sub_ptr(dst_buf) }
|
||||
}
|
||||
|
@ -289,6 +289,60 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
|
||||
};
|
||||
}
|
||||
|
||||
fn fold<B, F>(mut self, mut accum: B, mut f: F) -> B
|
||||
where
|
||||
F: FnMut(B, Self::Item) -> B,
|
||||
{
|
||||
if T::IS_ZST {
|
||||
while self.ptr.as_ptr() != self.end.cast_mut() {
|
||||
// SAFETY: we just checked that `self.ptr` is in bounds.
|
||||
let tmp = unsafe { self.ptr.read() };
|
||||
// See `next` for why we subtract from `end` here.
|
||||
self.end = self.end.wrapping_byte_sub(1);
|
||||
accum = f(accum, tmp);
|
||||
}
|
||||
} else {
|
||||
// SAFETY: `self.end` can only be null if `T` is a ZST.
|
||||
while self.ptr != non_null!(self.end, T) {
|
||||
// SAFETY: we just checked that `self.ptr` is in bounds.
|
||||
let tmp = unsafe { self.ptr.read() };
|
||||
// SAFETY: the maximum this can be is `self.end`.
|
||||
// Increment `self.ptr` first to avoid double dropping in the event of a panic.
|
||||
self.ptr = unsafe { self.ptr.add(1) };
|
||||
accum = f(accum, tmp);
|
||||
}
|
||||
}
|
||||
accum
|
||||
}
|
||||
|
||||
fn try_fold<B, F, R>(&mut self, mut accum: B, mut f: F) -> R
|
||||
where
|
||||
Self: Sized,
|
||||
F: FnMut(B, Self::Item) -> R,
|
||||
R: core::ops::Try<Output = B>,
|
||||
{
|
||||
if T::IS_ZST {
|
||||
while self.ptr.as_ptr() != self.end.cast_mut() {
|
||||
// SAFETY: we just checked that `self.ptr` is in bounds.
|
||||
let tmp = unsafe { self.ptr.read() };
|
||||
// See `next` for why we subtract from `end` here.
|
||||
self.end = self.end.wrapping_byte_sub(1);
|
||||
accum = f(accum, tmp)?;
|
||||
}
|
||||
} else {
|
||||
// SAFETY: `self.end` can only be null if `T` is a ZST.
|
||||
while self.ptr != non_null!(self.end, T) {
|
||||
// SAFETY: we just checked that `self.ptr` is in bounds.
|
||||
let tmp = unsafe { self.ptr.read() };
|
||||
// SAFETY: the maximum this can be is `self.end`.
|
||||
// Increment `self.ptr` first to avoid double dropping in the event of a panic.
|
||||
self.ptr = unsafe { self.ptr.add(1) };
|
||||
accum = f(accum, tmp)?;
|
||||
}
|
||||
}
|
||||
R::from_output(accum)
|
||||
}
|
||||
|
||||
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
|
||||
where
|
||||
Self: TrustedRandomAccessNoCoerce,
|
||||
|
Loading…
x
Reference in New Issue
Block a user