optimize in_place_collect with vec::IntoIter::try_fold

`Iterator::try_fold` gets called on the underlying Iterator in
`SpecInPlaceCollect::collect_in_place` whenever it does not implement
`TrustedRandomAccess`. For types that impl `Drop`, LLVM currently can't
tell that the drop can never occur, when using the default
`Iterator::try_fold` implementation.

For example, the asm from the `unwrap_clone` method is currently:

```
unwrap_clone:
        push    rbp
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        push    rax
        mov     rbx, rdi
        mov     r12, qword ptr [rsi]
        mov     rdi, qword ptr [rsi + 8]
        mov     rax, qword ptr [rsi + 16]
        movabs  rsi, -6148914691236517205
        mov     r14, r12
        test    rax, rax
        je      .LBB0_10
        lea     rcx, [rax + 2*rax]
        lea     r14, [r12 + 8*rcx]
        shl     rax, 3
        lea     rax, [rax + 2*rax]
        xor     ecx, ecx
.LBB0_2:
        cmp     qword ptr [r12 + rcx], 0
        je      .LBB0_4
        add     rcx, 24
        cmp     rax, rcx
        jne     .LBB0_2
        jmp     .LBB0_10
.LBB0_4:
        lea     rdx, [rax - 24]
        lea     r14, [r12 + rcx]
        cmp     rdx, rcx
        je      .LBB0_10
        mov     qword ptr [rsp], rdi
        sub     rax, rcx
        add     rax, -24
        mul     rsi
        mov     r15, rdx
        lea     rbp, [r12 + rcx]
        add     rbp, 32
        shr     r15, 4
        mov     r13, qword ptr [rip + __rust_dealloc@GOTPCREL]
        jmp     .LBB0_6
.LBB0_8:
        add     rbp, 24
        dec     r15
        je      .LBB0_9
.LBB0_6:
        mov     rsi, qword ptr [rbp]
        test    rsi, rsi
        je      .LBB0_8
        mov     rdi, qword ptr [rbp - 8]
        mov     edx, 1
        call    r13
        jmp     .LBB0_8
.LBB0_9:
        mov     rdi, qword ptr [rsp]
        movabs  rsi, -6148914691236517205
.LBB0_10:
        sub     r14, r12
        mov     rax, r14
        mul     rsi
        shr     rdx, 4
        mov     qword ptr [rbx], r12
        mov     qword ptr [rbx + 8], rdi
        mov     qword ptr [rbx + 16], rdx
        mov     rax, rbx
        add     rsp, 8
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        pop     rbp
        ret
```

After this PR:

```
unwrap_clone:
	mov	rax, rdi
	movups	xmm0, xmmword ptr [rsi]
	mov	rcx, qword ptr [rsi + 16]
	movups	xmmword ptr [rdi], xmm0
	mov	qword ptr [rdi + 16], rcx
	ret
```

Fixes #120493
This commit is contained in:
Joshua Wong 2024-04-12 18:36:31 -05:00
parent c585541e67
commit 6165dca6db

View File

@ -289,6 +289,35 @@ fn count(self) -> usize {
};
}
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R
where
Self: Sized,
F: FnMut(B, Self::Item) -> R,
R: core::ops::Try<Output = B>,
{
let mut accum = init;
if T::IS_ZST {
while self.ptr.as_ptr() != self.end.cast_mut() {
// SAFETY: we just checked that `self.ptr` is in bounds.
let tmp = unsafe { self.ptr.read() };
// See `next` for why we subtract from `end` here.
self.end = self.end.wrapping_byte_sub(1);
accum = f(accum, tmp)?;
}
} else {
// SAFETY: `self.end` can only be null if `T` is a ZST.
while self.ptr != non_null!(self.end, T) {
// SAFETY: we just checked that `self.ptr` is in bounds.
let tmp = unsafe { self.ptr.read() };
// SAFETY: the maximum this can be is `self.end`.
// Increment `self.ptr` first to avoid double dropping in the event of a panic.
self.ptr = unsafe { self.ptr.add(1) };
accum = f(accum, tmp)?;
}
}
R::from_output(accum)
}
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
where
Self: TrustedRandomAccessNoCoerce,