Rollup merge of #122326 - Zoxc:win-alloc-tweak, r=ChrisDenton

Optimize `process_heap_alloc`

This optimizes `process_heap_alloc` introduced in https://github.com/rust-lang/rust/pull/120205.

From:
```
.text:0000000180027ED0 ; std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93
.text:0000000180027ED0                 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E
.text:0000000180027ED0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E proc near
.text:0000000180027ED0                                         ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p
.text:0000000180027ED0                                         ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+55↑p ...
.text:0000000180027ED0                 push    rsi
.text:0000000180027ED1                 push    rdi
.text:0000000180027ED2                 sub     rsp, 28h
.text:0000000180027ED6                 mov     rsi, rdx
.text:0000000180027ED9                 mov     edi, ecx
.text:0000000180027EDB                 mov     rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EE2                 test    rcx, rcx
.text:0000000180027EE5                 jnz     short loc_180027EFC
.text:0000000180027EE7                 call    cs:__imp_GetProcessHeap
.text:0000000180027EED                 test    rax, rax
.text:0000000180027EF0                 jz      short loc_180027F0E
.text:0000000180027EF2                 mov     rcx, rax
.text:0000000180027EF5                 mov     cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E, rax ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EFC
.text:0000000180027EFC loc_180027EFC:                          ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+15↑j
.text:0000000180027EFC                 mov     edx, edi
.text:0000000180027EFE                 mov     r8, rsi
.text:0000000180027F01                 add     rsp, 28h
.text:0000000180027F05                 pop     rdi
.text:0000000180027F06                 pop     rsi
.text:0000000180027F07                 jmp     cs:__imp_HeapAlloc
.text:0000000180027F0E ; ---------------------------------------------------------------------------
.text:0000000180027F0E
.text:0000000180027F0E loc_180027F0E:                          ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+20↑j
.text:0000000180027F0E                 xor     eax, eax
.text:0000000180027F10                 add     rsp, 28h
.text:0000000180027F14                 pop     rdi
.text:0000000180027F15                 pop     rsi
.text:0000000180027F16                 retn
.text:0000000180027F16 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E endp
```
to
```
.text:0000000180027EE0 ; std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16
.text:0000000180027EE0                 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E
.text:0000000180027EE0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E proc near
.text:0000000180027EE0                                         ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p
.text:0000000180027EE0                                         ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+54↑p ...
.text:0000000180027EE0                 mov     rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EE7                 test    rcx, rcx
.text:0000000180027EEA                 jz      short loc_180027EF3
.text:0000000180027EEC                 jmp     cs:__imp_HeapAlloc
.text:0000000180027EF3 ; ---------------------------------------------------------------------------
.text:0000000180027EF3
.text:0000000180027EF3 loc_180027EF3:                          ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16+A↑j
.text:0000000180027EF3                 mov     ecx, edx
.text:0000000180027EF5                 mov     rdx, r8
.text:0000000180027EF8                 jmp     std__sys__pal__windows__alloc__process_heap_init_and_alloc
.text:0000000180027EF8 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E endp
```

r? `@ChrisDenton`
This commit is contained in:
Jacob Pratt 2024-03-11 03:47:22 -04:00 committed by GitHub
commit 5a3d6c91b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -6,6 +6,7 @@
use crate::sync::atomic::{AtomicPtr, Ordering};
use crate::sys::c;
use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN};
use core::mem::MaybeUninit;
#[cfg(test)]
mod tests;
@ -94,29 +95,30 @@ fn HeapReAlloc(
// a non-null handle returned by `GetProcessHeap`.
#[inline]
fn init_or_get_process_heap() -> c::HANDLE {
let heap = HEAP.load(Ordering::Relaxed);
if core::intrinsics::unlikely(heap.is_null()) {
// `HEAP` has not yet been successfully initialized
let heap = unsafe { GetProcessHeap() };
if !heap.is_null() {
// SAFETY: No locking is needed because within the same process,
// successful calls to `GetProcessHeap` will always return the same value, even on different threads.
HEAP.store(heap, Ordering::Release);
// `HEAP` has not yet been successfully initialized
let heap = unsafe { GetProcessHeap() };
if !heap.is_null() {
// SAFETY: No locking is needed because within the same process,
// successful calls to `GetProcessHeap` will always return the same value, even on different threads.
HEAP.store(heap, Ordering::Release);
// SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap`
heap
} else {
// Could not get the current process heap.
ptr::null_mut()
}
} else {
// SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap`
heap
} else {
// Could not get the current process heap.
ptr::null_mut()
}
}
/// This is outlined from `process_heap_alloc` so that `process_heap_alloc`
/// does not need any stack allocations.
#[inline(never)]
fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
#[cold]
extern "C" fn process_heap_init_and_alloc(
_heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`
flags: c::DWORD,
dwBytes: c::SIZE_T,
) -> c::LPVOID {
let heap = init_or_get_process_heap();
if core::intrinsics::unlikely(heap.is_null()) {
return ptr::null_mut();
@ -125,6 +127,21 @@ fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
unsafe { HeapAlloc(heap, flags, dwBytes) }
}
#[inline(never)]
fn process_heap_alloc(
_heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`,
flags: c::DWORD,
dwBytes: c::SIZE_T,
) -> c::LPVOID {
let heap = HEAP.load(Ordering::Relaxed);
if core::intrinsics::likely(!heap.is_null()) {
// SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`.
unsafe { HeapAlloc(heap, flags, dwBytes) }
} else {
process_heap_init_and_alloc(MaybeUninit::uninit(), flags, dwBytes)
}
}
// Get a non-null handle to the default heap of the current process.
// SAFETY: `HEAP` must have been successfully initialized.
#[inline]
@ -148,12 +165,12 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 {
if layout.align() <= MIN_ALIGN {
// The returned pointer points to the start of an allocated block.
process_heap_alloc(flags, layout.size()) as *mut u8
process_heap_alloc(MaybeUninit::uninit(), flags, layout.size()) as *mut u8
} else {
// Allocate extra padding in order to be able to satisfy the alignment.
let total = layout.align() + layout.size();
let ptr = process_heap_alloc(flags, total) as *mut u8;
let ptr = process_heap_alloc(MaybeUninit::uninit(), flags, total) as *mut u8;
if ptr.is_null() {
// Allocation has failed.
return ptr::null_mut();