Rollup merge of #122326 - Zoxc:win-alloc-tweak, r=ChrisDenton
Optimize `process_heap_alloc`

This optimizes `process_heap_alloc` introduced in https://github.com/rust-lang/rust/pull/120205.

From:
```
.text:0000000180027ED0 ; std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93
.text:0000000180027ED0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E
.text:0000000180027ED0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E proc near
.text:0000000180027ED0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p
.text:0000000180027ED0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+55↑p ...
.text:0000000180027ED0 push rsi
.text:0000000180027ED1 push rdi
.text:0000000180027ED2 sub rsp, 28h
.text:0000000180027ED6 mov rsi, rdx
.text:0000000180027ED9 mov edi, ecx
.text:0000000180027EDB mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EE2 test rcx, rcx
.text:0000000180027EE5 jnz short loc_180027EFC
.text:0000000180027EE7 call cs:__imp_GetProcessHeap
.text:0000000180027EED test rax, rax
.text:0000000180027EF0 jz short loc_180027F0E
.text:0000000180027EF2 mov rcx, rax
.text:0000000180027EF5 mov cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E, rax ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EFC
.text:0000000180027EFC loc_180027EFC: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+15↑j
.text:0000000180027EFC mov edx, edi
.text:0000000180027EFE mov r8, rsi
.text:0000000180027F01 add rsp, 28h
.text:0000000180027F05 pop rdi
.text:0000000180027F06 pop rsi
.text:0000000180027F07 jmp cs:__imp_HeapAlloc
.text:0000000180027F0E ; ---------------------------------------------------------------------------
.text:0000000180027F0E
.text:0000000180027F0E loc_180027F0E: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+20↑j
.text:0000000180027F0E xor eax, eax
.text:0000000180027F10 add rsp, 28h
.text:0000000180027F14 pop rdi
.text:0000000180027F15 pop rsi
.text:0000000180027F16 retn
.text:0000000180027F16 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E endp
```

To:
```
.text:0000000180027EE0 ; std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16
.text:0000000180027EE0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E
.text:0000000180027EE0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E proc near
.text:0000000180027EE0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p
.text:0000000180027EE0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+54↑p ...
.text:0000000180027EE0 mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62
.text:0000000180027EE7 test rcx, rcx
.text:0000000180027EEA jz short loc_180027EF3
.text:0000000180027EEC jmp cs:__imp_HeapAlloc
.text:0000000180027EF3 ; ---------------------------------------------------------------------------
.text:0000000180027EF3
.text:0000000180027EF3 loc_180027EF3: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16+A↑j
.text:0000000180027EF3 mov ecx, edx
.text:0000000180027EF5 mov rdx, r8
.text:0000000180027EF8 jmp std__sys__pal__windows__alloc__process_heap_init_and_alloc
.text:0000000180027EF8 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E endp
```

r? `@ChrisDenton`
This commit is contained in:
commit
5a3d6c91b1
@ -6,6 +6,7 @@
|
||||
use crate::sync::atomic::{AtomicPtr, Ordering};
|
||||
use crate::sys::c;
|
||||
use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN};
|
||||
use core::mem::MaybeUninit;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@ -94,8 +95,6 @@ fn HeapReAlloc(
|
||||
// a non-null handle returned by `GetProcessHeap`.
|
||||
#[inline]
|
||||
fn init_or_get_process_heap() -> c::HANDLE {
|
||||
let heap = HEAP.load(Ordering::Relaxed);
|
||||
if core::intrinsics::unlikely(heap.is_null()) {
|
||||
// `HEAP` has not yet been successfully initialized
|
||||
let heap = unsafe { GetProcessHeap() };
|
||||
if !heap.is_null() {
|
||||
@ -109,14 +108,17 @@ fn init_or_get_process_heap() -> c::HANDLE {
|
||||
// Could not get the current process heap.
|
||||
ptr::null_mut()
|
||||
}
|
||||
} else {
|
||||
// SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap`
|
||||
heap
|
||||
}
|
||||
}
|
||||
|
||||
/// This is outlined from `process_heap_alloc` so that `process_heap_alloc`
|
||||
/// does not need any stack allocations.
|
||||
#[inline(never)]
|
||||
fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
|
||||
#[cold]
|
||||
extern "C" fn process_heap_init_and_alloc(
|
||||
_heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`
|
||||
flags: c::DWORD,
|
||||
dwBytes: c::SIZE_T,
|
||||
) -> c::LPVOID {
|
||||
let heap = init_or_get_process_heap();
|
||||
if core::intrinsics::unlikely(heap.is_null()) {
|
||||
return ptr::null_mut();
|
||||
@ -125,6 +127,21 @@ fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
|
||||
unsafe { HeapAlloc(heap, flags, dwBytes) }
|
||||
}
|
||||
|
||||
/// Allocates `dwBytes` bytes with the given `flags` by calling `HeapAlloc` on
/// the heap handle cached in `HEAP`.
///
/// When the cached handle is non-null (the branch hinted as likely), it is
/// passed straight to `HeapAlloc`. Otherwise the request is forwarded to
/// `process_heap_init_and_alloc` and that function's result is returned.
///
/// `_heap` is never read by this function; callers pass `MaybeUninit::uninit()`.
#[inline(never)]
|
||||
fn process_heap_alloc(
|
||||
_heap: MaybeUninit<c::HANDLE>, // Unused; passed only so the argument list matches the ABI of `HeapAlloc`.
|
||||
flags: c::DWORD,
|
||||
|
||||
dwBytes: c::SIZE_T,
|
||||
) -> c::LPVOID {
|
||||
// Read the cached heap handle; a null value means `HEAP` has not been initialized yet.
let heap = HEAP.load(Ordering::Relaxed);
|
||||
if core::intrinsics::likely(!heap.is_null()) {
|
||||
// SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`.
|
||||
unsafe { HeapAlloc(heap, flags, dwBytes) }
|
||||
} else {
|
||||
// No cached handle: defer to the `#[cold]` helper, which obtains the heap before allocating.
process_heap_init_and_alloc(MaybeUninit::uninit(), flags, dwBytes)
|
||||
}
|
||||
}
|
||||
|
||||
// Get a non-null handle to the default heap of the current process.
|
||||
// SAFETY: `HEAP` must have been successfully initialized.
|
||||
#[inline]
|
||||
@ -148,12 +165,12 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 {
|
||||
|
||||
if layout.align() <= MIN_ALIGN {
|
||||
// The returned pointer points to the start of an allocated block.
|
||||
process_heap_alloc(flags, layout.size()) as *mut u8
|
||||
process_heap_alloc(MaybeUninit::uninit(), flags, layout.size()) as *mut u8
|
||||
} else {
|
||||
// Allocate extra padding in order to be able to satisfy the alignment.
|
||||
let total = layout.align() + layout.size();
|
||||
|
||||
let ptr = process_heap_alloc(flags, total) as *mut u8;
|
||||
let ptr = process_heap_alloc(MaybeUninit::uninit(), flags, total) as *mut u8;
|
||||
if ptr.is_null() {
|
||||
// Allocation has failed.
|
||||
return ptr::null_mut();
|
||||
|
Loading…
Reference in New Issue
Block a user