From 50760aa2b59b54fba474283e5c5c4e028ce4d9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Mon, 11 Mar 2024 05:43:12 +0100 Subject: [PATCH] Optimize `process_heap_alloc` --- library/std/src/sys/pal/windows/alloc.rs | 53 ++++++++++++++++-------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/library/std/src/sys/pal/windows/alloc.rs b/library/std/src/sys/pal/windows/alloc.rs index 270eca37b14..681d1a5efe9 100644 --- a/library/std/src/sys/pal/windows/alloc.rs +++ b/library/std/src/sys/pal/windows/alloc.rs @@ -6,6 +6,7 @@ use crate::sync::atomic::{AtomicPtr, Ordering}; use crate::sys::c; use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN}; +use core::mem::MaybeUninit; #[cfg(test)] mod tests; @@ -94,29 +95,30 @@ fn HeapReAlloc( // a non-null handle returned by `GetProcessHeap`. #[inline] fn init_or_get_process_heap() -> c::HANDLE { - let heap = HEAP.load(Ordering::Relaxed); - if core::intrinsics::unlikely(heap.is_null()) { - // `HEAP` has not yet been successfully initialized - let heap = unsafe { GetProcessHeap() }; - if !heap.is_null() { - // SAFETY: No locking is needed because within the same process, - // successful calls to `GetProcessHeap` will always return the same value, even on different threads. - HEAP.store(heap, Ordering::Release); + // `HEAP` has not yet been successfully initialized + let heap = unsafe { GetProcessHeap() }; + if !heap.is_null() { + // SAFETY: No locking is needed because within the same process, + // successful calls to `GetProcessHeap` will always return the same value, even on different threads. + HEAP.store(heap, Ordering::Release); - // SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap` - heap - } else { - // Could not get the current process heap. - ptr::null_mut() - } - } else { // SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap` heap + } else { + // Could not get the current process heap. + ptr::null_mut() } } +/// This is outlined from `process_heap_alloc` so that `process_heap_alloc` +/// does not need any stack allocations. #[inline(never)] -fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID { +#[cold] +extern "C" fn process_heap_init_and_alloc( + _heap: MaybeUninit, // We pass this argument to match the ABI of `HeapAlloc` + flags: c::DWORD, + dwBytes: c::SIZE_T, +) -> c::LPVOID { let heap = init_or_get_process_heap(); if core::intrinsics::unlikely(heap.is_null()) { return ptr::null_mut(); @@ -125,6 +127,21 @@ fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID { unsafe { HeapAlloc(heap, flags, dwBytes) } } +#[inline(never)] +fn process_heap_alloc( + _heap: MaybeUninit, // We pass this argument to match the ABI of `HeapAlloc`, + flags: c::DWORD, + dwBytes: c::SIZE_T, +) -> c::LPVOID { + let heap = HEAP.load(Ordering::Relaxed); + if core::intrinsics::likely(!heap.is_null()) { + // SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`. + unsafe { HeapAlloc(heap, flags, dwBytes) } + } else { + process_heap_init_and_alloc(MaybeUninit::uninit(), flags, dwBytes) + } +} + // Get a non-null handle to the default heap of the current process. // SAFETY: `HEAP` must have been successfully initialized. #[inline] @@ -148,12 +165,12 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 { if layout.align() <= MIN_ALIGN { // The returned pointer points to the start of an allocated block. - process_heap_alloc(flags, layout.size()) as *mut u8 + process_heap_alloc(MaybeUninit::uninit(), flags, layout.size()) as *mut u8 } else { // Allocate extra padding in order to be able to satisfy the alignment. let total = layout.align() + layout.size(); - let ptr = process_heap_alloc(flags, total) as *mut u8; + let ptr = process_heap_alloc(MaybeUninit::uninit(), flags, total) as *mut u8; if ptr.is_null() { // Allocation has failed. return ptr::null_mut();