1f67a7aa8d
Optimize `Box::default` and `Arc::default` to construct more types in place Both the `Arc` and `Box` `Default` impls currently call `T::default()` before allocating, and then moving the resulting `T` into the allocation. Most `Default` impls are trivial, which should in theory allow LLVM to construct `T: Default` directly in the `Box` allocation when calling `<Box<T>>::default()`. However, the allocation may fail, which necessitates calling `T`'s destructor if it has one. If the destructor is non-trivial, then LLVM has a hard time proving that it's sound to elide, which makes it construct `T` on the stack first, and then copy it into the allocation. Change both of these impls to allocate first, and then call `T::default` into the uninitialized allocation, so that LLVM doesn't have to prove that it's sound to elide the destructor/initial stack copy. For example, given the following Rust code: ```rust #[derive(Default, Clone)] struct Foo { x: Vec<u8>, z: String, y: Vec<u8>, } #[no_mangle] pub fn src() -> Box<Foo> { Box::default() } ``` <details open> <summary>Before this PR:</summary> ```llvm `@__rust_no_alloc_shim_is_unstable` = external global i8 ; drop_in_place() generated in case the allocation fails ; core::ptr::drop_in_place<playground::Foo> ; Function Attrs: nounwind nonlazybind uwtable define internal fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias nocapture noundef readonly align 8 dereferenceable(72) %_1) unnamed_addr #0 personality ptr `@rust_eh_personality` { start: %_1.val = load i64, ptr %_1, align 8 %0 = icmp eq i64 %_1.val, 0 br i1 %0, label %bb6, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i" "_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i": ; preds = %start %1 = getelementptr inbounds i8, ptr %_1, i64 8 %_1.val6 = load ptr, ptr %1, align 8, !nonnull !3, 
!noundef !3 tail call void `@__rust_dealloc(ptr` noundef nonnull %_1.val6, i64 noundef %_1.val, i64 noundef 1) #8 br label %bb6 bb6: ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i", %start %2 = getelementptr inbounds i8, ptr %_1, i64 24 %.val9 = load i64, ptr %2, align 8 %3 = icmp eq i64 %.val9, 0 br i1 %3, label %bb5, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11" "_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11": ; preds = %bb6 %4 = getelementptr inbounds i8, ptr %_1, i64 32 %.val10 = load ptr, ptr %4, align 8, !nonnull !3, !noundef !3 tail call void `@__rust_dealloc(ptr` noundef nonnull %.val10, i64 noundef %.val9, i64 noundef 1) #8 br label %bb5 bb5: ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11", %bb6 %5 = getelementptr inbounds i8, ptr %_1, i64 48 %.val4 = load i64, ptr %5, align 8 %6 = icmp eq i64 %.val4, 0 br i1 %6, label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16", label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15" "_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15": ; preds = %bb5 %7 = getelementptr inbounds i8, ptr %_1, i64 56 %.val5 = load ptr, ptr %7, align 8, !nonnull !3, !noundef !3 tail call void `@__rust_dealloc(ptr` noundef nonnull %.val5, i64 noundef %.val4, i64 noundef 1) #8 br label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16" "_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16": ; preds = %bb5, 
%"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15" ret void } ; Function Attrs: nonlazybind uwtable define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #1 personality ptr `@rust_eh_personality` { start: ; alloca to place `Foo` in. %_1 = alloca [72 x i8], align 8 call void `@llvm.lifetime.start.p0(i64` 72, ptr nonnull %_1) store i64 0, ptr %_1, align 8 %_2.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 8 store ptr inttoptr (i64 1 to ptr), ptr %_2.sroa.4.0._1.sroa_idx, align 8 %_2.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 16 %_3.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 32 call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_2.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false) store ptr inttoptr (i64 1 to ptr), ptr %_3.sroa.4.0..sroa_idx, align 8 %_3.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 40 %_4.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 56 call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_3.sroa.5.0..sroa_idx, i8 0, i64 16, i1 false) store ptr inttoptr (i64 1 to ptr), ptr %_4.sroa.4.0..sroa_idx, align 8 %_4.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 64 store i64 0, ptr %_4.sroa.5.0..sroa_idx, align 8 %0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1, !noalias !4 %_0.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #8, !noalias !4 %1 = icmp eq ptr %_0.i.i.i, null br i1 %1, label %bb2.i, label %"_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit" bb2.i: ; preds = %start ; invoke alloc::alloc::handle_alloc_error invoke void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #9 to label %.noexc unwind label %cleanup.i .noexc: ; preds = %bb2.i unreachable cleanup.i: ; preds = %bb2.i %2 
= landingpad { ptr, i32 } cleanup ; call core::ptr::drop_in_place<playground::Foo> call fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias noundef nonnull align 8 dereferenceable(72) %_1) #10 resume { ptr, i32 } %2 "_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit": ; preds = %start ; Copy from stack to heap if allocation is successful call void `@llvm.memcpy.p0.p0.i64(ptr` noundef nonnull align 8 dereferenceable(72) %_0.i.i.i, ptr noundef nonnull align 8 dereferenceable(72) %_1, i64 72, i1 false) call void `@llvm.lifetime.end.p0(i64` 72, ptr nonnull %_1) ret ptr %_0.i.i.i } ``` </details> <details> <summary>After this PR</summary> ```llvm ; Notice how there's no `drop_in_place()` generated as well define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #0 personality ptr `@rust_eh_personality` { start: ; no stack allocation %0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1 %_0.i.i.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #5 %1 = icmp eq ptr %_0.i.i.i.i.i, null br i1 %1, label %bb3.i, label %"_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit" bb3.i: ; preds = %start ; call alloc::alloc::handle_alloc_error tail call void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #6 unreachable "_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit": ; preds = %start ; construct `Foo` directly into the allocation if successful store i64 0, ptr %_0.i.i.i.i.i, align 8 %_8.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 8 store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.4.0._1.sroa_idx, align 8 %_8.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 16 %_8.sroa.7.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 32 tail call void `@llvm.memset.p0.i64(ptr` noundef 
nonnull align 8 dereferenceable(16) %_8.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false) store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.7.0._1.sroa_idx, align 8 %_8.sroa.8.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 40 %_8.sroa.10.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 56 tail call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_8.sroa.8.0._1.sroa_idx, i8 0, i64 16, i1 false) store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.10.0._1.sroa_idx, align 8 %_8.sroa.11.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 64 store i64 0, ptr %_8.sroa.11.0._1.sroa_idx, align 8 ret ptr %_0.i.i.i.i.i } ``` </details> |
||
---|---|---|
.. | ||
auxiliary | ||
avr | ||
cffi | ||
cross-crate-inlining | ||
debug-accessibility | ||
dllimports | ||
enum | ||
float | ||
instrument-coverage | ||
instrument-xray | ||
intrinsics | ||
issues | ||
lib-optimizations | ||
loongarch-abi | ||
macos | ||
meta-filecheck | ||
naked-fn | ||
non-terminate | ||
patchable-function-entry | ||
remap_path_prefix | ||
repr | ||
riscv-abi | ||
sanitizer | ||
simd | ||
simd-intrinsic | ||
src-hash-algorithm | ||
unwind-abis | ||
aarch64-struct-align-128.rs | ||
abi-efiapi.rs | ||
abi-main-signature-16bit-c-int.rs | ||
abi-main-signature-32bit-c-int.rs | ||
abi-repr-ext.rs | ||
abi-sysv64.rs | ||
abi-x86_64_sysv.rs | ||
abi-x86-interrupt.rs | ||
addr-of-mutate.rs | ||
adjustments.rs | ||
align-byval-alignment-mismatch.rs | ||
align-byval-vector.rs | ||
align-byval.rs | ||
align-enum.rs | ||
align-fn.rs | ||
align-offset.rs | ||
align-struct.rs | ||
alloc-optimisation.rs | ||
array-clone.rs | ||
array-cmp.rs | ||
array-codegen.rs | ||
array-equality.rs | ||
array-from_fn.rs | ||
array-map.rs | ||
array-optimized.rs | ||
array-repeat.rs | ||
ascii-char.rs | ||
asm-arm64ec-clobbers.rs | ||
asm-clobber_abi.rs | ||
asm-clobbers.rs | ||
asm-goto.rs | ||
asm-may_unwind.rs | ||
asm-maybe-uninit.rs | ||
asm-msp430-clobbers.rs | ||
asm-multiple-options.rs | ||
asm-options.rs | ||
asm-powerpc-clobbers.rs | ||
asm-s390x-clobbers.rs | ||
asm-sanitize-llvm.rs | ||
asm-target-clobbers.rs | ||
async-closure-debug.rs | ||
async-fn-debug-awaitee-field.rs | ||
async-fn-debug-msvc.rs | ||
async-fn-debug.rs | ||
atomic-operations.rs | ||
atomicptr.rs | ||
autovectorize-f32x4.rs | ||
binary-heap-peek-mut-pop-no-panic.rs | ||
binary-search-index-no-bound-check.rs | ||
bool-cmp.rs | ||
box-uninit-bytes.rs | ||
bpf-alu32.rs | ||
branch-protection-old-llvm.rs | ||
branch-protection.rs | ||
call-llvm-intrinsics.rs | ||
call-metadata.rs | ||
cast-optimized.rs | ||
cast-target-abi.rs | ||
catch-unwind.rs | ||
cdylib-external-inline-fns.rs | ||
cf-protection.rs | ||
cfguard-checks.rs | ||
cfguard-disabled.rs | ||
cfguard-nochecks.rs | ||
cfguard-non-msvc.rs | ||
char-ascii-branchless.rs | ||
checked_ilog.rs | ||
checked_math.rs | ||
clone_as_copy.rs | ||
clone-shims.rs | ||
codemodels.rs | ||
coercions.rs | ||
cold-call-declare-and-call.rs | ||
common_prim_int_ptr.rs | ||
comparison-operators-2-tuple.rs | ||
comparison-operators-newtype.rs | ||
const_scalar_pair.rs | ||
const-vector.rs | ||
constant-branch.rs | ||
consts.rs | ||
coroutine-debug-msvc.rs | ||
coroutine-debug.rs | ||
dealloc-no-unwind.rs | ||
debug-alignment.rs | ||
debug-column-msvc.rs | ||
debug-column.rs | ||
debug-compile-unit-path.rs | ||
debug-fndef-size.rs | ||
debug-limited.rs | ||
debug-line-directives-only.rs | ||
debug-line-tables-only.rs | ||
debug-linkage-name.rs | ||
debug-vtable.rs | ||
debuginfo-constant-locals.rs | ||
debuginfo-generic-closure-env-names.rs | ||
debuginfo-inline-callsite-location.rs | ||
deduced-param-attrs.rs | ||
default-requires-uwtable.rs | ||
default-visibility.rs | ||
direct-access-external-data.rs | ||
dont_codegen_private_const_fn_only_used_in_const_eval.rs | ||
drop-in-place-noalias.rs | ||
drop.rs | ||
dst-offset.rs | ||
dst-vtable-align-nonzero.rs | ||
dst-vtable-size-range.rs | ||
ehcontguard_disabled.rs | ||
ehcontguard_enabled.rs | ||
emcripten-catch-unwind.rs | ||
enable-lto-unit-splitting.rs | ||
error-provide.rs | ||
export-no-mangle.rs | ||
external-no-mangle-fns.rs | ||
external-no-mangle-statics.rs | ||
fastcall-inreg.rs | ||
fatptr.rs | ||
fewer-names.rs | ||
fixed-x18.rs | ||
float_math.rs | ||
fn-impl-trait-self.rs | ||
foo.s | ||
force-frame-pointers.rs | ||
force-no-unwind-tables.rs | ||
force-unwind-tables.rs | ||
frame-pointer.rs | ||
function-arguments-noopt.rs | ||
function-arguments.rs | ||
function-return.rs | ||
gdb_debug_script_load.rs | ||
generic-debug.rs | ||
global_asm_include.rs | ||
global_asm_x2.rs | ||
global_asm.rs | ||
i128-x86-align.rs | ||
infallible-unwrap-in-opt-z.rs | ||
inherit_overflow.rs | ||
inline-always-works-always.rs | ||
inline-debuginfo.rs | ||
inline-function-args-debug-info.rs | ||
inline-hint.rs | ||
instrument-mcount.rs | ||
integer-cmp.rs | ||
integer-overflow.rs | ||
internalize-closures.rs | ||
intrinsic-no-unnamed-attr.rs | ||
is_val_statically_known.rs | ||
issue-97217.rs | ||
iter-repeat-n-trivial-drop.rs | ||
layout-size-checks.rs | ||
lifetime_start_end.rs | ||
link_section.rs | ||
link-dead-code.rs | ||
llvm_module_flags.rs | ||
llvm-ident.rs | ||
loads.rs | ||
local-generics-in-exe-internalized.rs | ||
lto-removes-invokes.rs | ||
mainsubprogram.rs | ||
mainsubprogramstart.rs | ||
match-optimized.rs | ||
match-optimizes-away.rs | ||
match-unoptimized.rs | ||
maybeuninit-rvo.rs | ||
mem-replace-big-type.rs | ||
mem-replace-simple-type.rs | ||
merge-functions.rs | ||
method-declaration.rs | ||
mir_zst_stores.rs | ||
mir-aggregate-no-alloca.rs | ||
mir-inlined-line-numbers.rs | ||
move-before-nocapture-ref-arg.rs | ||
move-operands.rs | ||
naked-asan.rs | ||
no_builtins-at-crate.rs | ||
no-alloca-inside-if-false.rs | ||
no-assumes-on-casts.rs | ||
no-dllimport-w-cross-lang-lto.rs | ||
no-jump-tables.rs | ||
no-plt.rs | ||
no-redundant-item-monomorphization.rs | ||
noalias-box-off.rs | ||
noalias-box.rs | ||
noalias-flag.rs | ||
noalias-freeze.rs | ||
noalias-refcell.rs | ||
noalias-rwlockreadguard.rs | ||
noalias-unpin.rs | ||
noreturn-uninhabited.rs | ||
noreturnflag.rs | ||
nounwind.rs | ||
nrvo.rs | ||
optimize-attr-1.rs | ||
option-as-slice.rs | ||
option-niche-eq.rs | ||
overaligned-constant.rs | ||
packed.rs | ||
panic-abort-windows.rs | ||
panic-in-drop-abort.rs | ||
panic-unwind-default-uwtable.rs | ||
pattern_type_symbols.rs | ||
personality_lifetimes.rs | ||
pgo-counter-bias.rs | ||
pgo-instrumentation.rs | ||
pic-relocation-model.rs | ||
pie-relocation-model.rs | ||
placement-new.rs | ||
powerpc64le-struct-align-128.rs | ||
precondition-checks.rs | ||
ptr-arithmetic.rs | ||
ptr-read-metadata.rs | ||
range-attribute.rs | ||
README.md | ||
refs.rs | ||
repeat-trusted-len.rs | ||
riscv-target-abi.rs | ||
scalar-pair-bool.rs | ||
set-discriminant-invalid.rs | ||
skip-mono-inside-if-false.rs | ||
slice_as_from_ptr_range.rs | ||
slice-as_chunks.rs | ||
slice-indexing.rs | ||
slice-init.rs | ||
slice-iter-fold.rs | ||
slice-iter-len-eq-zero.rs | ||
slice-iter-nonnull.rs | ||
slice-pointer-nonnull-unwrap.rs | ||
slice-position-bounds-check.rs | ||
slice-ref-equality.rs | ||
slice-reverse.rs | ||
slice-windows-no-bounds-check.rs | ||
some-abis-do-extend-params-to-32-bits.rs | ||
some-global-nonnull.rs | ||
sparc-struct-abi.rs | ||
split-lto-unit.rs | ||
sroa-fragment-debuginfo.rs | ||
sse42-implies-crc32.rs | ||
stack-probes-inline.rs | ||
stack-protector.rs | ||
static-relocation-model-msvc.rs | ||
staticlib-external-inline-fns.rs | ||
step_by-overflow-checks.rs | ||
stores.rs | ||
swap-large-types.rs | ||
swap-small-types.rs | ||
target-cpu-on-functions.rs | ||
target-feature-inline-closure.rs | ||
target-feature-overrides.rs | ||
thread-local.rs | ||
tied-features-strength.rs | ||
to_vec.rs | ||
trailing_zeros.rs | ||
transmute-optimized.rs | ||
transmute-scalar.rs | ||
try_question_mark_nop.rs | ||
tune-cpu-on-functions.rs | ||
tuple-layout-opt.rs | ||
ub-checks.rs | ||
unchecked_shifts.rs | ||
unchecked-float-casts.rs | ||
uninit-consts.rs | ||
union-abi.rs | ||
unwind-and-panic-abort.rs | ||
unwind-extern-exports.rs | ||
unwind-extern-imports.rs | ||
unwind-landingpad-cold.rs | ||
unwind-landingpad-inline.rs | ||
used_with_arg.rs | ||
var-names.rs | ||
vec_pop_push_noop.rs | ||
vec-as-ptr.rs | ||
vec-calloc.rs | ||
vec-in-place.rs | ||
vec-iter-collect-len.rs | ||
vec-iter.rs | ||
vec-len-invariant.rs | ||
vec-optimizes-away.rs | ||
vec-reserve-extend.rs | ||
vec-shrink-panik.rs | ||
vec-with-capacity.rs | ||
vecdeque_no_panic.rs | ||
vecdeque_pop_push.rs | ||
vecdeque-drain.rs | ||
vecdeque-nonempty-get-no-panic.rs | ||
virtual-function-elimination-32bit.rs | ||
virtual-function-elimination.rs | ||
vtable-loads.rs | ||
vtable-upcast.rs | ||
wasm_casts_trapping.rs | ||
wasm_exceptions.rs | ||
zip.rs | ||
zst-offset.rs |
The files here use the LLVM FileCheck framework, documented at https://llvm.org/docs/CommandGuide/FileCheck.html.
One extension worth noting is the use of revisions as custom prefixes for FileCheck. If your codegen test has different behavior based on the chosen target or different compiler flags that you want to exercise, you can use a revisions annotation, like so:
// revisions: aaa bbb
// [bbb] compile-flags: --flags-for-bbb
After specifying those variations, you can write different expected, or explicitly unexpected output by using `<prefix>-SAME:` and `<prefix>-NOT:`, like so:
// CHECK: expected code
// aaa-SAME: emitted-only-for-aaa
// aaa-NOT: emitted-only-for-bbb
// bbb-NOT: emitted-only-for-aaa
// bbb-SAME: emitted-only-for-bbb