Auto merge of #79547 - erikdesjardins:byval, r=nagisa

Pass arguments up to 2*usize in size by value

In https://github.com/rust-lang/rust/pull/77434#discussion_r498719533, `@eddyb` said:

> I wonder if it makes sense to limit this to returns [...]

Let's do a perf run and find out.

It seems the `extern "C"` ABI will pass arguments up to 2*usize in registers: https://godbolt.org/z/n8E6zc (modified from https://github.com/rust-lang/rust/issues/26494#issuecomment-619506345).

r? `@nagisa`
This commit is contained in:
bors 2020-12-02 15:17:32 +00:00
commit a094ff9590
3 changed files with 17 additions and 12 deletions

View File

@ -2848,7 +2848,7 @@ fn adjust_for_abi(&mut self, cx: &C, abi: SpecAbi) {
|| abi == SpecAbi::RustIntrinsic
|| abi == SpecAbi::PlatformIntrinsic
{
let fixup = |arg: &mut ArgAbi<'tcx, Ty<'tcx>>, is_ret: bool| {
let fixup = |arg: &mut ArgAbi<'tcx, Ty<'tcx>>| {
if arg.is_ignore() {
return;
}
@ -2886,9 +2886,9 @@ fn adjust_for_abi(&mut self, cx: &C, abi: SpecAbi) {
_ => return,
}
// Return structures up to 2 pointers in size by value, matching `ScalarPair`. LLVM
// will usually return these in 2 registers, which is more efficient than by-ref.
let max_by_val_size = if is_ret { Pointer.size(cx) * 2 } else { Pointer.size(cx) };
// Pass and return structures up to 2 pointers in size by value, matching `ScalarPair`.
// LLVM will usually pass these in 2 registers, which is more efficient than by-ref.
let max_by_val_size = Pointer.size(cx) * 2;
let size = arg.layout.size;
if arg.layout.is_unsized() || size > max_by_val_size {
@ -2900,9 +2900,9 @@ fn adjust_for_abi(&mut self, cx: &C, abi: SpecAbi) {
arg.cast_to(Reg { kind: RegKind::Integer, size });
}
};
fixup(&mut self.ret, true);
fixup(&mut self.ret);
for arg in &mut self.args {
fixup(arg, false);
fixup(arg);
}
return;
}

View File

@ -1,4 +1,4 @@
//! This test checks that types of up to 128 bits are returned by-value instead of via out-pointer.
//! Check that types of up to 128 bits are passed and returned by-value instead of via pointer.
// compile-flags: -C no-prepopulate-passes -O
// only-x86_64
@ -11,7 +11,7 @@ pub struct S {
c: u32,
}
// CHECK: define i128 @modify(%S* noalias nocapture dereferenceable(16) %s)
// CHECK: define i128 @modify(i128{{( %0)?}})
#[no_mangle]
pub fn modify(s: S) -> S {
S { a: s.a + s.a, b: s.b + s.b, c: s.c + s.c }

View File

@ -63,11 +63,16 @@ pub union UnionU128{a:u128}
#[no_mangle]
pub fn test_UnionU128(_: UnionU128) -> UnionU128 { loop {} }
#[repr(C)]
pub union CUnionU128{a:u128}
// CHECK: define void @test_CUnionU128(%CUnionU128* {{.*}} %_1)
pub union UnionU128x2{a:(u128, u128)}
// CHECK: define void @test_UnionU128x2(i128 %_1.0, i128 %_1.1)
#[no_mangle]
pub fn test_CUnionU128(_: CUnionU128) { loop {} }
pub fn test_UnionU128x2(_: UnionU128x2) { loop {} }
#[repr(C)]
pub union CUnionU128x2{a:(u128, u128)}
// CHECK: define void @test_CUnionU128x2(%CUnionU128x2* {{.*}} %_1)
#[no_mangle]
pub fn test_CUnionU128x2(_: CUnionU128x2) { loop {} }
pub union UnionBool { b:bool }
// CHECK: define zeroext i1 @test_UnionBool(i8 %b)