Auto merge of #111803 - scottmcm:simple-swap-alternative, r=Mark-Simulacrum

Tweak the threshold for chunked swapping

Thanks to `@AngelicosPhosphoros` for the tests here, which I copied from #98892.

This is an experiment as a simple alternative to that PR that just tweaks the existing threshold, since that PR showed that 3×Align (like `String`) currently doesn't work as well as it could.
This commit is contained in:
bors 2024-01-20 21:54:44 +00:00
commit 4cb17b4e78
2 changed files with 65 additions and 19 deletions

View File

@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
// tends to copy the whole thing to stack rather than doing it one part
// at a time, so instead treat them as one-element slices and piggy-back
// the slice optimizations that will split up the swaps.
if size_of::<T>() / align_of::<T>() > 4 {
if const { size_of::<T>() / align_of::<T>() > 2 } {
// SAFETY: exclusive references always point to one non-overlapping
// element and are non-null and properly aligned.
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };

View File

@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
// CHECK-LABEL: @swap_rgb48
// Codegen test: swaps two `RGB48` values through `swap` and pins the shape of
// the generated IR with the FileCheck directives inside the body.
// `#[no_mangle]` keeps the symbol name stable so the label above can find it.
// (`RGB48` is defined outside this view; the `[3 x i16]` expectation below
// suggests three 16-bit channels - TODO confirm.)
#[no_mangle]
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
// NOTE(review): the expectations below contradict each other - the FIXME and
// the alloca/memcpy directives require a stack temporary, while the no-alloca
// directive between them forbids one. This listing looks like a rendered diff
// that interleaves removed and added lines; confirm which set is current
// before relying on it.
// FIXME: See #115212 for why this has an alloca again
// CHECK-NOT: alloca
// CHECK: alloca [3 x i16], align 2
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
// Whether `i8` is the best for this is unclear, but
// might as well record what's actually happening right now.
// CHECK: load i8
// CHECK: load i8
// CHECK: store i8
// CHECK: store i8
// The call under test; everything above only constrains the IR it produces.
swap(x, y)
}
@ -41,10 +44,39 @@ pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
// Codegen test: swaps two `RGBA64` values through `swap` and requires that the
// swap happens without a stack temporary, as whole-value loads and stores.
// (`RGBA64` is defined outside this view - presumably four 16-bit channels,
// given the `<4 x i16>` expectation below; TODO confirm.)
#[no_mangle]
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
// CHECK-NOT: alloca
// Each group below captures the value loaded from `%x` and from `%y`, then
// requires the stores to be crossed: the `%y` value goes to `%x` and vice
// versa. The DAG form leaves the relative order of the four operations free.
// NOTE(review): two alternative groups are listed (`<4 x i16>` and `i64`) and
// both bind the same capture names; this looks like the old and new sides of
// a rendered diff - confirm which one is current.
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
// CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2
// CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2
// CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2
// CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2
// The call under test.
swap(x, y)
}
// CHECK-LABEL: @swap_vecs
// Codegen test: swaps two `Vec<u32>` in place through `swap`.
// Expects no stack temporary, and at least two 64-bit loads followed by two
// 64-bit stores before the function returns.
// (The `i64` width assumes a 64-bit target - presumably pinned by a test
// header outside this view; TODO confirm.)
#[no_mangle]
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
// CHECK-NOT: alloca
// There are plenty more loads and stores than just these,
// but at least one sure better be 64-bit (for size or capacity).
// CHECK: load i64
// CHECK: load i64
// CHECK: store i64
// CHECK: store i64
// CHECK: ret void
// The call under test.
swap(x, y)
}
// CHECK-LABEL: @swap_slices
// Codegen test: swaps two slice references (`&[u32]`) through `swap`.
// Expects no stack temporary, with each reference moved as a pointer part and
// a 64-bit part: both references are loaded first, then both are stored.
// (The `i64` width assumes a 64-bit target - TODO confirm against the test
// header, which is outside this view.)
#[no_mangle]
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
// CHECK-NOT: alloca
// Loads: pointer then length, for each of the two references.
// CHECK: load ptr
// CHECK: load i64
// CHECK: load ptr
// CHECK: load i64
// Stores: same pointer/length pairing, written back to the opposite side.
// CHECK: store ptr
// CHECK: store i64
// CHECK: store ptr
// CHECK: store i64
// The call under test.
swap(x, y)
}
@ -91,3 +123,17 @@ pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
x.swap_with_slice(y);
}
}
/// Test fixture with a deliberately awkward layout.
///
/// `repr(C, packed)` keeps the fields in declaration order with no padding,
/// so `second` starts one byte in and the whole struct has alignment 1 while
/// being larger than a single machine word.
#[repr(C, packed)]
pub struct Packed {
/// One-byte leading field that pushes `second` off its natural alignment.
pub first: bool,
/// Word-sized field stored immediately after `first`, without padding.
pub second: usize,
}
// CHECK-LABEL: @swap_packed_structs
// Codegen test: swaps two `Packed` values through `swap`.
// The only requirement is that no stack temporary appears anywhere before the
// function returns; the exact loads and stores used are left unconstrained.
#[no_mangle]
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
// CHECK-NOT: alloca
// CHECK: ret void
// The call under test.
swap(x, y)
}