Tweak the threshold for chunked swapping
Thanks to #98892 for the tests I brought in here, as it demonstrated that swapping a 3×usize type is currently suboptimal.
This commit is contained in:
parent 128148d4cf
commit 60208a0517
@@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
|
||||
// tends to copy the whole thing to stack rather than doing it one part
|
||||
// at a time, so instead treat them as one-element slices and piggy-back
|
||||
// the slice optimizations that will split up the swaps.
|
||||
if size_of::<T>() / align_of::<T>() > 4 {
|
||||
if const { size_of::<T>() / align_of::<T>() > 2 } {
|
||||
// SAFETY: exclusive references always point to one non-overlapping
|
||||
// element and are non-null and properly aligned.
|
||||
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
|
||||
|
@@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
||||
// CHECK-LABEL: @swap_rgb48
|
||||
#[no_mangle]
|
||||
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
||||
// FIXME: See #115212 for why this has an alloca again
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// CHECK: alloca [3 x i16], align 2
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||
// Whether `i8` is the best for this is unclear, but
|
||||
// might as well record what's actually happening right now.
|
||||
|
||||
// CHECK: load i8
|
||||
// CHECK: load i8
|
||||
// CHECK: store i8
|
||||
// CHECK: store i8
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
@@ -41,10 +44,39 @@ type RGBA64 = [u16; 4];
|
||||
#[no_mangle]
|
||||
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
|
||||
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
|
||||
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
|
||||
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
|
||||
// CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2
|
||||
// CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2
|
||||
// CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2
|
||||
// CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_vecs
|
||||
#[no_mangle]
|
||||
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
|
||||
// CHECK-NOT: alloca
|
||||
// There are plenty more loads and stores than just these,
|
||||
// but at least one sure better be 64-bit (for size or capacity).
|
||||
// CHECK: load i64
|
||||
// CHECK: load i64
|
||||
// CHECK: store i64
|
||||
// CHECK: store i64
|
||||
// CHECK: ret void
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load ptr
|
||||
// CHECK: load i64
|
||||
// CHECK: load ptr
|
||||
// CHECK: load i64
|
||||
// CHECK: store ptr
|
||||
// CHECK: store i64
|
||||
// CHECK: store ptr
|
||||
// CHECK: store i64
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
@@ -55,9 +87,9 @@ type RGB24 = [u8; 3];
|
||||
// CHECK-LABEL: @swap_rgb24_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i8>
|
||||
// CHECK: store <{{[0-9]+}} x i8>
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i8>
|
||||
// CHECK: store <{{[0-9]+}} x i8>
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
@@ -69,9 +101,9 @@ type RGBA32 = [u8; 4];
|
||||
// CHECK-LABEL: @swap_rgba32_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i32>
|
||||
// CHECK: store <{{[0-9]+}} x i32>
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i32>
|
||||
// CHECK: store <{{[0-9]+}} x i32>
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
@@ -84,10 +116,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
|
||||
// CHECK-LABEL: @swap_string_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C, packed)]
|
||||
pub struct Packed {
|
||||
pub first: bool,
|
||||
pub second: usize,
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_packed_structs
|
||||
#[no_mangle]
|
||||
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: ret void
|
||||
swap(x, y)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user