Auto merge of #111803 - scottmcm:simple-swap-alternative, r=Mark-Simulacrum
Tweak the threshold for chunked swapping. Thanks to `@AngelicosPhosphoros` for the tests here, which I copied from #98892. This is an experiment: a simple alternative to that PR which just tweaks the existing threshold, since that PR showed that 3×Align types (like `String`) currently don't work as well as they could.
This commit is contained in:
commit
4cb17b4e78
@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
|
|||||||
// tends to copy the whole thing to stack rather than doing it one part
|
// tends to copy the whole thing to stack rather than doing it one part
|
||||||
// at a time, so instead treat them as one-element slices and piggy-back
|
// at a time, so instead treat them as one-element slices and piggy-back
|
||||||
// the slice optimizations that will split up the swaps.
|
// the slice optimizations that will split up the swaps.
|
||||||
if size_of::<T>() / align_of::<T>() > 4 {
|
if const { size_of::<T>() / align_of::<T>() > 2 } {
|
||||||
// SAFETY: exclusive references always point to one non-overlapping
|
// SAFETY: exclusive references always point to one non-overlapping
|
||||||
// element and are non-null and properly aligned.
|
// element and are non-null and properly aligned.
|
||||||
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
|
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
|
||||||
|
@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
|||||||
// CHECK-LABEL: @swap_rgb48
|
// CHECK-LABEL: @swap_rgb48
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
||||||
// FIXME: See #115212 for why this has an alloca again
|
// CHECK-NOT: alloca
|
||||||
|
|
||||||
// CHECK: alloca [3 x i16], align 2
|
// Whether `i8` is the best for this is unclear, but
|
||||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
// might as well record what's actually happening right now.
|
||||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
|
||||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
// CHECK: load i8
|
||||||
|
// CHECK: load i8
|
||||||
|
// CHECK: store i8
|
||||||
|
// CHECK: store i8
|
||||||
swap(x, y)
|
swap(x, y)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,10 +44,39 @@ pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
|||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
|
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
|
// CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2
|
||||||
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
|
// CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2
|
||||||
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
|
// CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2
|
||||||
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
|
// CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2
|
||||||
|
swap(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @swap_vecs
|
||||||
|
#[no_mangle]
|
||||||
|
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
|
||||||
|
// CHECK-NOT: alloca
|
||||||
|
// There are plenty more loads and stores than just these,
|
||||||
|
// but at least one sure better be 64-bit (for size or capacity).
|
||||||
|
// CHECK: load i64
|
||||||
|
// CHECK: load i64
|
||||||
|
// CHECK: store i64
|
||||||
|
// CHECK: store i64
|
||||||
|
// CHECK: ret void
|
||||||
|
swap(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @swap_slices
|
||||||
|
#[no_mangle]
|
||||||
|
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
|
||||||
|
// CHECK-NOT: alloca
|
||||||
|
// CHECK: load ptr
|
||||||
|
// CHECK: load i64
|
||||||
|
// CHECK: load ptr
|
||||||
|
// CHECK: load i64
|
||||||
|
// CHECK: store ptr
|
||||||
|
// CHECK: store i64
|
||||||
|
// CHECK: store ptr
|
||||||
|
// CHECK: store i64
|
||||||
swap(x, y)
|
swap(x, y)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,9 +87,9 @@ pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
|
|||||||
// CHECK-LABEL: @swap_rgb24_slices
|
// CHECK-LABEL: @swap_rgb24_slices
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
|
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK: load <{{[0-9]+}} x i8>
|
// CHECK: load <{{[0-9]+}} x i8>
|
||||||
// CHECK: store <{{[0-9]+}} x i8>
|
// CHECK: store <{{[0-9]+}} x i8>
|
||||||
if x.len() == y.len() {
|
if x.len() == y.len() {
|
||||||
x.swap_with_slice(y);
|
x.swap_with_slice(y);
|
||||||
}
|
}
|
||||||
@ -69,9 +101,9 @@ pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
|
|||||||
// CHECK-LABEL: @swap_rgba32_slices
|
// CHECK-LABEL: @swap_rgba32_slices
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
|
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK: load <{{[0-9]+}} x i32>
|
// CHECK: load <{{[0-9]+}} x i32>
|
||||||
// CHECK: store <{{[0-9]+}} x i32>
|
// CHECK: store <{{[0-9]+}} x i32>
|
||||||
if x.len() == y.len() {
|
if x.len() == y.len() {
|
||||||
x.swap_with_slice(y);
|
x.swap_with_slice(y);
|
||||||
}
|
}
|
||||||
@ -84,10 +116,24 @@ pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
|
|||||||
// CHECK-LABEL: @swap_string_slices
|
// CHECK-LABEL: @swap_string_slices
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
|
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK: load <{{[0-9]+}} x i64>
|
// CHECK: load <{{[0-9]+}} x i64>
|
||||||
// CHECK: store <{{[0-9]+}} x i64>
|
// CHECK: store <{{[0-9]+}} x i64>
|
||||||
if x.len() == y.len() {
|
if x.len() == y.len() {
|
||||||
x.swap_with_slice(y);
|
x.swap_with_slice(y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[repr(C, packed)]
|
||||||
|
pub struct Packed {
|
||||||
|
pub first: bool,
|
||||||
|
pub second: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: @swap_packed_structs
|
||||||
|
#[no_mangle]
|
||||||
|
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
|
||||||
|
// CHECK-NOT: alloca
|
||||||
|
// CHECK: ret void
|
||||||
|
swap(x, y)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user