From 60208a0517dcc4b1358a51b1411f0cf4c8079905 Mon Sep 17 00:00:00 2001 From: AngelicosPhosphoros Date: Sun, 3 Jul 2022 00:31:02 +0300 Subject: [PATCH] Tweak the threshold for chunked swapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to 98892 for the tests I brought in here, as it demonstrated that 3×usize is currently suboptimal. --- library/core/src/mem/mod.rs | 2 +- tests/codegen/swap-small-types.rs | 82 ++++++++++++++++++++++++------- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/library/core/src/mem/mod.rs b/library/core/src/mem/mod.rs index 407954001e4..47dab575253 100644 --- a/library/core/src/mem/mod.rs +++ b/library/core/src/mem/mod.rs @@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) { // tends to copy the whole thing to stack rather than doing it one part // at a time, so instead treat them as one-element slices and piggy-back // the slice optimizations that will split up the swaps. - if size_of::<T>() / align_of::<T>() > 4 { + if const { size_of::<T>() / align_of::<T>() > 2 } { // SAFETY: exclusive references always point to one non-overlapping // element and are non-null and properly aligned.
return unsafe { ptr::swap_nonoverlapping(x, y, 1) }; diff --git a/tests/codegen/swap-small-types.rs b/tests/codegen/swap-small-types.rs index 27bc00bc3ab..2c675174479 100644 --- a/tests/codegen/swap-small-types.rs +++ b/tests/codegen/swap-small-types.rs @@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) { // CHECK-LABEL: @swap_rgb48 #[no_mangle] pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) { - // FIXME: See #115212 for why this has an alloca again + // CHECK-NOT: alloca - // CHECK: alloca [3 x i16], align 2 - // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) - // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) - // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + // Whether `i8` is the best for this is unclear, but + // might as well record what's actually happening right now. + + // CHECK: load i8 + // CHECK: load i8 + // CHECK: store i8 + // CHECK: store i8 swap(x, y) } @@ -41,10 +44,39 @@ pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) { #[no_mangle] pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) { // CHECK-NOT: alloca - // CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2 - // CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2 - // CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2 - // CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2 + // CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2 + // CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2 + // CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2 + // CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2 + swap(x, y) +} + +// CHECK-LABEL: @swap_vecs +#[no_mangle] +pub fn swap_vecs(x: &mut Vec, y: &mut Vec) { + // CHECK-NOT: alloca + // There are plenty more loads and stores than just these, + // but at least one sure better be 64-bit (for size or capacity). 
+ // CHECK: load i64 + // CHECK: load i64 + // CHECK: store i64 + // CHECK: store i64 + // CHECK: ret void + swap(x, y) +} + +// CHECK-LABEL: @swap_slices +#[no_mangle] +pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) { + // CHECK-NOT: alloca + // CHECK: load ptr + // CHECK: load i64 + // CHECK: load ptr + // CHECK: load i64 + // CHECK: store ptr + // CHECK: store i64 + // CHECK: store ptr + // CHECK: store i64 swap(x, y) } @@ -55,9 +87,9 @@ pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) { // CHECK-LABEL: @swap_rgb24_slices #[no_mangle] pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i8> -// CHECK: store <{{[0-9]+}} x i8> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i8> + // CHECK: store <{{[0-9]+}} x i8> if x.len() == y.len() { x.swap_with_slice(y); } @@ -69,9 +101,9 @@ pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { // CHECK-LABEL: @swap_rgba32_slices #[no_mangle] pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i32> -// CHECK: store <{{[0-9]+}} x i32> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i32> + // CHECK: store <{{[0-9]+}} x i32> if x.len() == y.len() { x.swap_with_slice(y); } @@ -84,10 +116,24 @@ pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) { // CHECK-LABEL: @swap_string_slices #[no_mangle] pub fn swap_string_slices(x: &mut [String], y: &mut [String]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i64> -// CHECK: store <{{[0-9]+}} x i64> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i64> + // CHECK: store <{{[0-9]+}} x i64> if x.len() == y.len() { x.swap_with_slice(y); } } + +#[repr(C, packed)] +pub struct Packed { + pub first: bool, + pub second: usize, +} + +// CHECK-LABEL: @swap_packed_structs +#[no_mangle] +pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) { + // CHECK-NOT: alloca + // CHECK: ret void + swap(x, y) +}