From 84e305dd93cc83cdd9378d0f6f657d8caaabfad8 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Fri, 25 Aug 2023 20:06:57 -0700 Subject: [PATCH] Stop emitting non-power-of-two vectors in basic LLVM codegen --- compiler/rustc_codegen_llvm/src/type_of.rs | 4 +++ tests/codegen/mem-replace-simple-type.rs | 21 ++++++++++---- tests/codegen/swap-small-types.rs | 33 ++++++++++++++++------ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs index 2be7bce115d..831645579b9 100644 --- a/compiler/rustc_codegen_llvm/src/type_of.rs +++ b/compiler/rustc_codegen_llvm/src/type_of.rs @@ -405,7 +405,11 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> { // Vectors, even for non-power-of-two sizes, have the same layout as // arrays but don't count as aggregate types + // While LLVM theoretically supports non-power-of-two sizes, and they + // often work fine, sometimes x86-isel deals with them horribly + // (see #115212) so for now only use power-of-two ones. if let FieldsShape::Array { count, .. } = self.layout.fields() + && count.is_power_of_two() && let element = self.field(cx, 0) && element.ty.is_integral() { diff --git a/tests/codegen/mem-replace-simple-type.rs b/tests/codegen/mem-replace-simple-type.rs index 174ac608e01..be3af989ef0 100644 --- a/tests/codegen/mem-replace-simple-type.rs +++ b/tests/codegen/mem-replace-simple-type.rs @@ -33,12 +33,21 @@ pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str { } #[no_mangle] -// CHECK-LABEL: @replace_short_array( -pub fn replace_short_array(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] { +// CHECK-LABEL: @replace_short_array_3( +pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] { // CHECK-NOT: alloca - // CHECK: %[[R:.+]] = load <3 x i32>, ptr %r, align 4 - // CHECK: store <3 x i32> %[[R]], ptr %result - // CHECK: %[[V:.+]] = load <3 x i32>, ptr %v, align 4 - // CHECK: store <3 x i32> %[[V]], ptr %r + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 12, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 12, i1 false) + std::mem::replace(r, v) +} + +#[no_mangle] +// CHECK-LABEL: @replace_short_array_4( +pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] { + // CHECK-NOT: alloca + // CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4 + // CHECK: store <4 x i32> %[[R]], ptr %result + // CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4 + // CHECK: store <4 x i32> %[[V]], ptr %r std::mem::replace(r, v) } diff --git a/tests/codegen/swap-small-types.rs b/tests/codegen/swap-small-types.rs index 419645a3fc6..27bc00bc3ab 100644 --- a/tests/codegen/swap-small-types.rs +++ b/tests/codegen/swap-small-types.rs @@ -11,11 +11,12 @@ type RGB48 = [u16; 3]; // CHECK-LABEL: @swap_rgb48_manually( #[no_mangle] pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) { - // CHECK-NOT: alloca - // CHECK: %[[TEMP0:.+]] = load <3 x i16>, ptr %x, align 2 - // CHECK: %[[TEMP1:.+]] = load <3 x i16>, ptr %y, align 2 - // CHECK: store <3 x i16> %[[TEMP1]], ptr %x, align 2 - // CHECK: store <3 x i16> %[[TEMP0]], ptr %y, align 2 + // FIXME: See #115212 for why this has an alloca again + + // CHECK: alloca [3 x i16], align 2 + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) let temp = *x; *x = *y; @@ -25,11 +26,25 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) { // CHECK-LABEL: @swap_rgb48 #[no_mangle] pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) { + // FIXME: See #115212 for why this has an alloca again + + // CHECK: alloca [3 x i16], align 2 + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false) + swap(x, y) +} + +type RGBA64 = [u16; 4]; + +// CHECK-LABEL: @swap_rgba64 +#[no_mangle] +pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) { // CHECK-NOT: alloca - // CHECK: load <3 x i16> - // CHECK: load <3 x i16> - // CHECK: store <3 x i16> - // CHECK: store <3 x i16> + // CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2 + // CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2 + // CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2 + // CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2 swap(x, y) }