Auto merge of #115236 - scottmcm:less-vector, r=compiler-errors
Stop emitting non-power-of-two vectors in (non-portable-SIMD) codegen

Fixes #115212

It's unclear what makes this not work sometimes, since it often *does* work, so for now just disable the unusual cases. A future PR can consider doing something smarter, but this is an easy and safe tweak that we can do to resolve the regressions for now.
This commit is contained in:
commit
766c0c0b83
@ -405,7 +405,11 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
|
|||||||
|
|
||||||
// Vectors, even for non-power-of-two sizes, have the same layout as
|
// Vectors, even for non-power-of-two sizes, have the same layout as
|
||||||
// arrays but don't count as aggregate types
|
// arrays but don't count as aggregate types
|
||||||
|
// While LLVM theoretically supports non-power-of-two sizes, and they
|
||||||
|
// often work fine, sometimes x86-isel deals with them horribly
|
||||||
|
// (see #115212) so for now only use power-of-two ones.
|
||||||
if let FieldsShape::Array { count, .. } = self.layout.fields()
|
if let FieldsShape::Array { count, .. } = self.layout.fields()
|
||||||
|
&& count.is_power_of_two()
|
||||||
&& let element = self.field(cx, 0)
|
&& let element = self.field(cx, 0)
|
||||||
&& element.ty.is_integral()
|
&& element.ty.is_integral()
|
||||||
{
|
{
|
||||||
|
@ -33,12 +33,21 @@ pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
// CHECK-LABEL: @replace_short_array(
|
// CHECK-LABEL: @replace_short_array_3(
|
||||||
pub fn replace_short_array(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
|
pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK: %[[R:.+]] = load <3 x i32>, ptr %r, align 4
|
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 12, i1 false)
|
||||||
// CHECK: store <3 x i32> %[[R]], ptr %result
|
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 12, i1 false)
|
||||||
// CHECK: %[[V:.+]] = load <3 x i32>, ptr %v, align 4
|
std::mem::replace(r, v)
|
||||||
// CHECK: store <3 x i32> %[[V]], ptr %r
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
// CHECK-LABEL: @replace_short_array_4(
|
||||||
|
pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] {
|
||||||
|
// CHECK-NOT: alloca
|
||||||
|
// CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4
|
||||||
|
// CHECK: store <4 x i32> %[[R]], ptr %result
|
||||||
|
// CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4
|
||||||
|
// CHECK: store <4 x i32> %[[V]], ptr %r
|
||||||
std::mem::replace(r, v)
|
std::mem::replace(r, v)
|
||||||
}
|
}
|
||||||
|
@ -11,11 +11,12 @@ type RGB48 = [u16; 3];
|
|||||||
// CHECK-LABEL: @swap_rgb48_manually(
|
// CHECK-LABEL: @swap_rgb48_manually(
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
||||||
// CHECK-NOT: alloca
|
// FIXME: See #115212 for why this has an alloca again
|
||||||
// CHECK: %[[TEMP0:.+]] = load <3 x i16>, ptr %x, align 2
|
|
||||||
// CHECK: %[[TEMP1:.+]] = load <3 x i16>, ptr %y, align 2
|
// CHECK: alloca [3 x i16], align 2
|
||||||
// CHECK: store <3 x i16> %[[TEMP1]], ptr %x, align 2
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
// CHECK: store <3 x i16> %[[TEMP0]], ptr %y, align 2
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
|
|
||||||
let temp = *x;
|
let temp = *x;
|
||||||
*x = *y;
|
*x = *y;
|
||||||
@ -25,11 +26,25 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
|||||||
// CHECK-LABEL: @swap_rgb48
|
// CHECK-LABEL: @swap_rgb48
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
||||||
|
// FIXME: See #115212 for why this has an alloca again
|
||||||
|
|
||||||
|
// CHECK: alloca [3 x i16], align 2
|
||||||
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
|
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
|
||||||
|
swap(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
type RGBA64 = [u16; 4];
|
||||||
|
|
||||||
|
// CHECK-LABEL: @swap_rgba64
|
||||||
|
#[no_mangle]
|
||||||
|
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
|
||||||
// CHECK-NOT: alloca
|
// CHECK-NOT: alloca
|
||||||
// CHECK: load <3 x i16>
|
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
|
||||||
// CHECK: load <3 x i16>
|
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
|
||||||
// CHECK: store <3 x i16>
|
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
|
||||||
// CHECK: store <3 x i16>
|
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
|
||||||
swap(x, y)
|
swap(x, y)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user