Use -0.0 in intrinsics::simd::reduce_add_unordered
-0.0, not +0.0, is the true additive identity for floats (x + -0.0 == x for every x, whereas -0.0 + +0.0 yields +0.0), and this matters to codegen.
This commit is contained in:
parent
0307e401c2
commit
ab8c202527
@ -2090,14 +2090,14 @@ macro_rules! arith_red {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
|
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
|
||||||
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
|
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
|
||||||
arith_red!(
|
arith_red!(
|
||||||
simd_reduce_add_unordered: vector_reduce_add,
|
simd_reduce_add_unordered: vector_reduce_add,
|
||||||
vector_reduce_fadd_reassoc,
|
vector_reduce_fadd_reassoc,
|
||||||
false,
|
false,
|
||||||
add,
|
add,
|
||||||
0.0
|
-0.0
|
||||||
);
|
);
|
||||||
arith_red!(
|
arith_red!(
|
||||||
simd_reduce_mul_unordered: vector_reduce_mul,
|
simd_reduce_mul_unordered: vector_reduce_mul,
|
||||||
|
29
tests/assembly/simd/reduce-fadd-unordered.rs
Normal file
29
tests/assembly/simd/reduce-fadd-unordered.rs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
//@ revisions: x86_64 aarch64
|
||||||
|
//@ assembly-output: emit-asm
|
||||||
|
//@ compile-flags: --crate-type=lib -O
|
||||||
|
//@[aarch64] only-aarch64
|
||||||
|
//@[x86_64] only-x86_64
|
||||||
|
//@[x86_64] compile-flags: -Ctarget-feature=+sse3
|
||||||
|
#![feature(portable_simd)]
|
||||||
|
#![feature(core_intrinsics)]
|
||||||
|
use std::intrinsics::simd as intrinsics;
|
||||||
|
use std::simd::*;
|
||||||
|
// Regression test for https://github.com/rust-lang/rust/issues/130028
|
||||||
|
// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
|
||||||
|
// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
|
||||||
|
// With +0.0 it would emit roughly one extra fadd, depending on the architecture.
|
||||||
|
|
||||||
|
// CHECK-LABEL: reduce_fadd_negative_zero
|
||||||
|
pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
|
||||||
|
// x86_64: addps
|
||||||
|
// x86_64-NEXT: movshdup
|
||||||
|
// x86_64-NEXT: addss
|
||||||
|
// x86_64-NOT: xorps
|
||||||
|
|
||||||
|
// aarch64: faddp
|
||||||
|
// aarch64-NEXT: faddp
|
||||||
|
|
||||||
|
// CHECK-NOT: {{f?}}add{{p?s*}}
|
||||||
|
// CHECK: ret
|
||||||
|
intrinsics::simd_reduce_add_unordered(v)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user