Use -0.0 in intrinsics::simd::reduce_add_unordered

-0.0 is the actual neutral additive float, not +0.0, and this matters to codegen.
2024-09-13 13:36:32 -07:00 · 2024-09-13 13:36:32 -07:00 · ab8c202527
commit ab8c202527
parent 0307e401c2
2 changed files with 31 additions and 2 deletions
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -2090,14 +2090,14 @@ macro_rules! arith_red {
        };
    }
-    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
+    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
    arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
    arith_red!(
        simd_reduce_add_unordered: vector_reduce_add,
        vector_reduce_fadd_reassoc,
        false,
        add,
-        0.0
+        -0.0
    );
    arith_red!(
        simd_reduce_mul_unordered: vector_reduce_mul,
--- a/tests/assembly/simd/reduce-fadd-unordered.rs
+++ b/tests/assembly/simd/reduce-fadd-unordered.rs
@@ -0,0 +1,29 @@
 //@ revisions: x86_64 aarch64
 //@ assembly-output: emit-asm
 //@ compile-flags: --crate-type=lib -O
 //@[aarch64] only-aarch64
 //@[x86_64] only-x86_64
 //@[x86_64] compile-flags: -Ctarget-feature=+sse3
 #![feature(portable_simd)]
 #![feature(core_intrinsics)]
 use std::intrinsics::simd as intrinsics;
 use std::simd::*;
 // Regression test for https://github.com/rust-lang/rust/issues/130028
 // This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
 // +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
 // It would emit about an extra fadd, depending on the architecture.
 // CHECK-LABEL: reduce_fadd_negative_zero
 /// Sums the lanes of `v` by calling `simd_reduce_add_unordered` directly, so the
 /// assembly expectations in the body see exactly what that intrinsic lowers to.
 ///
 /// The directives in the body pin the expected instruction sequence for each
 /// revision and reject any further add-like instruction before the return.
 ///
 /// # Safety
 ///
 /// The intrinsic is invoked without an inner `unsafe` block, which is why this
 /// function is itself declared `unsafe`.
 /// NOTE(review): no additional caller obligations are visible here -- confirm
 /// against the intrinsic's own documentation.
 pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
    // x86_64: addps
    // x86_64-NEXT: movshdup
    // x86_64-NEXT: addss
    // presumably xorps would signal a zeroed (+0.0) start value being materialized -- confirm
    // x86_64-NOT: xorps
    // aarch64: faddp
    // aarch64-NEXT: faddp
    // CHECK-NOT: {{f?}}add{{p?s*}}
    // CHECK: ret
    intrinsics::simd_reduce_add_unordered(v)
 }