Feature-flag fused mul-add to block libcalls

2021-09-21 18:55:05 -07:00 · 2021-09-21 18:55:05 -07:00 · c2f59483f9
commit c2f59483f9
parent 6d3d07abfe
3 changed files with 25 additions and 22 deletions
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -46,13 +46,6 @@ extern "platform-intrinsic" {
    /// fabs
    pub(crate) fn simd_fabs<T>(x: T) -> T;

-    /// fsqrt
-    #[cfg(feature = "std")]
-    pub(crate) fn simd_fsqrt<T>(x: T) -> T;
-
-    /// fma
-    pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
-
    pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -110,6 +103,12 @@ mod std {

        // trunc
        pub(crate) fn simd_trunc<T>(x: T) -> T;
+
+        // fsqrt
+        pub(crate) fn simd_fsqrt<T>(x: T) -> T;
+
+        // fma
+        pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
    }
 }

--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -42,6 +42,7 @@ macro_rules! impl_float_vector {
            /// architecture has a dedicated `fma` CPU instruction.  However, this is not always
            /// true, and will be heavily dependent on designing algorithms with specific target
            /// hardware in mind.
+            #[cfg(feature = "std")]
            #[inline]
            pub fn mul_add(self, a: Self, b: Self) -> Self {
                unsafe { intrinsics::simd_fma(self, a, b) }
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -437,14 +437,6 @@ macro_rules! impl_float_tests {
                    )
                }

-                fn mul_add<const LANES: usize>() {
-                    test_helpers::test_ternary_elementwise(
-                        &Vector::<LANES>::mul_add,
-                        &Scalar::mul_add,
-                        &|_, _, _| true,
-                    )
-                }
-
                fn recip<const LANES: usize>() {
                    test_helpers::test_unary_elementwise(
                        &Vector::<LANES>::recip,
@@ -601,13 +593,24 @@ macro_rules! impl_float_tests {
            }

            #[cfg(feature = "std")]
-            test_helpers::test_lanes! {
-                fn sqrt<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
-                        &Vector::<LANES>::sqrt,
-                        &Scalar::sqrt,
-                        &|_| true,
-                    )
+            mod std {
+                use super::*;
+                test_helpers::test_lanes! {
+                    fn sqrt<const LANES: usize>() {
+                        test_helpers::test_unary_elementwise(
+                            &Vector::<LANES>::sqrt,
+                            &Scalar::sqrt,
+                            &|_| true,
+                        )
+                    }
+
+                    fn mul_add<const LANES: usize>() {
+                        test_helpers::test_ternary_elementwise(
+                            &Vector::<LANES>::mul_add,
+                            &Scalar::mul_add,
+                            &|_, _, _| true,
+                        )
+                    }
                }
            }
        }