Feature-flag fused mul-add to block libcalls

This commit is contained in:
Jubilee Young 2021-09-21 18:55:05 -07:00 committed by Jubilee
parent 6d3d07abfe
commit c2f59483f9
3 changed files with 25 additions and 22 deletions

View File

@ -46,13 +46,6 @@ extern "platform-intrinsic" {
/// fabs
pub(crate) fn simd_fabs<T>(x: T) -> T;
/// fsqrt
#[cfg(feature = "std")]
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
/// fma
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@ -110,6 +103,12 @@ mod std {
// trunc
pub(crate) fn simd_trunc<T>(x: T) -> T;
// fsqrt
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
// fma
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
}
}

View File

@ -42,6 +42,7 @@ macro_rules! impl_float_vector {
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
/// true, and will be heavily dependent on designing algorithms with specific target
/// hardware in mind.
#[cfg(feature = "std")]
#[inline]
pub fn mul_add(self, a: Self, b: Self) -> Self {
unsafe { intrinsics::simd_fma(self, a, b) }

View File

@ -437,14 +437,6 @@ macro_rules! impl_float_tests {
)
}
// Hands the `LANES`-wide vector `mul_add` and the scalar `mul_add` to
// `test_helpers::test_ternary_elementwise`.
// NOTE(review): presumably the helper checks that both agree lane by lane —
// confirm against `test_helpers`.
fn mul_add<const LANES: usize>() {
test_helpers::test_ternary_elementwise(
&Vector::<LANES>::mul_add,
&Scalar::mul_add,
// Returns `true` for every argument triple.
// NOTE(review): looks like an input filter that accepts everything — confirm.
&|_, _, _| true,
)
}
fn recip<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::recip,
@ -601,13 +593,24 @@ macro_rules! impl_float_tests {
}
#[cfg(feature = "std")]
test_helpers::test_lanes! {
fn sqrt<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::sqrt,
&Scalar::sqrt,
&|_| true,
)
mod std {
use super::*;
test_helpers::test_lanes! {
// Hands the `LANES`-wide vector `sqrt` and the scalar `sqrt` to
// `test_helpers::test_unary_elementwise`.
// NOTE(review): presumably the helper checks that both agree lane by lane —
// confirm against `test_helpers`.
fn sqrt<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::sqrt,
&Scalar::sqrt,
// Returns `true` for every argument.
// NOTE(review): looks like an input filter that accepts everything — confirm.
&|_| true,
)
}
// Hands the `LANES`-wide vector `mul_add` and the scalar `mul_add` to
// `test_helpers::test_ternary_elementwise`.
// NOTE(review): presumably the helper checks that both agree lane by lane —
// confirm against `test_helpers`.
fn mul_add<const LANES: usize>() {
test_helpers::test_ternary_elementwise(
&Vector::<LANES>::mul_add,
&Scalar::mul_add,
// Returns `true` for every argument triple.
// NOTE(review): looks like an input filter that accepts everything — confirm.
&|_, _, _| true,
)
}
}
}
}