Auto merge of #122905 - dpaoliello:sync-portable-simd-2024-03-22, r=workingjubilee

Portable SIMD subtree update. Syncs nightly to the latest changes from rust-lang/portable-simd. r? `@calebzulawski`
2024-03-24 00:02:14 +00:00 · 2024-03-24 00:02:14 +00:00 · 9b8d12cf4c
commit 9b8d12cf4c
parent 2f090c30dd 9e0ec251d5
13 changed files with 505 additions and 52 deletions
--- a/library/portable-simd/.github/workflows/ci.yml
+++ b/library/portable-simd/.github/workflows/ci.yml
@ -141,6 +141,11 @@ jobs:
      - name: Test (release)
        run: cargo test --verbose --target=${{ matrix.target }} --release

+      - name: Generate docs
+        run: cargo doc --verbose --target=${{ matrix.target }}
+        env:
+          RUSTDOCFLAGS: -Dwarnings
+
  wasm-tests:
    name: "wasm (firefox, ${{ matrix.name }})"
    runs-on: ubuntu-latest
--- a/library/portable-simd/Cargo.lock
+++ b/library/portable-simd/Cargo.lock
@ -177,6 +177,9 @@ name = "std_float"
 version = "0.1.0"
 dependencies = [
 "core_simd",
+ "test_helpers",
+ "wasm-bindgen",
+ "wasm-bindgen-test",
 ]

 [[package]]
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@ -13,11 +13,12 @@
    simd_ffi,
    staged_api,
    strict_provenance,
+    prelude_import,
    ptr_metadata
 )]
 #![cfg_attr(
    all(
-        any(target_arch = "aarch64", target_arch = "arm",),
+        any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",),
        any(
            all(target_feature = "v6", not(target_feature = "mclass")),
            all(target_feature = "mclass", target_feature = "dsp"),
@ -33,12 +34,21 @@
    any(target_arch = "powerpc", target_arch = "powerpc64"),
    feature(stdarch_powerpc)
 )]
+#![cfg_attr(
+    all(target_arch = "x86_64", target_feature = "avx512f"),
+    feature(stdarch_x86_avx512)
+)]
 #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
 #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
+#![doc(test(attr(deny(warnings))))]
 #![allow(internal_features)]
 #![unstable(feature = "portable_simd", issue = "86656")]
 //! Portable SIMD module.

+#[prelude_import]
+#[allow(unused_imports)]
+use core::prelude::v1::*;
+
 #[path = "mod.rs"]
 mod core_simd;
 pub use self::core_simd::simd;
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@ -34,6 +34,7 @@ fn valid<const N: usize>(values: Simd<Self, N>) -> bool
        fn eq(self, other: Self) -> bool;

        fn to_usize(self) -> usize;
+        fn max_unsigned() -> u64;

        type Unsigned: SimdElement;

@ -78,6 +79,11 @@ fn to_usize(self) -> usize {
                self as usize
            }

+            #[inline]
+            fn max_unsigned() -> u64 {
+                <$unsigned>::MAX as u64 // `MAX` of the unsigned counterpart, widened to `u64`
+            }
+
            type Unsigned = $unsigned;

            const TRUE: Self = -1;
--- a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
@ -16,7 +16,10 @@ impl<const N: usize> Simd<u8, N>
    #[inline]
    pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
        #![allow(unused_imports, unused_unsafe)]
-        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
+        #[cfg(all(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            target_endian = "little"
+        ))]
        use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
        #[cfg(all(
            target_arch = "arm",
@ -37,6 +40,7 @@ pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
                #[cfg(all(
                    any(
                        target_arch = "aarch64",
+                        target_arch = "arm64ec",
                        all(target_arch = "arm", target_feature = "v7")
                    ),
                    target_feature = "neon",
@ -48,7 +52,7 @@ pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
                #[cfg(target_feature = "simd128")]
                16 => transize(wasm::i8x16_swizzle, self, idxs),
                #[cfg(all(
-                    target_arch = "aarch64",
+                    any(target_arch = "aarch64", target_arch = "arm64ec"),
                    target_feature = "neon",
                    target_endian = "little"
                ))]
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@ -1,5 +1,6 @@
 use crate::simd::{
    cmp::SimdPartialOrd,
+    num::SimdUint,
    ptr::{SimdConstPtr, SimdMutPtr},
    LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
 };
@ -262,6 +263,7 @@ impl<const N: usize> Swizzle<N> for Splat {
    /// # Panics
    ///
    /// Panics if the slice's length is less than the vector's `Simd::N`.
+    /// Use `load_or_default` for an alternative that does not panic.
    ///
    /// # Example
    ///
@ -315,6 +317,143 @@ pub fn copy_to_slice(self, slice: &mut [T]) {
        unsafe { self.store(slice.as_mut_ptr().cast()) }
    }

+    /// Reads up to `N` contiguous elements from the start of `slice`. Lanes that would fall past
+    /// the end of `slice` are not read and hold the default value for the element type instead.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::Simd;
+    /// let vec: Vec<i32> = vec![10, 11];
+    ///
+    /// let result = Simd::<i32, 4>::load_or_default(&vec);
+    /// assert_eq!(result, Simd::from_array([10, 11, 0, 0]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn load_or_default(slice: &[T]) -> Self
+    where
+        T: Default,
+    {
+        Self::load_or(slice, Default::default())
+    }
+
+    /// Reads up to `N` contiguous elements from the start of `slice`. Lanes that would fall past
+    /// the end of `slice` are not read; the corresponding value from `or` is passed through.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::Simd;
+    /// let vec: Vec<i32> = vec![10, 11];
+    /// let or = Simd::from_array([-5, -4, -3, -2]);
+    ///
+    /// let result = Simd::load_or(&vec, or);
+    /// assert_eq!(result, Simd::from_array([10, 11, -3, -2]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn load_or(slice: &[T], or: Self) -> Self {
+        Self::load_select(slice, Mask::splat(true), or)
+    }
+
+    /// Reads contiguous elements from `slice`. Each element is read from memory if its
+    /// corresponding element in `enable` is `true`.
+    ///
+    /// When the element is disabled or out of bounds for the slice, that memory location
+    /// is not accessed and the default value for the element type is returned.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, Mask};
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let enable = Mask::from_array([true, true, false, true]);
+    /// // The third lane is disabled, so it holds `i32::default()` rather than `12`.
+    ///
+    /// let result = Simd::<i32, 4>::load_select_or_default(&vec, enable);
+    /// assert_eq!(result, Simd::from_array([10, 11, 0, 13]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn load_select_or_default(slice: &[T], enable: Mask<<T as SimdElement>::Mask, N>) -> Self
+    where
+        T: Default,
+    {
+        Self::load_select(slice, enable, Default::default())
+    }
+
+    /// Reads contiguous elements from `slice`. Each element is read from memory if its
+    /// corresponding element in `enable` is `true`.
+    ///
+    /// When the element is disabled or out of bounds for the slice, that memory location
+    /// is not accessed and the corresponding value from `or` is passed through.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, Mask};
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let enable = Mask::from_array([true, true, false, true]);
+    /// let or = Simd::from_array([-5, -4, -3, -2]);
+    ///
+    /// let result = Simd::load_select(&vec, enable, or);
+    /// assert_eq!(result, Simd::from_array([10, 11, -3, 13]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn load_select(
+        slice: &[T],
+        mut enable: Mask<<T as SimdElement>::Mask, N>,
+        or: Self,
+    ) -> Self {
+        enable &= mask_up_to(slice.len());
+        // SAFETY: `enable` now only covers lanes below `slice.len()`, so every enabled read is
+        // in bounds. `&[T]` is properly aligned to the element.
+        unsafe { Self::load_select_ptr(slice.as_ptr(), enable, or) }
+    }
+
+    /// Reads contiguous elements from `slice`, but only where `enable` is `true`. Disabled
+    /// elements are not accessed in memory; the corresponding value from `or` is passed through.
+    ///
+    /// # Safety
+    /// Every enabled element must be in bounds for the `slice`.
+    #[must_use]
+    #[inline]
+    pub unsafe fn load_select_unchecked(
+        slice: &[T],
+        enable: Mask<<T as SimdElement>::Mask, N>,
+        or: Self,
+    ) -> Self {
+        let ptr = slice.as_ptr();
+        // SAFETY: The safety of reading elements from `slice` is ensured by the caller.
+        unsafe { Self::load_select_ptr(ptr, enable, or) }
+    }
+
+    /// Reads contiguous elements starting at `ptr`, but only where `enable` is `true`. Disabled
+    /// elements are not accessed in memory; the corresponding value from `or` is passed through.
+    ///
+    /// # Safety
+    /// Every enabled element must be safe to read through `ptr`; disabled elements are not read.
+    #[must_use]
+    #[inline]
+    pub unsafe fn load_select_ptr(
+        ptr: *const T,
+        enable: Mask<<T as SimdElement>::Mask, N>,
+        or: Self,
+    ) -> Self {
+        // SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
+        unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) }
+    }
+
    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
    /// If an index is out-of-bounds, the element is instead selected from the `or` vector.
    ///
@ -493,6 +632,77 @@ pub unsafe fn gather_select_ptr(
        unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
    }

+    /// Conditionally writes contiguous elements to `slice`. The `enable` mask controls
+    /// which elements are written, as long as they're in-bounds of the `slice`.
+    /// If the element is disabled or out of bounds, no memory access to that location
+    /// is made.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, Mask};
+    /// let mut arr = [0i32; 4];
+    /// let write = Simd::from_array([-5, -4, -3, -2]);
+    /// let enable = Mask::from_array([false, true, true, true]);
+    ///
+    /// write.store_select(&mut arr[..3], enable);
+    /// assert_eq!(arr, [0, -4, -3, 0]);
+    /// ```
+    #[inline]
+    pub fn store_select(self, slice: &mut [T], mut enable: Mask<<T as SimdElement>::Mask, N>) {
+        enable &= mask_up_to(slice.len());
+        // SAFETY: `enable` now only covers lanes below `slice.len()`, so every enabled write is
+        // in bounds. `&mut [T]` is properly aligned to the element.
+        unsafe { self.store_select_ptr(slice.as_mut_ptr(), enable) }
+    }
+
+    /// Conditionally writes contiguous elements to `slice`. The `enable` mask controls
+    /// which elements are written.
+    ///
+    /// # Safety
+    ///
+    /// Every enabled element must be in bounds for the `slice`.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, Mask};
+    /// let mut arr = [0i32; 4];
+    /// let write = Simd::from_array([-5, -4, -3, -2]);
+    /// let enable = Mask::from_array([false, true, true, true]);
+    ///
+    /// unsafe { write.store_select_unchecked(&mut arr, enable) };
+    /// assert_eq!(arr, [0, -4, -3, -2]);
+    /// ```
+    #[inline]
+    pub unsafe fn store_select_unchecked(
+        self,
+        slice: &mut [T],
+        enable: Mask<<T as SimdElement>::Mask, N>,
+    ) {
+        let ptr = slice.as_mut_ptr();
+        // SAFETY: The safety of writing elements in `slice` is ensured by the caller.
+        unsafe { self.store_select_ptr(ptr, enable) }
+    }
+
+    /// Conditionally writes contiguous elements starting from `ptr`.
+    /// The `enable` mask controls which elements are written.
+    /// When disabled, the memory location corresponding to that element is not accessed.
+    ///
+    /// # Safety
+    ///
+    /// Memory addresses for each element are calculated with [`pointer::wrapping_offset`] and
+    /// each enabled element must satisfy the same conditions as [`core::ptr::write`].
+    #[inline]
+    pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
+        // SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
+        unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) }
+    }
+
    /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
    /// If an index is out-of-bounds, the write is suppressed without panicking.
    /// If two elements in the scattered vector would write to the same index
@ -980,3 +1190,37 @@ unsafe impl<T> SimdElement for *mut T
 {
    type Mask = isize;
 }
+
+#[inline]
+fn lane_indices<const N: usize>() -> Simd<usize, N> // the vector `[0, 1, ..., N - 1]`
+where
+    LaneCount<N>: SupportedLaneCount,
+{
+    let mut index = [0; N];
+    for i in 0..N {
+        index[i] = i; // lane `i` holds its own index
+    }
+    Simd::from_array(index)
+}
+
+#[inline]
+fn mask_up_to<M, const N: usize>(len: usize) -> Mask<M, N> // lane `i` is `true` iff `i < len`
+where
+    LaneCount<N>: SupportedLaneCount,
+    M: MaskElement,
+{
+    let index = lane_indices::<N>();
+    let max_value: u64 = M::max_unsigned(); // `MAX` of `M`'s unsigned counterpart, as `u64`
+    macro_rules! case { // compares in `$ty` if `N` and `max_value` both fit in it
+        ($ty:ty) => {
+            if N < <$ty>::MAX as usize && max_value as $ty as u64 == max_value {
+                return index.cast().simd_lt(Simd::splat(len.min(N) as $ty)).cast();
+            }
+        };
+    }
+    case!(u8); // candidates are tried from narrowest to widest; the first match returns
+    case!(u16);
+    case!(u32);
+    case!(u64);
+    index.simd_lt(Simd::splat(len)).cast() // fallback: compare the indices as `usize`
+}
--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@ -24,7 +24,7 @@ fn from(value: $from) -> $to {
 #[cfg(target_arch = "wasm32")]
 mod wasm32;

-#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",))]
 mod arm;

 #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
--- a/library/portable-simd/crates/core_simd/src/vendor/arm.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor/arm.rs
@ -4,12 +4,13 @@
 #[cfg(target_arch = "arm")]
 use core::arch::arm::*;

-#[cfg(target_arch = "aarch64")]
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
 use core::arch::aarch64::*;

 #[cfg(all(
    any(
        target_arch = "aarch64",
+        target_arch = "arm64ec",
        all(target_arch = "arm", target_feature = "v7"),
    ),
    target_endian = "little"
@ -69,7 +70,10 @@ mod simd32 {
    from_transmute! { unsafe Simd<i8, 4> => int8x4_t }
 }

-#[cfg(target_arch = "aarch64")]
+#[cfg(all(
+    any(target_arch = "aarch64", target_arch = "arm64ec"),
+    target_endian = "little"
+))]
 mod aarch64 {
    use super::neon::*;
    use super::*;
--- a/library/portable-simd/crates/core_simd/tests/masked_load_store.rs
+++ b/library/portable-simd/crates/core_simd/tests/masked_load_store.rs
@ -0,0 +1,35 @@
+#![feature(portable_simd)]
+use core_simd::simd::prelude::*;
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn masked_load_store() {
+    let mut arr = [u8::MAX; 7];
+
+    u8x4::splat(0).store_select(&mut arr[5..], Mask::from_array([false, true, false, true]));
+    // write to index 8 is OOB and dropped
+    assert_eq!(arr, [255u8, 255, 255, 255, 255, 255, 0]);
+
+    u8x4::from_array([0, 1, 2, 3]).store_select(&mut arr[1..], Mask::splat(true));
+    assert_eq!(arr, [255u8, 0, 1, 2, 3, 255, 0]);
+
+    // read from index 7 is OOB and dropped
+    assert_eq!(
+        u8x4::load_or(&arr[4..], u8x4::splat(42)),
+        u8x4::from_array([3, 255, 0, 42])
+    );
+    assert_eq!(
+        u8x4::load_select(
+            &arr[4..],
+            Mask::from_array([true, false, true, true]),
+            u8x4::splat(42)
+        ),
+        u8x4::from_array([3, 42, 0, 42])
+    );
+}
--- a/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs
@ -1,6 +1,6 @@
 #![feature(portable_simd)]
 use core::{fmt, ops::RangeInclusive};
-use test_helpers::{self, biteq, make_runner, prop_assert_biteq};
+use test_helpers::{biteq, make_runner, prop_assert_biteq};

 fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {
    let mut array = [0; N];
--- a/library/portable-simd/crates/std_float/Cargo.toml
+++ b/library/portable-simd/crates/std_float/Cargo.toml
@ -8,6 +8,13 @@ edition = "2021"
 [dependencies]
 core_simd = { path = "../core_simd", default-features = false }

+[dev-dependencies.test_helpers]
+path = "../test_helpers"
+
+[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
+wasm-bindgen = "0.2"
+wasm-bindgen-test = "0.3"
+
 [features]
 default = ["as_crate"]
 as_crate = []
--- a/library/portable-simd/crates/std_float/src/lib.rs
+++ b/library/portable-simd/crates/std_float/src/lib.rs
@ -1,4 +1,3 @@
-#![cfg_attr(feature = "as_crate", no_std)] // We are std!
 #![cfg_attr(
    feature = "as_crate",
    feature(core_intrinsics),
@ -44,7 +43,7 @@ pub trait Sealed {}
 /// For now this trait is available to permit experimentation with SIMD float
 /// operations that may lack hardware support, such as `mul_add`.
 pub trait StdFloat: Sealed + Sized {
-    /// Fused multiply-add.  Computes `(self * a) + b` with only one rounding error,
+    /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error,
    /// yielding a more accurate result than an unfused multiply-add.
    ///
    /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
@ -57,22 +56,65 @@ fn mul_add(self, a: Self, b: Self) -> Self {
        unsafe { intrinsics::simd_fma(self, a, b) }
    }

-    /// Produces a vector where every lane has the square root value
-    /// of the equivalently-indexed lane in `self`
+    /// Produces a vector where every element has the square root value
+    /// of the equivalently-indexed element in `self`
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn sqrt(self) -> Self {
        unsafe { intrinsics::simd_fsqrt(self) }
    }

-    /// Returns the smallest integer greater than or equal to each lane.
+    /// Produces a vector where every element has the sine of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn sin(self) -> Self;
+
+    /// Produces a vector where every element has the cosine of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn cos(self) -> Self;
+
+    /// Produces a vector where every element has the exponential (base e) of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn exp(self) -> Self;
+
+    /// Produces a vector where every element has the exponential (base 2) of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn exp2(self) -> Self;
+
+    /// Produces a vector where every element has the natural logarithm of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn ln(self) -> Self;
+
+    /// Produces a vector where every element has the logarithm of the equivalently-indexed
+    /// element in `self`, with respect to the base in the equivalently-indexed element of `base`.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn log(self, base: Self) -> Self {
+        unsafe { intrinsics::simd_div(self.ln(), base.ln()) }
+    }
+
+    /// Produces a vector where every element has the base-2 logarithm of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn log2(self) -> Self;
+
+    /// Produces a vector where every element has the base-10 logarithm of the value
+    /// in the equivalently-indexed element in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn log10(self) -> Self;
+
+    /// Returns the smallest integer greater than or equal to each element.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn ceil(self) -> Self {
        unsafe { intrinsics::simd_ceil(self) }
    }

-    /// Returns the largest integer value less than or equal to each lane.
+    /// Returns the largest integer value less than or equal to each element.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn floor(self) -> Self {
@ -101,46 +143,65 @@ fn trunc(self) -> Self {
 impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
 impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}

-// We can safely just use all the defaults.
-impl<const N: usize> StdFloat for Simd<f32, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
-    /// Returns the floating point's fractional value, with its integer part removed.
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    #[inline]
-    fn fract(self) -> Self {
-        self - self.trunc()
+macro_rules! impl_float { // implements `StdFloat` for `Simd<f32, N>` and `Simd<f64, N>`
+    {
+        $($fn:ident: $intrinsic:ident,)* // method name: intrinsic the method forwards to
+    } => {
+        impl<const N: usize> StdFloat for Simd<f32, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            #[inline]
+            fn fract(self) -> Self {
+                self - self.trunc()
+            }
+
+            $(
+            #[inline]
+            fn $fn(self) -> Self {
+                unsafe { intrinsics::$intrinsic(self) }
+            }
+            )*
+        }
+
+        impl<const N: usize> StdFloat for Simd<f64, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            #[inline]
+            fn fract(self) -> Self {
+                self - self.trunc()
+            }
+
+            $(
+            #[inline]
+            fn $fn(self) -> Self {
+                // aarch64 scalar fallback: https://github.com/llvm/llvm-project/issues/83729
+                #[cfg(target_arch = "aarch64")]
+                {
+                    let mut ln = Self::splat(0f64); // result for any `$fn`, not just `ln`
+                    for i in 0..N {
+                        ln[i] = self[i].$fn()
+                    }
+                    ln
+                }
+
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    unsafe { intrinsics::$intrinsic(self) }
+                }
+            }
+            )*
+        }
    }
 }

-impl<const N: usize> StdFloat for Simd<f64, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
-    /// Returns the floating point's fractional value, with its integer part removed.
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    #[inline]
-    fn fract(self) -> Self {
-        self - self.trunc()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use simd::prelude::*;
-
-    #[test]
-    fn everything_works() {
-        let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
-        let x2 = x + x;
-        let _xc = x.ceil();
-        let _xf = x.floor();
-        let _xr = x.round();
-        let _xt = x.trunc();
-        let _xfma = x.mul_add(x, x);
-        let _xsqrt = x.sqrt();
-        let _ = x2.abs() * x2;
-    }
+impl_float! {
+    sin: simd_fsin,
+    cos: simd_fcos,
+    exp: simd_fexp,
+    exp2: simd_fexp2,
+    ln: simd_flog,
+    log2: simd_flog2,
+    log10: simd_flog10,
 }
--- a/library/portable-simd/crates/std_float/tests/float.rs
+++ b/library/portable-simd/crates/std_float/tests/float.rs
@ -0,0 +1,74 @@
+#![feature(portable_simd)]
+
+macro_rules! unary_test { // passes SIMD and scalar `$func` to `test_unary_elementwise`
+    { $scalar:tt, $($func:tt),+ } => {
+        test_helpers::test_lanes! {
+            $(
+            fn $func<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &core_simd::simd::Simd::<$scalar, LANES>::$func,
+                    &$scalar::$func,
+                    &|_| true,
+                )
+            }
+            )*
+        }
+    }
+}
+
+macro_rules! binary_test { // passes SIMD and scalar `$func` to `test_binary_elementwise`
+    { $scalar:tt, $($func:tt),+ } => {
+        test_helpers::test_lanes! {
+            $(
+            fn $func<const LANES: usize>() {
+                test_helpers::test_binary_elementwise(
+                    &core_simd::simd::Simd::<$scalar, LANES>::$func,
+                    &$scalar::$func,
+                    &|_, _| true,
+                )
+            }
+            )*
+        }
+    }
+}
+
+macro_rules! ternary_test { // passes SIMD and scalar `$func` to `test_ternary_elementwise`
+    { $scalar:tt, $($func:tt),+ } => {
+        test_helpers::test_lanes! {
+            $(
+            fn $func<const LANES: usize>() {
+                test_helpers::test_ternary_elementwise(
+                    &core_simd::simd::Simd::<$scalar, LANES>::$func,
+                    &$scalar::$func,
+                    &|_, _, _| true,
+                )
+            }
+            )*
+        }
+    }
+}
+
+macro_rules! impl_tests { // emits `mod $scalar` holding the `StdFloat` tests for that type
+    { $scalar:tt } => {
+        mod $scalar {
+            use std_float::StdFloat;
+
+            unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc }
+            binary_test! { $scalar, log }
+            ternary_test! { $scalar, mul_add }
+
+            test_helpers::test_lanes! {
+                fn fract<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise_flush_subnormals(
+                        &core_simd::simd::Simd::<$scalar, LANES>::fract,
+                        &$scalar::fract,
+                        &|_| true,
+                    )
+                }
+            }
+        }
+    }
+}
+
+impl_tests! { f32 }
+impl_tests! { f64 }