From bb4bba5fcf9fce9208e6c2c1de008cec78a9519c Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Thu, 22 Feb 2024 17:09:20 +0900 Subject: [PATCH 01/13] Remove redundant imports --- crates/core_simd/src/vector.rs | 1 - crates/core_simd/tests/swizzle_dyn.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 9e97a3161bb..46b1acf25dd 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -3,7 +3,6 @@ ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; -use core::convert::{TryFrom, TryInto}; /// A SIMD vector with the shape of `[T; N]` but the operations of `T`. /// diff --git a/crates/core_simd/tests/swizzle_dyn.rs b/crates/core_simd/tests/swizzle_dyn.rs index f21a937f01c..19ffe1417c8 100644 --- a/crates/core_simd/tests/swizzle_dyn.rs +++ b/crates/core_simd/tests/swizzle_dyn.rs @@ -1,6 +1,6 @@ #![feature(portable_simd)] use core::{fmt, ops::RangeInclusive}; -use test_helpers::{self, biteq, make_runner, prop_assert_biteq}; +use test_helpers::{biteq, make_runner, prop_assert_biteq}; fn swizzle_dyn_scalar_ver(values: [u8; N], idxs: [u8; N]) -> [u8; N] { let mut array = [0; N]; From 6ce3ab72a07e4aadb6bf9b62427d5c31c4639e59 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Thu, 22 Feb 2024 16:57:30 +0900 Subject: [PATCH 02/13] Fix build error on big endian aarch64 --- crates/core_simd/src/vendor/arm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vendor/arm.rs b/crates/core_simd/src/vendor/arm.rs index ee5c6421373..233dc080728 100644 --- a/crates/core_simd/src/vendor/arm.rs +++ b/crates/core_simd/src/vendor/arm.rs @@ -69,7 +69,7 @@ mod simd32 { from_transmute! { unsafe Simd => int8x4_t } } -#[cfg(target_arch = "aarch64")] +#[cfg(all(target_arch = "aarch64", target_endian = "little"))] mod aarch64 { use super::neon::*; use super::*; From 18de239ecf5054ce4f284a221c394ceace0cbab5 Mon Sep 17 00:00:00 2001 From: AquaEBM Date: Fri, 23 Feb 2024 12:26:52 +0100 Subject: [PATCH 03/13] add stdarch_x86_avx512 feature flag for AVX-512-supporting architectures --- crates/core_simd/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a25723e11ce..ecadb56bd12 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -33,6 +33,10 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) )] +#![cfg_attr( + all(target_arch = "x86_64", target_feature = "avx512f"), + feature(stdarch_x86_avx512) +)] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![allow(internal_features)] From 499a53dd71aecfff2bf5863d509b8c21a6541841 Mon Sep 17 00:00:00 2001 From: avhz Date: Sun, 3 Mar 2024 07:24:19 +0100 Subject: [PATCH 04/13] feat: add SIMD float math functions (exp, exp2, log, log2, log10, sin, cos). --- crates/std_float/src/lib.rs | 93 +++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 4c547777fde..98f10e94a30 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -65,6 +65,62 @@ fn sqrt(self) -> Self { unsafe { intrinsics::simd_fsqrt(self) } } + /// Produces a vector where every lane has the sine of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn sin(self) -> Self { + unsafe { intrinsics::simd_fsin(self) } + } + + /// Produces a vector where every lane has the cosine of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn cos(self) -> Self { + unsafe { intrinsics::simd_fcos(self) } + } + + /// Produces a vector where every lane has the exponential (base e) of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn exp(self) -> Self { + unsafe { intrinsics::simd_fexp(self) } + } + + /// Produces a vector where every lane has the exponential (base 2) of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn exp2(self) -> Self { + unsafe { intrinsics::simd_fexp2(self) } + } + + /// Produces a vector where every lane has the natural logarithm of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn log(self) -> Self { + unsafe { intrinsics::simd_flog(self) } + } + + /// Produces a vector where every lane has the base-2 logarithm of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn log2(self) -> Self { + unsafe { intrinsics::simd_flog2(self) } + } + + /// Produces a vector where every lane has the base-10 logarithm of the value + /// in the equivalently-indexed lane in `self`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn log10(self) -> Self { + unsafe { intrinsics::simd_flog10(self) } + } + /// Returns the smallest integer greater than or equal to each lane. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] @@ -127,13 +183,14 @@ fn fract(self) -> Self { } #[cfg(test)] -mod tests { +mod tests_simd_floats { use super::*; use simd::prelude::*; #[test] - fn everything_works() { + fn everything_works_f32() { let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]); + let x2 = x + x; let _xc = x.ceil(); let _xf = x.floor(); @@ -141,6 +198,36 @@ fn everything_works() { let _xt = x.trunc(); let _xfma = x.mul_add(x, x); let _xsqrt = x.sqrt(); - let _ = x2.abs() * x2; + let _abs_mul = x2.abs() * x2; + + let _fexp = x.exp(); + let _fexp2 = x.exp2(); + let _flog = x.log(); + let _flog2 = x.log2(); + let _flog10 = x.log10(); + let _fsin = x.sin(); + let _fcos = x.cos(); + } + + #[test] + fn everything_works_f64() { + let x = f64x4::from_array([0.1, 0.5, 0.6, -1.5]); + + let x2 = x + x; + let _xc = x.ceil(); + let _xf = x.floor(); + let _xr = x.round(); + let _xt = x.trunc(); + let _xfma = x.mul_add(x, x); + let _xsqrt = x.sqrt(); + let _abs_mul = x2.abs() * x2; + + let _fexp = x.exp(); + let _fexp2 = x.exp2(); + let _flog = x.log(); + let _flog2 = x.log2(); + let _flog10 = x.log10(); + let _fsin = x.sin(); + let _fcos = x.cos(); } } From 5b5b259bf3488a07501e638b0af1d00af0c3c1c0 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Mar 2024 10:06:20 -0500 Subject: [PATCH 05/13] Test std_float --- Cargo.lock | 1 + crates/std_float/Cargo.toml | 3 ++ crates/std_float/src/lib.rs | 61 ++++++------------------------- crates/std_float/tests/float.rs | 64 +++++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 51 deletions(-) create mode 100644 crates/std_float/tests/float.rs diff --git a/Cargo.lock b/Cargo.lock index 46312c09657..1ede15ff002 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,6 +177,7 @@ name = "std_float" version = "0.1.0" dependencies = [ "core_simd", + "test_helpers", ] [[package]] diff --git a/crates/std_float/Cargo.toml b/crates/std_float/Cargo.toml index 84c69774cbd..8842b226104 100644 --- a/crates/std_float/Cargo.toml +++ b/crates/std_float/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" [dependencies] core_simd = { path = "../core_simd", default-features = false } +[dev-dependencies.test_helpers] +path = "../test_helpers" + [features] default = ["as_crate"] as_crate = [] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 98f10e94a30..5237f7cce15 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -101,10 +101,19 @@ fn exp2(self) -> Self { /// in the equivalently-indexed lane in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log(self) -> Self { + fn ln(self) -> Self { unsafe { intrinsics::simd_flog(self) } } + /// Produces a vector where every lane has the logarithm with respect to an arbitrary + /// in the equivalently-indexed lanes in `self` and `base`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn log(self, base: Self) -> Self { + unsafe { intrinsics::simd_div(self.ln(), base.ln()) } + } + + /// Produces a vector where every lane has the base-2 logarithm of the value /// in the equivalently-indexed lane in `self`. #[inline] @@ -181,53 +190,3 @@ fn fract(self) -> Self { self - self.trunc() } } - -#[cfg(test)] -mod tests_simd_floats { - use super::*; - use simd::prelude::*; - - #[test] - fn everything_works_f32() { - let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]); - - let x2 = x + x; - let _xc = x.ceil(); - let _xf = x.floor(); - let _xr = x.round(); - let _xt = x.trunc(); - let _xfma = x.mul_add(x, x); - let _xsqrt = x.sqrt(); - let _abs_mul = x2.abs() * x2; - - let _fexp = x.exp(); - let _fexp2 = x.exp2(); - let _flog = x.log(); - let _flog2 = x.log2(); - let _flog10 = x.log10(); - let _fsin = x.sin(); - let _fcos = x.cos(); - } - - #[test] - fn everything_works_f64() { - let x = f64x4::from_array([0.1, 0.5, 0.6, -1.5]); - - let x2 = x + x; - let _xc = x.ceil(); - let _xf = x.floor(); - let _xr = x.round(); - let _xt = x.trunc(); - let _xfma = x.mul_add(x, x); - let _xsqrt = x.sqrt(); - let _abs_mul = x2.abs() * x2; - - let _fexp = x.exp(); - let _fexp2 = x.exp2(); - let _flog = x.log(); - let _flog2 = x.log2(); - let _flog10 = x.log10(); - let _fsin = x.sin(); - let _fcos = x.cos(); - } -} diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs new file mode 100644 index 00000000000..60bdf00fba8 --- /dev/null +++ b/crates/std_float/tests/float.rs @@ -0,0 +1,64 @@ +#![feature(portable_simd)] + +macro_rules! unary_test { + { $scalar:tt, $($func:tt),+ } => { + test_helpers::test_lanes! { + $( + fn $func() { + test_helpers::test_unary_elementwise( + &core_simd::simd::Simd::<$scalar, LANES>::$func, + &$scalar::$func, + &|_| true, + ) + } + )* + } + } +} + +macro_rules! binary_test { + { $scalar:tt, $($func:tt),+ } => { + test_helpers::test_lanes! { + $( + fn $func() { + test_helpers::test_binary_elementwise( + &core_simd::simd::Simd::<$scalar, LANES>::$func, + &$scalar::$func, + &|_, _| true, + ) + } + )* + } + } +} + +macro_rules! ternary_test { + { $scalar:tt, $($func:tt),+ } => { + test_helpers::test_lanes! { + $( + fn $func() { + test_helpers::test_ternary_elementwise( + &core_simd::simd::Simd::<$scalar, LANES>::$func, + &$scalar::$func, + &|_, _, _| true, + ) + } + )* + } + } +} + +macro_rules! impl_tests { + { $scalar:tt } => { + mod $scalar { + use std_float::StdFloat; + + unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc, fract } + binary_test! { $scalar, log } + ternary_test! { $scalar, mul_add } + } + } +} + +impl_tests! { f32 } +impl_tests! { f64 } From e5d5006cf319ef31027d0a0c6c20aa136f7737a4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Mar 2024 10:11:52 -0500 Subject: [PATCH 06/13] Update docs --- crates/std_float/src/lib.rs | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 5237f7cce15..23ee55d8ac3 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -44,7 +44,7 @@ pub trait Sealed {} /// For now this trait is available to permit experimentation with SIMD float /// operations that may lack hardware support, such as `mul_add`. pub trait StdFloat: Sealed + Sized { - /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, + /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error, /// yielding a more accurate result than an unfused multiply-add. /// /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target @@ -57,56 +57,56 @@ fn mul_add(self, a: Self, b: Self) -> Self { unsafe { intrinsics::simd_fma(self, a, b) } } - /// Produces a vector where every lane has the square root value - /// of the equivalently-indexed lane in `self` + /// Produces a vector where every element has the square root value + /// of the equivalently-indexed element in `self` #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn sqrt(self) -> Self { unsafe { intrinsics::simd_fsqrt(self) } } - /// Produces a vector where every lane has the sine of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the sine of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn sin(self) -> Self { unsafe { intrinsics::simd_fsin(self) } } - /// Produces a vector where every lane has the cosine of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the cosine of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn cos(self) -> Self { unsafe { intrinsics::simd_fcos(self) } } - /// Produces a vector where every lane has the exponential (base e) of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the exponential (base e) of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn exp(self) -> Self { unsafe { intrinsics::simd_fexp(self) } } - /// Produces a vector where every lane has the exponential (base 2) of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the exponential (base 2) of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn exp2(self) -> Self { unsafe { intrinsics::simd_fexp2(self) } } - /// Produces a vector where every lane has the natural logarithm of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the natural logarithm of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn ln(self) -> Self { unsafe { intrinsics::simd_flog(self) } } - /// Produces a vector where every lane has the logarithm with respect to an arbitrary - /// in the equivalently-indexed lanes in `self` and `base`. + /// Produces a vector where every element has the logarithm with respect to an arbitrary + /// in the equivalently-indexed elements in `self` and `base`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn log(self, base: Self) -> Self { @@ -114,30 +114,30 @@ fn log(self, base: Self) -> Self { } - /// Produces a vector where every lane has the base-2 logarithm of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the base-2 logarithm of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn log2(self) -> Self { unsafe { intrinsics::simd_flog2(self) } } - /// Produces a vector where every lane has the base-10 logarithm of the value - /// in the equivalently-indexed lane in `self`. + /// Produces a vector where every element has the base-10 logarithm of the value + /// in the equivalently-indexed element in `self`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn log10(self) -> Self { unsafe { intrinsics::simd_flog10(self) } } - /// Returns the smallest integer greater than or equal to each lane. + /// Returns the smallest integer greater than or equal to each element. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn ceil(self) -> Self { unsafe { intrinsics::simd_ceil(self) } } - /// Returns the largest integer value less than or equal to each lane. + /// Returns the largest integer value less than or equal to each element. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn floor(self) -> Self { From bcedde54568f6bfb0f150ff8fe5d26f427b129d3 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Mar 2024 10:28:33 -0500 Subject: [PATCH 07/13] Fix formatting --- crates/std_float/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 23ee55d8ac3..44bbcba412f 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -113,7 +113,6 @@ fn log(self, base: Self) -> Self { unsafe { intrinsics::simd_div(self.ln(), base.ln()) } } - /// Produces a vector where every element has the base-2 logarithm of the value /// in the equivalently-indexed element in `self`. #[inline] From 2f062b8f5eb40d566b2e88ab960b176687167faa Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Mar 2024 10:29:32 -0500 Subject: [PATCH 08/13] Fix wasm tests --- crates/std_float/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/std_float/Cargo.toml b/crates/std_float/Cargo.toml index 8842b226104..0896094ee63 100644 --- a/crates/std_float/Cargo.toml +++ b/crates/std_float/Cargo.toml @@ -11,6 +11,10 @@ core_simd = { path = "../core_simd", default-features = false } [dev-dependencies.test_helpers] path = "../test_helpers" +[target.'cfg(target_arch = "wasm32")'.dev-dependencies] +wasm-bindgen = "0.2" +wasm-bindgen-test = "0.3" + [features] default = ["as_crate"] as_crate = [] From 278eb287b34d6335a3ac4720f1517a12671fd0b8 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Mar 2024 11:28:58 -0500 Subject: [PATCH 09/13] Attempt to avoid LLVM error --- Cargo.lock | 2 + crates/std_float/src/lib.rs | 114 ++++++++++++++++++-------------- crates/std_float/tests/float.rs | 12 +++- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ede15ff002..1584c704fb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,6 +178,8 @@ version = "0.1.0" dependencies = [ "core_simd", "test_helpers", + "wasm-bindgen", + "wasm-bindgen-test", ] [[package]] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 44bbcba412f..148aa5f9f17 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -1,4 +1,3 @@ -#![cfg_attr(feature = "as_crate", no_std)] // We are std! #![cfg_attr( feature = "as_crate", feature(core_intrinsics), @@ -67,43 +66,28 @@ fn sqrt(self) -> Self { /// Produces a vector where every element has the sine of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn sin(self) -> Self { - unsafe { intrinsics::simd_fsin(self) } - } + fn sin(self) -> Self; /// Produces a vector where every element has the cosine of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn cos(self) -> Self { - unsafe { intrinsics::simd_fcos(self) } - } + fn cos(self) -> Self; /// Produces a vector where every element has the exponential (base e) of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp(self) -> Self { - unsafe { intrinsics::simd_fexp(self) } - } + fn exp(self) -> Self; /// Produces a vector where every element has the exponential (base 2) of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp2(self) -> Self { - unsafe { intrinsics::simd_fexp2(self) } - } + fn exp2(self) -> Self; /// Produces a vector where every element has the natural logarithm of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn ln(self) -> Self { - unsafe { intrinsics::simd_flog(self) } - } + fn ln(self) -> Self; /// Produces a vector where every element has the logarithm with respect to an arbitrary /// in the equivalently-indexed elements in `self` and `base`. @@ -115,19 +99,13 @@ fn log(self, base: Self) -> Self { /// Produces a vector where every element has the base-2 logarithm of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log2(self) -> Self { - unsafe { intrinsics::simd_flog2(self) } - } + fn log2(self) -> Self; /// Produces a vector where every element has the base-10 logarithm of the value /// in the equivalently-indexed element in `self`. - #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log10(self) -> Self { - unsafe { intrinsics::simd_flog10(self) } - } + fn log10(self) -> Self; /// Returns the smallest integer greater than or equal to each element. #[must_use = "method returns a new vector and does not mutate the original value"] @@ -165,27 +143,65 @@ fn trunc(self) -> Self { impl Sealed for Simd where LaneCount: SupportedLaneCount {} impl Sealed for Simd where LaneCount: SupportedLaneCount {} -// We can safely just use all the defaults. -impl StdFloat for Simd -where - LaneCount: SupportedLaneCount, -{ - /// Returns the floating point's fractional value, with its integer part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - fn fract(self) -> Self { - self - self.trunc() +macro_rules! impl_float { + { + $($fn:ident: $intrinsic:ident,)* + } => { + impl StdFloat for Simd + where + LaneCount: SupportedLaneCount, + { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } + + $( + #[inline] + fn $fn(self) -> Self { + unsafe { intrinsics::$intrinsic(self) } + } + )* + } + + impl StdFloat for Simd + where + LaneCount: SupportedLaneCount, + { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } + + $( + #[inline] + fn $fn(self) -> Self { + // https://github.com/llvm/llvm-project/issues/83729 + #[cfg(target_arch = "aarch64")] + { + let mut ln = Self::splat(0f64); + for i in 0..N { + ln[i] = self[i].$fn() + } + ln + } + + #[cfg(not(target_arch = "aarch64"))] + { + unsafe { intrinsics::$intrinsic(self) } + } + } + )* + } } } -impl StdFloat for Simd -where - LaneCount: SupportedLaneCount, -{ - /// Returns the floating point's fractional value, with its integer part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } +impl_float! { + sin: simd_fsin, + cos: simd_fcos, + exp: simd_fexp, + exp2: simd_fexp2, + ln: simd_flog, + log2: simd_flog2, + log10: simd_flog10, } diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index 60bdf00fba8..c66c968f8c6 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -53,9 +53,19 @@ macro_rules! impl_tests { mod $scalar { use std_float::StdFloat; - unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc, fract } + unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc } binary_test! { $scalar, log } ternary_test! { $scalar, mul_add } + + test_helpers::test_lanes! { + fn fract() { + test_helpers::test_unary_elementwise_flush_subnormals( + &core_simd::simd::Simd::<$scalar, LANES>::fract, + &$scalar::fract, + &|_| true, + ) + } + } } } } From ca4033f49b1f6019561b8b161b4097b4a07f2e1b Mon Sep 17 00:00:00 2001 From: Daniel Paoliello Date: Thu, 22 Feb 2024 16:02:22 -0800 Subject: [PATCH 10/13] Add arm64ec support --- crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/swizzle_dyn.rs | 8 ++++++-- crates/core_simd/src/vendor.rs | 2 +- crates/core_simd/src/vendor/arm.rs | 8 ++++++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index ecadb56bd12..8aee556b772 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -17,7 +17,7 @@ )] #![cfg_attr( all( - any(target_arch = "aarch64", target_arch = "arm",), + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",), any( all(target_feature = "v6", not(target_feature = "mclass")), all(target_feature = "mclass", target_feature = "dsp"), diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index ae9ff6894b0..8a1079042f0 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -16,7 +16,10 @@ impl Simd #[inline] pub fn swizzle_dyn(self, idxs: Simd) -> Self { #![allow(unused_imports, unused_unsafe)] - #[cfg(all(target_arch = "aarch64", target_endian = "little"))] + #[cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ))] use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8}; #[cfg(all( target_arch = "arm", @@ -37,6 +40,7 @@ pub fn swizzle_dyn(self, idxs: Simd) -> Self { #[cfg(all( any( target_arch = "aarch64", + target_arch = "arm64ec", all(target_arch = "arm", target_feature = "v7") ), target_feature = "neon", @@ -48,7 +52,7 @@ pub fn swizzle_dyn(self, idxs: Simd) -> Self { #[cfg(target_feature = "simd128")] 16 => transize(wasm::i8x16_swizzle, self, idxs), #[cfg(all( - target_arch = "aarch64", + any(target_arch = "aarch64", target_arch = "arm64ec"), target_feature = "neon", target_endian = "little" ))] diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs index 6223bedb4e1..1a34a3a8de5 100644 --- a/crates/core_simd/src/vendor.rs +++ b/crates/core_simd/src/vendor.rs @@ -24,7 +24,7 @@ fn from(value: $from) -> $to { #[cfg(target_arch = "wasm32")] mod wasm32; -#[cfg(any(target_arch = "aarch64", target_arch = "arm",))] +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",))] mod arm; #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] diff --git a/crates/core_simd/src/vendor/arm.rs b/crates/core_simd/src/vendor/arm.rs index 233dc080728..f8878d11f09 100644 --- a/crates/core_simd/src/vendor/arm.rs +++ b/crates/core_simd/src/vendor/arm.rs @@ -4,12 +4,13 @@ #[cfg(target_arch = "arm")] use core::arch::arm::*; -#[cfg(target_arch = "aarch64")] +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] use core::arch::aarch64::*; #[cfg(all( any( target_arch = "aarch64", + target_arch = "arm64ec", all(target_arch = "arm", target_feature = "v7"), ), target_endian = "little" @@ -69,7 +70,10 @@ mod simd32 { from_transmute! { unsafe Simd => int8x4_t } } -#[cfg(all(target_arch = "aarch64", target_endian = "little"))] +#[cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" +))] mod aarch64 { use super::neon::*; use super::*; From 4f0ba1ae1963cf785370f0b951018ed214dc48bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Oko=C5=84ski?= Date: Thu, 29 Feb 2024 17:09:59 +0100 Subject: [PATCH 11/13] Add support for masked loads & stores --- crates/core_simd/src/masks.rs | 6 + crates/core_simd/src/vector.rs | 244 ++++++++++++++++++++ crates/core_simd/tests/masked_load_store.rs | 35 +++ 3 files changed, 285 insertions(+) create mode 100644 crates/core_simd/tests/masked_load_store.rs diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e480c25a51e..e6e27c76a5e 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -34,6 +34,7 @@ fn valid(values: Simd) -> bool fn eq(self, other: Self) -> bool; fn to_usize(self) -> usize; + fn max_unsigned() -> u64; type Unsigned: SimdElement; @@ -78,6 +79,11 @@ fn to_usize(self) -> usize { self as usize } + #[inline] + fn max_unsigned() -> u64 { + <$unsigned>::MAX as u64 + } + type Unsigned = $unsigned; const TRUE: Self = -1; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 46b1acf25dd..ee0926bcae8 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,5 +1,6 @@ use crate::simd::{ cmp::SimdPartialOrd, + num::SimdUint, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; @@ -261,6 +262,7 @@ impl Swizzle for Splat { /// # Panics /// /// Panics if the slice's length is less than the vector's `Simd::N`. + /// Use `load_or_default` for an alternative that does not panic. /// /// # Example /// @@ -314,6 +316,143 @@ pub fn copy_to_slice(self, slice: &mut [T]) { unsafe { self.store(slice.as_mut_ptr().cast()) } } + /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for + /// the `slice`. Otherwise, the default value for the element type is returned. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11]; + /// + /// let result = Simd::::load_or_default(&vec); + /// assert_eq!(result, Simd::from_array([10, 11, 0, 0])); + /// ``` + #[must_use] + #[inline] + pub fn load_or_default(slice: &[T]) -> Self + where + T: Default, + { + Self::load_or(slice, Default::default()) + } + + /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for + /// the `slice`. Otherwise, the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11]; + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_or(&vec, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, -2])); + /// ``` + #[must_use] + #[inline] + pub fn load_or(slice: &[T], or: Self) -> Self { + Self::load_select(slice, Mask::splat(true), or) + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled or out of bounds for the slice, that memory location + /// is not accessed and the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let enable = Mask::from_array([true, true, false, true]); + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_select(&vec, enable, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// ``` + #[must_use] + #[inline] + pub fn load_select_or_default(slice: &[T], enable: Mask<::Mask, N>) -> Self + where + T: Default, + { + Self::load_select(slice, enable, Default::default()) + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled or out of bounds for the slice, that memory location + /// is not accessed and the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let enable = Mask::from_array([true, true, false, true]); + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_select(&vec, enable, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// ``` + #[must_use] + #[inline] + pub fn load_select( + slice: &[T], + mut enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + enable &= mask_up_to(slice.len()); + // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to + // the element. + unsafe { Self::load_select_ptr(slice.as_ptr(), enable, or) } + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled, that memory location is not accessed and the corresponding + /// value from `or` is passed through. + #[must_use] + #[inline] + pub unsafe fn load_select_unchecked( + slice: &[T], + enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + let ptr = slice.as_ptr(); + // SAFETY: The safety of reading elements from `slice` is ensured by the caller. + unsafe { Self::load_select_ptr(ptr, enable, or) } + } + + /// Reads contiguous elements starting at `ptr`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled, that memory location is not accessed and the corresponding + /// value from `or` is passed through. + #[must_use] + #[inline] + pub unsafe fn load_select_ptr( + ptr: *const T, + enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. + unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) } + } + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. /// If an index is out-of-bounds, the element is instead selected from the `or` vector. /// @@ -492,6 +631,77 @@ pub unsafe fn gather_select_ptr( unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } } + /// Conditionally write contiguous elements to `slice`. The `enable` mask controls + /// which elements are written, as long as they're in-bounds of the `slice`. + /// If the element is disabled or out of bounds, no memory access to that location + /// is made. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let mut arr = [0i32; 4]; + /// let write = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([false, true, true, true]); + /// + /// write.store_select(&mut arr[..3], enable); + /// assert_eq!(arr, [0, -4, -3, 0]); + /// ``` + #[inline] + pub fn store_select(self, slice: &mut [T], mut enable: Mask<::Mask, N>) { + enable &= mask_up_to(slice.len()); + // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to + // the element. + unsafe { self.store_select_ptr(slice.as_mut_ptr(), enable) } + } + + /// Conditionally write contiguous elements to `slice`. The `enable` mask controls + /// which elements are written. + /// + /// # Safety + /// + /// Every enabled element must be in bounds for the `slice`. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let mut arr = [0i32; 4]; + /// let write = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([false, true, true, true]); + /// + /// unsafe { write.store_select_unchecked(&mut arr, enable) }; + /// assert_eq!(arr, [0, -4, -3, -2]); + /// ``` + #[inline] + pub unsafe fn store_select_unchecked( + self, + slice: &mut [T], + enable: Mask<::Mask, N>, + ) { + let ptr = slice.as_mut_ptr(); + // SAFETY: The safety of writing elements in `slice` is ensured by the caller. + unsafe { self.store_select_ptr(ptr, enable) } + } + + /// Conditionally write contiguous elements starting from `ptr`. + /// The `enable` mask controls which elements are written. + /// When disabled, the memory location corresponding to that element is not accessed. + /// + /// # Safety + /// + /// Memory addresses for element are calculated [`core::ptr::wrapping_offset`] and + /// each enabled element must satisfy the same conditions as [`core::ptr::write`]. + #[inline] + pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) { + // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. + unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) } + } + /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. /// If an index is out-of-bounds, the write is suppressed without panicking. /// If two elements in the scattered vector would write to the same index @@ -979,3 +1189,37 @@ unsafe impl SimdElement for *mut T { type Mask = isize; } + +#[inline] +fn lane_indices() -> Simd +where + LaneCount: SupportedLaneCount, +{ + let mut index = [0; N]; + for i in 0..N { + index[i] = i; + } + Simd::from_array(index) +} + +#[inline] +fn mask_up_to(len: usize) -> Mask +where + LaneCount: SupportedLaneCount, + M: MaskElement, +{ + let index = lane_indices::(); + let max_value: u64 = M::max_unsigned(); + macro_rules! case { + ($ty:ty) => { + if N < <$ty>::MAX as usize && max_value as $ty as u64 == max_value { + return index.cast().simd_lt(Simd::splat(len.min(N) as $ty)).cast(); + } + }; + } + case!(u8); + case!(u16); + case!(u32); + case!(u64); + index.simd_lt(Simd::splat(len)).cast() +} diff --git a/crates/core_simd/tests/masked_load_store.rs b/crates/core_simd/tests/masked_load_store.rs new file mode 100644 index 00000000000..3d38658e945 --- /dev/null +++ b/crates/core_simd/tests/masked_load_store.rs @@ -0,0 +1,35 @@ +#![feature(portable_simd)] +use core_simd::simd::prelude::*; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn masked_load_store() { + let mut arr = [u8::MAX; 7]; + + u8x4::splat(0).store_select(&mut arr[5..], Mask::from_array([false, true, false, true])); + // write to index 8 is OOB and dropped + assert_eq!(arr, [255u8, 255, 255, 255, 255, 255, 0]); + + u8x4::from_array([0, 1, 2, 3]).store_select(&mut arr[1..], Mask::splat(true)); + assert_eq!(arr, [255u8, 0, 1, 2, 3, 255, 0]); + + // read from index 8 is OOB and dropped + assert_eq!( + u8x4::load_or(&arr[4..], u8x4::splat(42)), + u8x4::from_array([3, 255, 0, 42]) + ); + assert_eq!( + u8x4::load_select( + &arr[4..], + Mask::from_array([true, false, true, true]), + u8x4::splat(42) + ), + u8x4::from_array([3, 42, 0, 42]) + ); +} From 53de3f08061bfeb4a96ade3587aee366cd13e065 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 22 Mar 2024 18:41:08 -0400 Subject: [PATCH 12/13] Use v1 prelude to match core (rust-lang/portable-simd#406) --- crates/core_simd/src/lib.rs | 5 +++++ crates/core_simd/src/vector.rs | 1 + 2 files changed, 6 insertions(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 8aee556b772..a2e40e81105 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -13,6 +13,7 @@ simd_ffi, staged_api, strict_provenance, + prelude_import, ptr_metadata )] #![cfg_attr( @@ -43,6 +44,10 @@ #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. +#[prelude_import] +#[allow(unused_imports)] +use core::prelude::v1::*; + #[path = "mod.rs"] mod core_simd; pub use self::core_simd::simd; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index ee0926bcae8..2d8b48a9a8f 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -4,6 +4,7 @@ ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; +use core::convert::{TryFrom, TryInto}; /// A SIMD vector with the shape of `[T; N]` but the operations of `T`. /// From cff979eec1ac0473fc4960ee6cde462c6aeda824 Mon Sep 17 00:00:00 2001 From: Daniel Paoliello Date: Fri, 22 Mar 2024 16:08:24 -0700 Subject: [PATCH 13/13] Validate generating docs (rust-lang/portable-simd#407) - Generate docs with warnings disabled. - Instruct doctest to add `#![deny(warnings)]` to all tests. --- .github/workflows/ci.yml | 5 +++++ crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/vector.rs | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90543044ea8..b292be2d6f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -141,6 +141,11 @@ jobs: - name: Test (release) run: cargo test --verbose --target=${{ matrix.target }} --release + - name: Generate docs + run: cargo doc --verbose --target=${{ matrix.target }} + env: + RUSTDOCFLAGS: -Dwarnings + wasm-tests: name: "wasm (firefox, ${{ matrix.name }})" runs-on: ubuntu-latest diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a2e40e81105..7a161b7e01d 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -40,6 +40,7 @@ )] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] +#![doc(test(attr(deny(warnings))))] #![allow(internal_features)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 2d8b48a9a8f..6c8205b112c 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -325,7 +325,7 @@ pub fn copy_to_slice(self, slice: &mut [T]) { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, Mask}; + /// # use simd::Simd; /// let vec: Vec = vec![10, 11]; /// /// let result = Simd::::load_or_default(&vec); @@ -348,7 +348,7 @@ pub fn load_or_default(slice: &[T]) -> Self /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, Mask}; + /// # use simd::Simd; /// let vec: Vec = vec![10, 11]; /// let or = Simd::from_array([-5, -4, -3, -2]); /// @@ -695,7 +695,7 @@ pub unsafe fn store_select_unchecked( /// /// # Safety /// - /// Memory addresses for element are calculated [`core::ptr::wrapping_offset`] and + /// Memory addresses for element are calculated [`pointer::wrapping_offset`] and /// each enabled element must satisfy the same conditions as [`core::ptr::write`]. #[inline] pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) {