From b0a005dcfbbf4d395e4506963d5ab81877a226d2 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 14 Feb 2021 23:35:24 -0500 Subject: [PATCH 01/17] Add floating-point classification functions --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 906ee3f06ae..8d9fccd238a 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -17,6 +17,7 @@ mod fmt; mod intrinsics; mod ops; mod round; +mod comparisons; mod math; From d7649f46f3f562960f1a87b93e61a35dcd0cc857 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 6 Mar 2021 02:14:58 -0500 Subject: [PATCH 02/17] Various bug fixes --- crates/core_simd/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 8d9fccd238a..906ee3f06ae 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -17,7 +17,6 @@ mod fmt; mod intrinsics; mod ops; mod round; -mod comparisons; mod math; From 926cf3aba3fe453e36bc7e56b2b8b8894fca5377 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 7 Mar 2021 19:45:31 -0500 Subject: [PATCH 03/17] Add intrinsics --- crates/core_simd/src/intrinsics.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index fafeed6a62a..13cda880a6c 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -67,4 +67,15 @@ extern "platform-intrinsic" { // {s,u}sub.sat pub(crate) fn simd_saturating_sub(x: T, y: T) -> T; + + // reductions + pub(crate) fn simd_reduce_add_ordered(x: T, y: U) -> U; + pub(crate) fn simd_reduce_mul_ordered(x: T, y: U) -> U; + pub(crate) fn simd_reduce_all(x: T) -> bool; + pub(crate) fn simd_reduce_any(x: T) -> bool; + pub(crate) fn simd_reduce_max(x: T) -> U; + pub(crate) fn simd_reduce_min(x: T) -> U; + pub(crate) fn simd_reduce_and(x: T) -> U; + pub(crate) fn simd_reduce_or(x: T) -> U; + pub(crate) fn simd_reduce_xor(x: T) -> U; } From 875b31c33f6b0ccbb8590c2b3c9cbf1b11ed6165 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 7 Mar 2021 21:15:56 -0500 Subject: [PATCH 04/17] Implement reductions --- crates/core_simd/src/lib.rs | 2 + crates/core_simd/src/masks/bitmask.rs | 2 +- crates/core_simd/src/masks/full_masks.rs | 130 ++++++++++++--------- crates/core_simd/src/masks/mod.rs | 128 ++++++++++---------- crates/core_simd/src/reduction.rs | 142 +++++++++++++++++++++++ crates/core_simd/src/vector/float.rs | 1 + crates/core_simd/src/vector/int.rs | 1 + crates/core_simd/src/vector/uint.rs | 1 + 8 files changed, 289 insertions(+), 118 deletions(-) create mode 100644 crates/core_simd/src/reduction.rs diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 906ee3f06ae..0fc2641516d 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -11,6 +11,8 @@ mod first; mod permute; #[macro_use] mod transmute; +#[macro_use] +mod reduction; mod comparisons; mod fmt; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index d7400699fde..1d25db46742 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -3,7 +3,7 @@ use crate::LanesAtMost32; /// A mask where each lane is represented by a single bit. #[derive(Copy, Clone, Debug)] #[repr(transparent)] -pub struct BitMask(u64) +pub struct BitMask(pub(crate) u64) where BitMask: LanesAtMost32; diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index cca077b14d0..a6689ce48c6 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -14,22 +14,27 @@ impl core::fmt::Display for TryFromMaskError { } macro_rules! define_mask { - { $(#[$attr:meta])* struct $name:ident($type:ty); } => { + { + $(#[$attr:meta])* + struct $name:ident( + crate::$type:ident<$lanes2:ident> + ); + } => { $(#[$attr])* #[derive(Default, PartialEq, PartialOrd, Eq, Ord, Hash)] #[repr(transparent)] - pub struct $name($type) + pub struct $name(crate::$type<$lanes2>) where - $type: crate::LanesAtMost32; + crate::$type: crate::LanesAtMost32; impl Copy for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, {} impl Clone for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn clone(&self) -> Self { @@ -37,13 +42,13 @@ macro_rules! define_mask { } } - impl $name<$lanes> + impl $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { /// Construct a mask by setting all lanes to the given value. pub fn splat(value: bool) -> Self { - Self(<$type>::splat( + Self(>::splat( if value { -1 } else { @@ -76,42 +81,51 @@ macro_rules! define_mask { } } - /// Creates a mask from an integer vector. - /// - /// # Safety - /// All lanes must be either 0 or -1. + /// Converts the mask to the equivalent integer representation, where -1 represents + /// "set" and 0 represents "unset". #[inline] - pub unsafe fn from_int_unchecked(value: $type) -> Self { + pub fn to_int(self) -> crate::$type { + self.0 + } + + /// Creates a mask from the equivalent integer representation, where -1 represents + /// "set" and 0 represents "unset". + /// + /// Each provided lane must be either 0 or -1. + #[inline] + pub unsafe fn from_int_unchecked(value: crate::$type) -> Self { Self(value) } - /// Creates a mask from an integer vector. + /// Creates a mask from the equivalent integer representation, where -1 represents + /// "set" and 0 represents "unset". /// /// # Panics /// Panics if any lane is not 0 or -1. #[inline] - pub fn from_int(value: $type) -> Self { + pub fn from_int(value: crate::$type) -> Self { use core::convert::TryInto; value.try_into().unwrap() } } - impl core::convert::From for $name<$lanes> + impl core::convert::From for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn from(value: bool) -> Self { Self::splat(value) } } - impl core::convert::TryFrom<$type> for $name<$lanes> + impl core::convert::TryFrom> for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Error = TryFromMaskError; - fn try_from(value: $type) -> Result { - if value.as_slice().iter().all(|x| *x == 0 || *x == -1) { + fn try_from(value: crate::$type) -> Result { + let valid = (value.lanes_eq(crate::$type::::splat(0)) | value.lanes_eq(crate::$type::::splat(-1))).all(); + if valid { Ok(Self(value)) } else { Err(TryFromMaskError(())) @@ -119,21 +133,21 @@ macro_rules! define_mask { } } - impl core::convert::From<$name<$lanes>> for $type + impl core::convert::From<$name> for crate::$type where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { - fn from(value: $name<$lanes>) -> Self { + fn from(value: $name) -> Self { value.0 } } - impl core::convert::From> for $name<$lanes> + impl core::convert::From> for $name where - $type: crate::LanesAtMost32, - crate::BitMask<$lanes>: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, + crate::BitMask: crate::LanesAtMost32, { - fn from(value: crate::BitMask<$lanes>) -> Self { + fn from(value: crate::BitMask) -> Self { // TODO use an intrinsic to do this efficiently (with LLVM's sext instruction) let mut mask = Self::splat(false); for lane in 0..LANES { @@ -143,10 +157,10 @@ macro_rules! define_mask { } } - impl core::convert::From<$name<$lanes>> for crate::BitMask<$lanes> + impl core::convert::From<$name> for crate::BitMask where - $type: crate::LanesAtMost32, - crate::BitMask<$lanes>: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, + crate::BitMask: crate::LanesAtMost32, { fn from(value: $name<$lanes>) -> Self { // TODO use an intrinsic to do this efficiently (with LLVM's trunc instruction) @@ -158,9 +172,9 @@ macro_rules! define_mask { } } - impl core::fmt::Debug for $name<$lanes> + impl core::fmt::Debug for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { f.debug_list() @@ -169,36 +183,36 @@ macro_rules! define_mask { } } - impl core::fmt::Binary for $name<$lanes> + impl core::fmt::Binary for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::Binary::fmt(&self.0, f) } } - impl core::fmt::Octal for $name<$lanes> + impl core::fmt::Octal for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::Octal::fmt(&self.0, f) } } - impl core::fmt::LowerHex for $name<$lanes> + impl core::fmt::LowerHex for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::LowerHex::fmt(&self.0, f) } } - impl core::fmt::UpperHex for $name<$lanes> + impl core::fmt::UpperHex for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::UpperHex::fmt(&self.0, f) @@ -207,7 +221,7 @@ macro_rules! define_mask { impl core::ops::BitAnd for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -218,7 +232,7 @@ macro_rules! define_mask { impl core::ops::BitAnd for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -229,7 +243,7 @@ macro_rules! define_mask { impl core::ops::BitAnd<$name> for bool where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = $name; #[inline] @@ -240,7 +254,7 @@ macro_rules! define_mask { impl core::ops::BitOr for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -251,7 +265,7 @@ macro_rules! define_mask { impl core::ops::BitOr for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -262,7 +276,7 @@ macro_rules! define_mask { impl core::ops::BitOr<$name> for bool where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = $name; #[inline] @@ -273,7 +287,7 @@ macro_rules! define_mask { impl core::ops::BitXor for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -284,7 +298,7 @@ macro_rules! define_mask { impl core::ops::BitXor for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = Self; #[inline] @@ -295,7 +309,7 @@ macro_rules! define_mask { impl core::ops::BitXor<$name> for bool where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = $name; #[inline] @@ -306,7 +320,7 @@ macro_rules! define_mask { impl core::ops::Not for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { type Output = $name; #[inline] @@ -317,7 +331,7 @@ macro_rules! define_mask { impl core::ops::BitAndAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitand_assign(&mut self, rhs: Self) { @@ -327,7 +341,7 @@ macro_rules! define_mask { impl core::ops::BitAndAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitand_assign(&mut self, rhs: bool) { @@ -337,7 +351,7 @@ macro_rules! define_mask { impl core::ops::BitOrAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitor_assign(&mut self, rhs: Self) { @@ -347,7 +361,7 @@ macro_rules! define_mask { impl core::ops::BitOrAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitor_assign(&mut self, rhs: bool) { @@ -357,7 +371,7 @@ macro_rules! define_mask { impl core::ops::BitXorAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitxor_assign(&mut self, rhs: Self) { @@ -367,13 +381,15 @@ macro_rules! define_mask { impl core::ops::BitXorAssign for $name where - $type: crate::LanesAtMost32, + crate::$type: crate::LanesAtMost32, { #[inline] fn bitxor_assign(&mut self, rhs: bool) { *self ^= Self::splat(rhs); } } + + impl_full_mask_reductions! { $name, $type } } } diff --git a/crates/core_simd/src/masks/mod.rs b/crates/core_simd/src/masks/mod.rs index 0b986aaf7e1..4503187e4b8 100644 --- a/crates/core_simd/src/masks/mod.rs +++ b/crates/core_simd/src/masks/mod.rs @@ -7,25 +7,27 @@ pub use full_masks::*; mod bitmask; pub use bitmask::*; -use crate::LanesAtMost32; +use crate::{LanesAtMost32, SimdI128, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize}; macro_rules! define_opaque_mask { { $(#[$attr:meta])* - struct $name:ident($inner_ty:ty); - @bits $bits_ty:ty + struct $name:ident($inner_ty:ident<$lanes2:ident>); + @bits $bits_ty:ident } => { $(#[$attr])* #[allow(non_camel_case_types)] - pub struct $name($inner_ty) where $bits_ty: LanesAtMost32; + pub struct $name($inner_ty) where $bits_ty: LanesAtMost32; - impl $name<$lanes> + impl_opaque_mask_reductions! { $name, $inner_ty, $bits_ty } + + impl $name where - $bits_ty: LanesAtMost32 + $bits_ty: LanesAtMost32 { /// Construct a mask by setting all lanes to the given value. pub fn splat(value: bool) -> Self { - Self(<$inner_ty>::splat(value)) + Self(<$inner_ty>::splat(value)) } /// Converts an array to a SIMD vector. @@ -69,66 +71,72 @@ macro_rules! define_opaque_mask { } } - impl From> for $name<$lanes> + impl From> for $name where - $bits_ty: LanesAtMost32, - BitMask<$lanes>: LanesAtMost32, + $bits_ty: LanesAtMost32, + BitMask: LanesAtMost32, { - fn from(value: BitMask<$lanes>) -> Self { + fn from(value: BitMask) -> Self { Self(value.into()) } } - impl From<$name<$lanes>> for crate::BitMask<$lanes> + impl From<$name> for crate::BitMask where - $bits_ty: LanesAtMost32, - BitMask<$lanes>: LanesAtMost32, + $bits_ty: LanesAtMost32, + BitMask: LanesAtMost32, { - fn from(value: $name<$lanes>) -> Self { + fn from(value: $name) -> Self { value.0.into() } } - impl From<$inner_ty> for $name<$lanes> + impl From<$inner_ty> for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { - fn from(value: $inner_ty) -> Self { + fn from(value: $inner_ty) -> Self { Self(value) } } - impl From<$name<$lanes>> for $inner_ty + impl From<$name> for $inner_ty where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { - fn from(value: $name<$lanes>) -> Self { + fn from(value: $name) -> Self { value.0 } } // vector/array conversion - impl From<[bool; $lanes]> for $name<$lanes> where $bits_ty: crate::LanesAtMost32 { - fn from(array: [bool; $lanes]) -> Self { + impl From<[bool; LANES]> for $name + where + $bits_ty: crate::LanesAtMost32 + { + fn from(array: [bool; LANES]) -> Self { Self::from_array(array) } } - impl From<$name<$lanes>> for [bool; $lanes] where $bits_ty: crate::LanesAtMost32 { - fn from(vector: $name<$lanes>) -> Self { + impl From<$name> for [bool; LANES] + where + $bits_ty: crate::LanesAtMost32 + { + fn from(vector: $name) -> Self { vector.to_array() } } - impl Copy for $name<$lanes> + impl Copy for $name where - $inner_ty: Copy, - $bits_ty: LanesAtMost32, + $inner_ty: Copy, + $bits_ty: LanesAtMost32, {} - impl Clone for $name<$lanes> + impl Clone for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn clone(&self) -> Self { @@ -136,9 +144,9 @@ macro_rules! define_opaque_mask { } } - impl Default for $name<$lanes> + impl Default for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn default() -> Self { @@ -146,9 +154,9 @@ macro_rules! define_opaque_mask { } } - impl PartialEq for $name<$lanes> + impl PartialEq for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -156,9 +164,9 @@ macro_rules! define_opaque_mask { } } - impl PartialOrd for $name<$lanes> + impl PartialOrd for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -166,9 +174,9 @@ macro_rules! define_opaque_mask { } } - impl core::fmt::Debug for $name<$lanes> + impl core::fmt::Debug for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::Debug::fmt(&self.0, f) @@ -177,7 +185,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitAnd for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -188,7 +196,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitAnd for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -199,7 +207,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitAnd<$name> for bool where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = $name; #[inline] @@ -210,7 +218,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitOr for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -221,7 +229,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitOr for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -232,7 +240,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitOr<$name> for bool where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = $name; #[inline] @@ -243,7 +251,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitXor for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -254,7 +262,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitXor for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = Self; #[inline] @@ -265,7 +273,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitXor<$name> for bool where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = $name; #[inline] @@ -276,7 +284,7 @@ macro_rules! define_opaque_mask { impl core::ops::Not for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { type Output = $name; #[inline] @@ -287,7 +295,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitAndAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitand_assign(&mut self, rhs: Self) { @@ -297,7 +305,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitAndAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitand_assign(&mut self, rhs: bool) { @@ -307,7 +315,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitOrAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitor_assign(&mut self, rhs: Self) { @@ -317,7 +325,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitOrAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitor_assign(&mut self, rhs: bool) { @@ -327,7 +335,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitXorAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitxor_assign(&mut self, rhs: Self) { @@ -337,7 +345,7 @@ macro_rules! define_opaque_mask { impl core::ops::BitXorAssign for $name where - $bits_ty: LanesAtMost32, + $bits_ty: LanesAtMost32, { #[inline] fn bitxor_assign(&mut self, rhs: bool) { @@ -352,7 +360,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct Mask8(SimdMask8); - @bits crate::SimdI8 + @bits SimdI8 } define_opaque_mask! { @@ -360,7 +368,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct Mask16(SimdMask16); - @bits crate::SimdI16 + @bits SimdI16 } define_opaque_mask! { @@ -368,7 +376,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct Mask32(SimdMask32); - @bits crate::SimdI32 + @bits SimdI32 } define_opaque_mask! { @@ -376,7 +384,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct Mask64(SimdMask64); - @bits crate::SimdI64 + @bits SimdI64 } define_opaque_mask! { @@ -384,7 +392,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct Mask128(SimdMask128); - @bits crate::SimdI128 + @bits SimdI128 } define_opaque_mask! { @@ -392,7 +400,7 @@ define_opaque_mask! { /// /// The layout of this type is unspecified. struct MaskSize(SimdMaskSize); - @bits crate::SimdIsize + @bits SimdIsize } /// Vector of eight 8-bit masks diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs new file mode 100644 index 00000000000..0c6d91a2bef --- /dev/null +++ b/crates/core_simd/src/reduction.rs @@ -0,0 +1,142 @@ +macro_rules! impl_integer_reductions { + { $name:ident, $scalar:ty } => { + impl crate::$name + where + Self: crate::LanesAtMost32 + { + /// Produces the sum of the lanes of the vector, with wrapping addition. + #[inline] + pub fn wrapping_sum(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } + } + + /// Produces the sum of the lanes of the vector, with wrapping multiplication. + #[inline] + pub fn wrapping_product(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } + } + + /// Sequentially performs bitwise "and" between the lanes of the vector. + #[inline] + pub fn and_lanes(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_and(self) } + } + + /// Sequentially performs bitwise "or" between the lanes of the vector. + #[inline] + pub fn or_lanes(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_or(self) } + } + + /// Sequentially performs bitwise "xor" between the lanes of the vector. + #[inline] + pub fn xor_lanes(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_xor(self) } + } + + /// Returns the maximum lane in the vector. + #[inline] + pub fn max_lane(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_max(self) } + } + + /// Returns the minimum lane in the vector. + #[inline] + pub fn min_lane(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_min(self) } + } + } + } +} + +macro_rules! impl_float_reductions { + { $name:ident, $scalar:ty } => { + impl crate::$name + where + Self: crate::LanesAtMost32 + { + /// Produces the sum of the lanes of the vector. + #[inline] + pub fn sum(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } + } + + /// Produces the sum of the lanes of the vector. + #[inline] + pub fn product(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } + } + + /// Returns the maximum lane in the vector. + #[inline] + pub fn max_lane(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_max(self) } + } + + /// Returns the minimum lane in the vector. + #[inline] + pub fn min_lane(self) -> $scalar { + unsafe { crate::intrinsics::simd_reduce_min(self) } + } + } + } +} + +macro_rules! impl_full_mask_reductions { + { $name:ident, $inner:ident } => { + impl crate::$name + where + crate::$inner: crate::LanesAtMost32 + { + /// Returns true if any lane is set, or false otherwise. + #[inline] + pub fn any(self) -> bool { + unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) } + } + + /// Returns true if all lanes are set, or false otherwise. + #[inline] + pub fn all(self) -> bool { + unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) } + } + } + } +} + +macro_rules! impl_opaque_mask_reductions { + { $name:ident, $inner:ident, $bits_ty:ident } => { + impl $name + where + $bits_ty: crate::LanesAtMost32 + { + /// Returns true if any lane is set, or false otherwise. + #[inline] + pub fn any(self) -> bool { + self.0.any() + } + + /// Returns true if all lanes are set, or false otherwise. + #[inline] + pub fn all(self) -> bool { + self.0.all() + } + } + } +} + +impl crate::BitMask +where + crate::BitMask: crate::LanesAtMost32, +{ + /// Returns true if any lane is set, or false otherwise. + #[inline] + pub fn any(self) -> bool { + self.0 != 0 + } + + /// Returns true if all lanes are set, or false otherwise. + #[inline] + pub fn all(self) -> bool { + self.0 == (!0) >> (64 - LANES) + } +} diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 393e39023d9..91585b46946 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -6,6 +6,7 @@ macro_rules! impl_float_vector { { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => { impl_vector! { $name, $type } + impl_float_reductions! { $name, $type } impl $name where diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs index 5304d11cd6e..24f77cb3e10 100644 --- a/crates/core_simd/src/vector/int.rs +++ b/crates/core_simd/src/vector/int.rs @@ -4,6 +4,7 @@ macro_rules! impl_integer_vector { { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => { impl_vector! { $name, $type } + impl_integer_reductions! { $name, $type } impl Eq for $name where Self: crate::LanesAtMost32 {} diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs index 71b5b295112..3866b9ca5c6 100644 --- a/crates/core_simd/src/vector/uint.rs +++ b/crates/core_simd/src/vector/uint.rs @@ -5,6 +5,7 @@ macro_rules! impl_unsigned_vector { { $name:ident, $type:ty } => { impl_vector! { $name, $type } + impl_integer_reductions! { $name, $type } impl Eq for $name where Self: crate::LanesAtMost32 {} From a7b82adb12a9bfbaaf4e446b4b17dcb35a546223 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 8 Mar 2021 00:48:18 -0500 Subject: [PATCH 05/17] Add tests --- crates/core_simd/tests/masks.rs | 18 ++++ crates/core_simd/tests/ops_macros.rs | 121 +++++++++++++++++++++++++++ crates/test_helpers/src/biteq.rs | 2 +- 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 03a835b9c66..59da77de622 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -59,6 +59,24 @@ macro_rules! test_mask_api { let mask = core_simd::$name::<8>::splat(false); let _ = mask.test(8); } + + #[test] + fn any() { + assert!(!core_simd::$name::<8>::splat(false).any()); + assert!(core_simd::$name::<8>::splat(true).any()); + let mut v = core_simd::$name::<8>::splat(false); + v.set(2, true); + assert!(v.any()); + } + + #[test] + fn all() { + assert!(!core_simd::$name::<8>::splat(false).all()); + assert!(core_simd::$name::<8>::splat(true).all()); + let mut v = core_simd::$name::<8>::splat(false); + v.set(2, true); + assert!(!v.all()); + } } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index a70a8a9c48b..d9f705cf390 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -136,6 +136,83 @@ macro_rules! impl_binary_checked_op_test { }; } +#[macro_export] +macro_rules! impl_common_integer_tests { + { $vector:ident, $scalar:ident } => { + test_helpers::test_lanes! { + fn wrapping_sum() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).wrapping_sum(), + x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), + ); + Ok(()) + }); + } + + fn wrapping_product() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).wrapping_product(), + x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), + ); + Ok(()) + }); + } + + fn and_lanes() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).and_lanes(), + x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand), + ); + Ok(()) + }); + } + + fn or_lanes() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).or_lanes(), + x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor), + ); + Ok(()) + }); + } + + fn xor_lanes() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).xor_lanes(), + x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor), + ); + Ok(()) + }); + } + + fn max_lane() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).max_lane(), + x.iter().copied().max().unwrap(), + ); + Ok(()) + }); + } + + fn min_lane() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + $vector::::from_array(x).min_lane(), + x.iter().copied().min().unwrap(), + ); + Ok(()) + }); + } + } + } +} + /// Implement tests for signed integers. #[macro_export] macro_rules! impl_signed_tests { @@ -144,6 +221,8 @@ macro_rules! impl_signed_tests { type Vector = core_simd::$vector; type Scalar = $scalar; + impl_common_integer_tests! { Vector, Scalar } + test_helpers::test_lanes! { fn neg() { test_helpers::test_unary_elementwise( @@ -241,6 +320,8 @@ macro_rules! impl_unsigned_tests { type Vector = core_simd::$vector; type Scalar = $scalar; + impl_common_integer_tests! { Vector, Scalar } + test_helpers::test_lanes_panic! { fn rem_zero_panic() { let a = Vector::::splat(42); @@ -397,6 +478,46 @@ macro_rules! impl_float_tests { }, ).unwrap(); } + + fn sum() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::::from_array(x).sum(), + x.iter().copied().fold(0 as Scalar, ::add), + ); + Ok(()) + }); + } + + fn product() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::::from_array(x).product(), + x.iter().copied().fold(1. as Scalar, ::mul), + ); + Ok(()) + }); + } + + fn max_lane() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::::from_array(x).max_lane(), + x.iter().copied().fold(Scalar::NAN, Scalar::max), + ); + Ok(()) + }); + } + + fn min_lane() { + test_helpers::test_1(&|x| { + test_helpers::prop_assert_biteq! ( + Vector::::from_array(x).min_lane(), + x.iter().copied().fold(Scalar::NAN, Scalar::min), + ); + Ok(()) + }); + } } } } diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index 4a41fe3a16e..00350e22418 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -95,7 +95,7 @@ impl core::fmt::Debug for BitEqWrapper<'_, T> { #[macro_export] macro_rules! prop_assert_biteq { - { $a:expr, $b:expr } => { + { $a:expr, $b:expr $(,)? } => { { use $crate::biteq::BitEqWrapper; let a = $a; From 193cd14b4a7a04d0713593d21f3630de012fd811 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 10 Mar 2021 23:47:43 -0500 Subject: [PATCH 06/17] Enable special handling of zero --- crates/core_simd/tests/ops_macros.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index d9f705cf390..2b65d514623 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -501,20 +501,24 @@ macro_rules! impl_float_tests { fn max_lane() { test_helpers::test_1(&|x| { - test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).max_lane(), - x.iter().copied().fold(Scalar::NAN, Scalar::max), - ); + let vmax = Vector::::from_array(x).max_lane(); + let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max); + // 0 and -0 are treated the same + if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { + test_helpers::prop_assert_biteq!(vmax, smax); + } Ok(()) }); } fn min_lane() { test_helpers::test_1(&|x| { - test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).min_lane(), - x.iter().copied().fold(Scalar::NAN, Scalar::min), - ); + let vmax = Vector::::from_array(x).min_lane(); + let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min); + // 0 and -0 are treated the same + if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { + test_helpers::prop_assert_biteq!(vmax, smax); + } Ok(()) }); } From 02608d44f7542981202792234540915484e0560d Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 11 Mar 2021 00:05:20 -0500 Subject: [PATCH 07/17] Fix mask ops --- crates/core_simd/src/masks/bitmask.rs | 6 +++--- crates/core_simd/src/reduction.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 1d25db46742..b4d1b6d9557 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,9 +1,9 @@ use crate::LanesAtMost32; /// A mask where each lane is represented by a single bit. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Hash)] #[repr(transparent)] -pub struct BitMask(pub(crate) u64) +pub struct BitMask(u64) where BitMask: LanesAtMost32; @@ -14,7 +14,7 @@ where /// Construct a mask by setting all lanes to the given value. pub fn splat(value: bool) -> Self { if value { - Self(u64::MAX) + Self(u64::MAX >> (64 - LANES)) } else { Self(u64::MIN) } diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 0c6d91a2bef..d314cc737ed 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -131,12 +131,12 @@ where /// Returns true if any lane is set, or false otherwise. #[inline] pub fn any(self) -> bool { - self.0 != 0 + self != Self::splat(false) } /// Returns true if all lanes are set, or false otherwise. #[inline] pub fn all(self) -> bool { - self.0 == (!0) >> (64 - LANES) + self == Self::splat(true) } } From 64f564866bf09f98ae7a044fa8ca98a53bbbff1f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 11 Mar 2021 00:27:47 -0500 Subject: [PATCH 08/17] Update documentation and fix i586 inaccuracy --- crates/core_simd/src/reduction.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index d314cc737ed..684879021b4 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -58,22 +58,38 @@ macro_rules! impl_float_reductions { /// Produces the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { - unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } + // f32 SIMD sum is inaccurate on i586 + if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 { + self.as_slice().iter().sum() + } else { + unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } + } } /// Produces the sum of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { - unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } + // f32 SIMD product is inaccurate on i586 + if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 { + self.as_slice().iter().product() + } else { + unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } + } } /// Returns the maximum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] pub fn max_lane(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } } /// Returns the minimum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] pub fn min_lane(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } From 4b8cbd5385e8d6e851edb2d1e37ddbf843dda02a Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 11 Mar 2021 01:02:47 -0500 Subject: [PATCH 09/17] Fix i586 detection --- crates/core_simd/src/reduction.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 684879021b4..e59bf93baa3 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -55,11 +55,12 @@ macro_rules! impl_float_reductions { where Self: crate::LanesAtMost32 { + /// Produces the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { // f32 SIMD sum is inaccurate on i586 - if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 { + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { self.as_slice().iter().sum() } else { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } @@ -70,7 +71,7 @@ macro_rules! impl_float_reductions { #[inline] pub fn product(self) -> $scalar { // f32 SIMD product is inaccurate on i586 - if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 { + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { self.as_slice().iter().product() } else { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } From b51febbd348924a4cee970ef302dcaf5ff0fac18 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 12 Mar 2021 00:29:18 -0500 Subject: [PATCH 10/17] Revert i586 fix, fix test instead --- crates/core_simd/src/reduction.rs | 14 ++------------ crates/core_simd/tests/ops_macros.rs | 4 ++-- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e59bf93baa3..177669ff444 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -59,23 +59,13 @@ macro_rules! impl_float_reductions { /// Produces the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { - // f32 SIMD sum is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { - self.as_slice().iter().sum() - } else { - unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } - } + unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } } /// Produces the sum of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { - // f32 SIMD product is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { - self.as_slice().iter().product() - } else { - unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } - } + unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } } /// Returns the maximum lane in the vector. diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 2b65d514623..59e923ac5c1 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -483,7 +483,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).sum(), - x.iter().copied().fold(0 as Scalar, ::add), + x.iter().sum(), ); Ok(()) }); @@ -493,7 +493,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).product(), - x.iter().copied().fold(1. as Scalar, ::mul), + x.iter().product(), ); Ok(()) }); From 3fae09bd08b4ffacd3f81cc6ec13772e99d29796 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 12 Mar 2021 20:09:31 -0500 Subject: [PATCH 11/17] Revert "Revert i586 fix, fix test instead" This reverts commit 1ea2f128821339d8050ca936f24b71677352437e. --- crates/core_simd/src/reduction.rs | 14 ++++++++++++-- crates/core_simd/tests/ops_macros.rs | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 177669ff444..e59bf93baa3 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -59,13 +59,23 @@ macro_rules! impl_float_reductions { /// Produces the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { - unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } + // f32 SIMD sum is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { + self.as_slice().iter().sum() + } else { + unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } + } } /// Produces the sum of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { - unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } + // f32 SIMD product is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { + self.as_slice().iter().product() + } else { + unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } + } } /// Returns the maximum lane in the vector. diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 59e923ac5c1..2b65d514623 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -483,7 +483,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).sum(), - x.iter().sum(), + x.iter().copied().fold(0 as Scalar, ::add), ); Ok(()) }); @@ -493,7 +493,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).product(), - x.iter().product(), + x.iter().copied().fold(1. as Scalar, ::mul), ); Ok(()) }); From 3cf970fc0997591cb1a0388874506e58a8c44baf Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 12 Mar 2021 20:10:51 -0500 Subject: [PATCH 12/17] Fix test sum/product implementation --- crates/core_simd/tests/ops_macros.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 2b65d514623..59e923ac5c1 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -483,7 +483,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).sum(), - x.iter().copied().fold(0 as Scalar, ::add), + x.iter().sum(), ); Ok(()) }); @@ -493,7 +493,7 @@ macro_rules! impl_float_tests { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( Vector::::from_array(x).product(), - x.iter().copied().fold(1. as Scalar, ::mul), + x.iter().product(), ); Ok(()) }); From e2fa502617175e90f47e0e50873774e512a3ce62 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 12 Mar 2021 20:31:30 -0500 Subject: [PATCH 13/17] Enable i586 workaround for both f32 and f64 --- crates/core_simd/src/reduction.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e59bf93baa3..a2b652189c8 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -59,8 +59,8 @@ macro_rules! impl_float_reductions { /// Produces the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { - // f32 SIMD sum is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { + // LLVM sum is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_slice().iter().sum() } else { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) } @@ -70,8 +70,8 @@ macro_rules! impl_float_reductions { /// Produces the sum of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { - // f32 SIMD product is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 { + // LLVM product is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_slice().iter().product() } else { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) } From e12758670900aa079b4151bb262d7e4dc0f375c6 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 11 Apr 2021 10:59:05 -0400 Subject: [PATCH 14/17] Improve function names and docs --- crates/core_simd/src/reduction.rs | 39 +++++++++++++++------------- crates/core_simd/tests/ops_macros.rs | 28 ++++++++++---------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index a2b652189c8..e728f8ad82a 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -4,45 +4,48 @@ macro_rules! impl_integer_reductions { where Self: crate::LanesAtMost32 { - /// Produces the sum of the lanes of the vector, with wrapping addition. + /// Horizontal wrapping add. Computes the sum of the lanes of the vector, with wrapping addition. #[inline] pub fn wrapping_sum(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } } - /// Produces the sum of the lanes of the vector, with wrapping multiplication. + /// Horizontal wrapping multiply. Computes the product of the lanes of the vector, with wrapping multiplication. #[inline] pub fn wrapping_product(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } } - /// Sequentially performs bitwise "and" between the lanes of the vector. + /// Horizontal bitwise "and". Computes the cumulative bitwise "and" across the lanes of + /// the vector. #[inline] - pub fn and_lanes(self) -> $scalar { + pub fn horizontal_and(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_and(self) } } - /// Sequentially performs bitwise "or" between the lanes of the vector. + /// Horizontal bitwise "or". Computes the cumulative bitwise "or" across the lanes of + /// the vector. #[inline] - pub fn or_lanes(self) -> $scalar { + pub fn horizontal_or(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_or(self) } } - /// Sequentially performs bitwise "xor" between the lanes of the vector. + /// Horizontal bitwise "xor". Computes the cumulative bitwise "xor" across the lanes of + /// the vector. #[inline] - pub fn xor_lanes(self) -> $scalar { + pub fn horizontal_xor(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_xor(self) } } - /// Returns the maximum lane in the vector. + /// Horizontal maximum. Computes the maximum lane in the vector. #[inline] - pub fn max_lane(self) -> $scalar { + pub fn horizontal_max(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } } - /// Returns the minimum lane in the vector. + /// Horizontal minimum. Computes the minimum lane in the vector. #[inline] - pub fn min_lane(self) -> $scalar { + pub fn horizontal_min(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } } } @@ -56,7 +59,7 @@ macro_rules! impl_float_reductions { Self: crate::LanesAtMost32 { - /// Produces the sum of the lanes of the vector. + /// Horizontal add. Computes the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { // LLVM sum is inaccurate on i586 @@ -67,7 +70,7 @@ macro_rules! impl_float_reductions { } } - /// Produces the sum of the lanes of the vector. + /// Horizontal multiply. Computes the sum of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { // LLVM product is inaccurate on i586 @@ -78,21 +81,21 @@ macro_rules! impl_float_reductions { } } - /// Returns the maximum lane in the vector. + /// Horizontal maximum. Computes the maximum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn max_lane(self) -> $scalar { + pub fn horizontal_max(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } } - /// Returns the minimum lane in the vector. + /// Horizontal minimum. Computes the minimum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn min_lane(self) -> $scalar { + pub fn horizontal_min(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 59e923ac5c1..7ce85b77254 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -160,50 +160,50 @@ macro_rules! impl_common_integer_tests { }); } - fn and_lanes() { + fn horizontal_and() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).and_lanes(), + $vector::::from_array(x).horizontal_and(), x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand), ); Ok(()) }); } - fn or_lanes() { + fn horizontal_or() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).or_lanes(), + $vector::::from_array(x).horizontal_or(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor), ); Ok(()) }); } - fn xor_lanes() { + fn horizontal_xor() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).xor_lanes(), + $vector::::from_array(x).horizontal_xor(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor), ); Ok(()) }); } - fn max_lane() { + fn horizontal_max() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).max_lane(), + $vector::::from_array(x).horizontal_max(), x.iter().copied().max().unwrap(), ); Ok(()) }); } - fn min_lane() { + fn horizontal_min() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).min_lane(), + $vector::::from_array(x).horizontal_min(), x.iter().copied().min().unwrap(), ); Ok(()) @@ -499,9 +499,9 @@ macro_rules! impl_float_tests { }); } - fn max_lane() { + fn horizontal_max() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).max_lane(); + let vmax = Vector::::from_array(x).horizontal_max(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { @@ -511,9 +511,9 @@ macro_rules! impl_float_tests { }); } - fn min_lane() { + fn horizontal_min() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).min_lane(); + let vmax = Vector::::from_array(x).horizontal_min(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { From 01d78aa21aee98ccf5b71a2ee9a136aa9e5f290c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Apr 2021 01:32:45 +0000 Subject: [PATCH 15/17] Update docs --- crates/core_simd/src/reduction.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e728f8ad82a..e1fc82e328a 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -4,46 +4,46 @@ macro_rules! impl_integer_reductions { where Self: crate::LanesAtMost32 { - /// Horizontal wrapping add. Computes the sum of the lanes of the vector, with wrapping addition. + /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. #[inline] pub fn wrapping_sum(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } } - /// Horizontal wrapping multiply. Computes the product of the lanes of the vector, with wrapping multiplication. + /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] pub fn wrapping_product(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } } - /// Horizontal bitwise "and". Computes the cumulative bitwise "and" across the lanes of + /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of /// the vector. #[inline] pub fn horizontal_and(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_and(self) } } - /// Horizontal bitwise "or". Computes the cumulative bitwise "or" across the lanes of + /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of /// the vector. #[inline] pub fn horizontal_or(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_or(self) } } - /// Horizontal bitwise "xor". Computes the cumulative bitwise "xor" across the lanes of + /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of /// the vector. #[inline] pub fn horizontal_xor(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_xor(self) } } - /// Horizontal maximum. Computes the maximum lane in the vector. + /// Horizontal maximum. Returns the maximum lane in the vector. #[inline] pub fn horizontal_max(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_max(self) } } - /// Horizontal minimum. Computes the minimum lane in the vector. + /// Horizontal minimum. Returns the minimum lane in the vector. #[inline] pub fn horizontal_min(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_min(self) } @@ -59,7 +59,7 @@ macro_rules! impl_float_reductions { Self: crate::LanesAtMost32 { - /// Horizontal add. Computes the sum of the lanes of the vector. + /// Horizontal add. Returns the sum of the lanes of the vector. #[inline] pub fn sum(self) -> $scalar { // LLVM sum is inaccurate on i586 @@ -70,7 +70,7 @@ macro_rules! impl_float_reductions { } } - /// Horizontal multiply. Computes the sum of the lanes of the vector. + /// Horizontal multiply. Returns the product of the lanes of the vector. #[inline] pub fn product(self) -> $scalar { // LLVM product is inaccurate on i586 @@ -81,7 +81,7 @@ macro_rules! impl_float_reductions { } } - /// Horizontal maximum. Computes the maximum lane in the vector. + /// Horizontal maximum. Returns the maximum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. @@ -90,7 +90,7 @@ macro_rules! impl_float_reductions { unsafe { crate::intrinsics::simd_reduce_max(self) } } - /// Horizontal minimum. Computes the minimum lane in the vector. + /// Horizontal minimum. Returns the minimum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. From 828b274ae75efb984ec6a848ea85868f30c587f9 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 19 Apr 2021 23:41:11 +0000 Subject: [PATCH 16/17] Rename sum, product to horizontal_{sum,product} --- crates/core_simd/src/reduction.rs | 8 ++++---- crates/core_simd/tests/ops_macros.rs | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e1fc82e328a..86a34e4455d 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -6,13 +6,13 @@ macro_rules! impl_integer_reductions { { /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. #[inline] - pub fn wrapping_sum(self) -> $scalar { + pub fn horizontal_wrapping_sum(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } } /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] - pub fn wrapping_product(self) -> $scalar { + pub fn horizontal_wrapping_product(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } } @@ -61,7 +61,7 @@ macro_rules! impl_float_reductions { /// Horizontal add. Returns the sum of the lanes of the vector. #[inline] - pub fn sum(self) -> $scalar { + pub fn horizontal_sum(self) -> $scalar { // LLVM sum is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_slice().iter().sum() @@ -72,7 +72,7 @@ macro_rules! impl_float_reductions { /// Horizontal multiply. Returns the product of the lanes of the vector. #[inline] - pub fn product(self) -> $scalar { + pub fn horizontal_product(self) -> $scalar { // LLVM product is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_slice().iter().product() diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 7ce85b77254..a1213e39e34 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -140,20 +140,20 @@ macro_rules! impl_binary_checked_op_test { macro_rules! impl_common_integer_tests { { $vector:ident, $scalar:ident } => { test_helpers::test_lanes! { - fn wrapping_sum() { + fn horizontal_wrapping_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).wrapping_sum(), + $vector::::from_array(x).horizontal_wrapping_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), ); Ok(()) }); } - fn wrapping_product() { + fn horizontal_wrapping_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).wrapping_product(), + $vector::::from_array(x).horizontal_wrapping_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), ); Ok(()) @@ -479,20 +479,20 @@ macro_rules! impl_float_tests { ).unwrap(); } - fn sum() { + fn horizontal_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).sum(), + Vector::::from_array(x).horizontal_sum(), x.iter().sum(), ); Ok(()) }); } - fn product() { + fn horizontal_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).product(), + Vector::::from_array(x).horizontal_product(), x.iter().product(), ); Ok(()) From 04ee1073237dc77b3742e7a1c0d3740c1df499c4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 22 Apr 2021 22:41:12 +0000 Subject: [PATCH 17/17] Remove wrapping from sum/product fns --- crates/core_simd/src/reduction.rs | 4 ++-- crates/core_simd/tests/ops_macros.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 86a34e4455d..382d366dd3d 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -6,13 +6,13 @@ macro_rules! impl_integer_reductions { { /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. #[inline] - pub fn horizontal_wrapping_sum(self) -> $scalar { + pub fn horizontal_sum(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) } } /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] - pub fn horizontal_wrapping_product(self) -> $scalar { + pub fn horizontal_product(self) -> $scalar { unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index a1213e39e34..37f3b49a330 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -140,20 +140,20 @@ macro_rules! impl_binary_checked_op_test { macro_rules! impl_common_integer_tests { { $vector:ident, $scalar:ident } => { test_helpers::test_lanes! { - fn horizontal_wrapping_sum() { + fn horizontal_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_wrapping_sum(), + $vector::::from_array(x).horizontal_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), ); Ok(()) }); } - fn horizontal_wrapping_product() { + fn horizontal_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_wrapping_product(), + $vector::::from_array(x).horizontal_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), ); Ok(())