From b0a005dcfbbf4d395e4506963d5ab81877a226d2 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 14 Feb 2021 23:35:24 -0500
Subject: [PATCH 01/17] Add floating-point classification functions

---
 crates/core_simd/src/lib.rs | 1 +
 1 file changed, 1 insertion(+)
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 906ee3f06ae..8d9fccd238a 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -17,6 +17,7 @@ mod fmt;
 mod intrinsics;
 mod ops;
 mod round;
+mod comparisons;
 
 mod math;
 

From d7649f46f3f562960f1a87b93e61a35dcd0cc857 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 6 Mar 2021 02:14:58 -0500
Subject: [PATCH 02/17] Various bug fixes

---
 crates/core_simd/src/lib.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 8d9fccd238a..906ee3f06ae 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -17,7 +17,6 @@ mod fmt;
 mod intrinsics;
 mod ops;
 mod round;
-mod comparisons;
 
 mod math;
 

From 926cf3aba3fe453e36bc7e56b2b8b8894fca5377 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 7 Mar 2021 19:45:31 -0500
Subject: [PATCH 03/17] Add intrinsics

---
 crates/core_simd/src/intrinsics.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index fafeed6a62a..13cda880a6c 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -67,4 +67,15 @@ extern "platform-intrinsic" {
 
     // {s,u}sub.sat
     pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+
+    // reductions
+    pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+    pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+    pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+    pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
 }

From 875b31c33f6b0ccbb8590c2b3c9cbf1b11ed6165 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 7 Mar 2021 21:15:56 -0500
Subject: [PATCH 04/17] Implement reductions

---
 crates/core_simd/src/lib.rs              |   2 +
 crates/core_simd/src/masks/bitmask.rs    |   2 +-
 crates/core_simd/src/masks/full_masks.rs | 130 ++++++++++++---------
 crates/core_simd/src/masks/mod.rs        | 128 ++++++++++----------
 crates/core_simd/src/reduction.rs        | 142 +++++++++++++++++++++++
 crates/core_simd/src/vector/float.rs     |   1 +
 crates/core_simd/src/vector/int.rs       |   1 +
 crates/core_simd/src/vector/uint.rs      |   1 +
 8 files changed, 289 insertions(+), 118 deletions(-)
 create mode 100644 crates/core_simd/src/reduction.rs

diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 906ee3f06ae..0fc2641516d 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -11,6 +11,8 @@ mod first;
 mod permute;
 #[macro_use]
 mod transmute;
+#[macro_use]
+mod reduction;
 
 mod comparisons;
 mod fmt;
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index d7400699fde..1d25db46742 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -3,7 +3,7 @@ use crate::LanesAtMost32;
 /// A mask where each lane is represented by a single bit.
 #[derive(Copy, Clone, Debug)]
 #[repr(transparent)]
-pub struct BitMask<const LANES: usize>(u64)
+pub struct BitMask<const LANES: usize>(pub(crate) u64)
 where
     BitMask<LANES>: LanesAtMost32;
 
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index cca077b14d0..a6689ce48c6 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -14,22 +14,27 @@ impl core::fmt::Display for TryFromMaskError {
 }
 
 macro_rules! define_mask {
-    { $(#[$attr:meta])* struct $name:ident<const $lanes:ident: usize>($type:ty); } => {
+    {
+        $(#[$attr:meta])*
+        struct $name:ident<const $lanes:ident: usize>(
+            crate::$type:ident<$lanes2:ident>
+        );
+    } => {
         $(#[$attr])*
         #[derive(Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
         #[repr(transparent)]
-        pub struct $name<const $lanes: usize>($type)
+        pub struct $name<const $lanes: usize>(crate::$type<$lanes2>)
         where
-            $type: crate::LanesAtMost32;
+            crate::$type<LANES>: crate::LanesAtMost32;
 
         impl<const LANES: usize> Copy for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {}
 
         impl<const LANES: usize> Clone for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -37,13 +42,13 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> $name<$lanes>
+        impl<const LANES: usize> $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
-                Self(<$type>::splat(
+                Self(<crate::$type<LANES>>::splat(
                     if value {
                         -1
                     } else {
@@ -76,42 +81,51 @@ macro_rules! define_mask {
                 }
             }
 
-            /// Creates a mask from an integer vector.
-            ///
-            /// # Safety
-            /// All lanes must be either 0 or -1.
+            /// Converts the mask to the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
             #[inline]
-            pub unsafe fn from_int_unchecked(value: $type) -> Self {
+            pub fn to_int(self) -> crate::$type<LANES> {
+                self.0
+            }
+
+            /// Creates a mask from the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
+            /// # Safety
+            /// Each provided lane must be either 0 or -1.
+            #[inline]
+            pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
                 Self(value)
             }
 
-            /// Creates a mask from an integer vector.
+            /// Creates a mask from the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
             ///
             /// # Panics
             /// Panics if any lane is not 0 or -1.
             #[inline]
-            pub fn from_int(value: $type) -> Self {
+            pub fn from_int(value: crate::$type<LANES>) -> Self {
                 use core::convert::TryInto;
                 value.try_into().unwrap()
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<bool> for $name<$lanes>
+        impl<const LANES: usize> core::convert::From<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn from(value: bool) -> Self {
                 Self::splat(value)
             }
         }
 
-        impl<const $lanes: usize> core::convert::TryFrom<$type> for $name<$lanes>
+        impl<const LANES: usize> core::convert::TryFrom<crate::$type<LANES>> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Error = TryFromMaskError;
-            fn try_from(value: $type) -> Result<Self, Self::Error> {
-                if value.as_slice().iter().all(|x| *x == 0 || *x == -1) {
+            fn try_from(value: crate::$type<LANES>) -> Result<Self, Self::Error> {
+                let valid = (value.lanes_eq(crate::$type::<LANES>::splat(0)) | value.lanes_eq(crate::$type::<LANES>::splat(-1))).all();
+                if valid {
                     Ok(Self(value))
                 } else {
                     Err(TryFromMaskError(()))
@@ -119,21 +133,21 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<$name<$lanes>> for $type
+        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<crate::BitMask<$lanes>> for $name<$lanes>
+        impl<const LANES: usize> core::convert::From<crate::BitMask<LANES>> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
-            crate::BitMask<$lanes>: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::BitMask<LANES>: crate::LanesAtMost32,
         {
-            fn from(value: crate::BitMask<$lanes>) -> Self {
+            fn from(value: crate::BitMask<LANES>) -> Self {
                 // TODO use an intrinsic to do this efficiently (with LLVM's sext instruction)
                 let mut mask = Self::splat(false);
                 for lane in 0..LANES {
@@ -143,10 +157,10 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<$name<$lanes>> for crate::BitMask<$lanes>
+        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::BitMask<LANES>
         where
-            $type: crate::LanesAtMost32,
-            crate::BitMask<$lanes>: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::BitMask<LANES>: crate::LanesAtMost32,
         {
             fn from(value: $name<$lanes>) -> Self {
                 // TODO use an intrinsic to do this efficiently (with LLVM's trunc instruction)
@@ -158,9 +172,9 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Debug for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 f.debug_list()
@@ -169,36 +183,36 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Binary for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Binary for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Binary::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Octal for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Octal for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Octal::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::LowerHex for $name<$lanes>
+        impl<const LANES: usize> core::fmt::LowerHex for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::LowerHex::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::UpperHex for $name<$lanes>
+        impl<const LANES: usize> core::fmt::UpperHex for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::UpperHex::fmt(&self.0, f)
@@ -207,7 +221,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -218,7 +232,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -229,7 +243,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -240,7 +254,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -251,7 +265,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -262,7 +276,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -273,7 +287,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -284,7 +298,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -295,7 +309,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -306,7 +320,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -317,7 +331,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
@@ -327,7 +341,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -337,7 +351,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
@@ -347,7 +361,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -357,7 +371,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
@@ -367,13 +381,15 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
                 *self ^= Self::splat(rhs);
             }
         }
+
+        impl_full_mask_reductions! { $name, $type }
     }
 }
 
diff --git a/crates/core_simd/src/masks/mod.rs b/crates/core_simd/src/masks/mod.rs
index 0b986aaf7e1..4503187e4b8 100644
--- a/crates/core_simd/src/masks/mod.rs
+++ b/crates/core_simd/src/masks/mod.rs
@@ -7,25 +7,27 @@ pub use full_masks::*;
 mod bitmask;
 pub use bitmask::*;
 
-use crate::LanesAtMost32;
+use crate::{LanesAtMost32, SimdI128, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
 
 macro_rules! define_opaque_mask {
     {
         $(#[$attr:meta])*
-        struct $name:ident<const $lanes:ident: usize>($inner_ty:ty);
-        @bits $bits_ty:ty
+        struct $name:ident<const $lanes:ident: usize>($inner_ty:ident<$lanes2:ident>);
+        @bits $bits_ty:ident
     } => {
         $(#[$attr])*
         #[allow(non_camel_case_types)]
-        pub struct $name<const $lanes: usize>($inner_ty) where $bits_ty: LanesAtMost32;
+        pub struct $name<const LANES: usize>($inner_ty<LANES>) where $bits_ty<LANES>: LanesAtMost32;
 
-        impl<const $lanes: usize> $name<$lanes>
+        impl_opaque_mask_reductions! { $name, $inner_ty, $bits_ty }
+
+        impl<const LANES: usize> $name<LANES>
         where
-            $bits_ty: LanesAtMost32
+            $bits_ty<LANES>: LanesAtMost32
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
-                Self(<$inner_ty>::splat(value))
+                Self(<$inner_ty<LANES>>::splat(value))
             }
 
             /// Converts an array to a SIMD vector.
@@ -69,66 +71,72 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> From<BitMask<$lanes>> for $name<$lanes>
+        impl<const LANES: usize> From<BitMask<LANES>> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
-            BitMask<$lanes>: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
+            BitMask<LANES>: LanesAtMost32,
         {
-            fn from(value: BitMask<$lanes>) -> Self {
+            fn from(value: BitMask<LANES>) -> Self {
                 Self(value.into())
             }
         }
 
-        impl<const $lanes: usize> From<$name<$lanes>> for crate::BitMask<$lanes>
+        impl<const LANES: usize> From<$name<LANES>> for crate::BitMask<LANES>
         where
-            $bits_ty: LanesAtMost32,
-            BitMask<$lanes>: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
+            BitMask<LANES>: LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0.into()
             }
         }
 
-        impl<const $lanes: usize> From<$inner_ty> for $name<$lanes>
+        impl<const LANES: usize> From<$inner_ty<LANES>> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
-            fn from(value: $inner_ty) -> Self {
+            fn from(value: $inner_ty<LANES>) -> Self {
                 Self(value)
             }
         }
 
-        impl<const $lanes: usize> From<$name<$lanes>> for $inner_ty
+        impl<const LANES: usize> From<$name<LANES>> for $inner_ty<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0
             }
         }
 
         // vector/array conversion
-        impl<const $lanes: usize> From<[bool; $lanes]> for $name<$lanes> where $bits_ty: crate::LanesAtMost32 {
-            fn from(array: [bool; $lanes]) -> Self {
+        impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            fn from(array: [bool; LANES]) -> Self {
                 Self::from_array(array)
             }
         }
 
-        impl <const $lanes: usize> From<$name<$lanes>> for [bool; $lanes] where $bits_ty: crate::LanesAtMost32 {
-            fn from(vector: $name<$lanes>) -> Self {
+        impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            fn from(vector: $name<LANES>) -> Self {
                 vector.to_array()
             }
         }
 
-        impl<const $lanes: usize> Copy for $name<$lanes>
+        impl<const LANES: usize> Copy for $name<LANES>
         where
-            $inner_ty: Copy,
-            $bits_ty: LanesAtMost32,
+            $inner_ty<LANES>: Copy,
+            $bits_ty<LANES>: LanesAtMost32,
         {}
 
-        impl<const $lanes: usize> Clone for $name<$lanes>
+        impl<const LANES: usize> Clone for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -136,9 +144,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> Default for $name<$lanes>
+        impl<const LANES: usize> Default for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn default() -> Self {
@@ -146,9 +154,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> PartialEq for $name<$lanes>
+        impl<const LANES: usize> PartialEq for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn eq(&self, other: &Self) -> bool {
@@ -156,9 +164,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> PartialOrd for $name<$lanes>
+        impl<const LANES: usize> PartialOrd for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -166,9 +174,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Debug for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Debug::fmt(&self.0, f)
@@ -177,7 +185,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -188,7 +196,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -199,7 +207,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -210,7 +218,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -221,7 +229,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -232,7 +240,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -243,7 +251,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -254,7 +262,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -265,7 +273,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -276,7 +284,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -287,7 +295,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
@@ -297,7 +305,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -307,7 +315,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
@@ -317,7 +325,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -327,7 +335,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
@@ -337,7 +345,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
@@ -352,7 +360,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask8<const LANES: usize>(SimdMask8<LANES>);
-    @bits crate::SimdI8<LANES>
+    @bits SimdI8
 }
 
 define_opaque_mask! {
@@ -360,7 +368,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask16<const LANES: usize>(SimdMask16<LANES>);
-    @bits crate::SimdI16<LANES>
+    @bits SimdI16
 }
 
 define_opaque_mask! {
@@ -368,7 +376,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask32<const LANES: usize>(SimdMask32<LANES>);
-    @bits crate::SimdI32<LANES>
+    @bits SimdI32
 }
 
 define_opaque_mask! {
@@ -376,7 +384,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask64<const LANES: usize>(SimdMask64<LANES>);
-    @bits crate::SimdI64<LANES>
+    @bits SimdI64
 }
 
 define_opaque_mask! {
@@ -384,7 +392,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask128<const LANES: usize>(SimdMask128<LANES>);
-    @bits crate::SimdI128<LANES>
+    @bits SimdI128
 }
 
 define_opaque_mask! {
@@ -392,7 +400,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct MaskSize<const LANES: usize>(SimdMaskSize<LANES>);
-    @bits crate::SimdIsize<LANES>
+    @bits SimdIsize
 }
 
 /// Vector of eight 8-bit masks
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
new file mode 100644
index 00000000000..0c6d91a2bef
--- /dev/null
+++ b/crates/core_simd/src/reduction.rs
@@ -0,0 +1,142 @@
+macro_rules! impl_integer_reductions {
+    { $name:ident, $scalar:ty } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            Self: crate::LanesAtMost32
+        {
+            /// Produces the sum of the lanes of the vector, with wrapping addition.
+            #[inline]
+            pub fn wrapping_sum(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
+            }
+
+            /// Produces the sum of the lanes of the vector, with wrapping multiplication.
+            #[inline]
+            pub fn wrapping_product(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
+            }
+
+            /// Sequentially performs bitwise "and" between the lanes of the vector.
+            #[inline]
+            pub fn and_lanes(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_and(self) }
+            }
+
+            /// Sequentially performs bitwise "or" between the lanes of the vector.
+            #[inline]
+            pub fn or_lanes(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_or(self) }
+            }
+
+            /// Sequentially performs bitwise "xor" between the lanes of the vector.
+            #[inline]
+            pub fn xor_lanes(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_xor(self) }
+            }
+
+            /// Returns the maximum lane in the vector.
+            #[inline]
+            pub fn max_lane(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_max(self) }
+            }
+
+            /// Returns the minimum lane in the vector.
+            #[inline]
+            pub fn min_lane(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_min(self) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_float_reductions {
+    { $name:ident, $scalar:ty } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            Self: crate::LanesAtMost32
+        {
+            /// Produces the sum of the lanes of the vector.
+            #[inline]
+            pub fn sum(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+            }
+
+            /// Produces the sum of the lanes of the vector.
+            #[inline]
+            pub fn product(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+            }
+
+            /// Returns the maximum lane in the vector.
+            #[inline]
+            pub fn max_lane(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_max(self) }
+            }
+
+            /// Returns the minimum lane in the vector.
+            #[inline]
+            pub fn min_lane(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_min(self) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_full_mask_reductions {
+    { $name:ident, $inner:ident } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            crate::$inner<LANES>: crate::LanesAtMost32
+        {
+            /// Returns true if any lane is set, or false otherwise.
+            #[inline]
+            pub fn any(self) -> bool {
+                unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) }
+            }
+
+            /// Returns true if all lanes are set, or false otherwise.
+            #[inline]
+            pub fn all(self) -> bool {
+                unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_opaque_mask_reductions {
+    { $name:ident, $inner:ident, $bits_ty:ident } => {
+        impl<const LANES: usize> $name<LANES>
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            /// Returns true if any lane is set, or false otherwise.
+            #[inline]
+            pub fn any(self) -> bool {
+                self.0.any()
+            }
+
+            /// Returns true if all lanes are set, or false otherwise.
+            #[inline]
+            pub fn all(self) -> bool {
+                self.0.all()
+            }
+        }
+    }
+}
+
+impl<const LANES: usize> crate::BitMask<LANES>
+where
+    crate::BitMask<LANES>: crate::LanesAtMost32,
+{
+    /// Returns true if any lane is set, or false otherwise.
+    #[inline]
+    pub fn any(self) -> bool {
+        self.0 != 0
+    }
+
+    /// Returns true if all lanes are set, or false otherwise.
+    #[inline]
+    pub fn all(self) -> bool {
+        self.0 == (!0) >> (64 - LANES)
+    }
+}
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index 393e39023d9..91585b46946 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -6,6 +6,7 @@
 macro_rules! impl_float_vector {
     { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
+        impl_float_reductions! { $name, $type }
 
         impl<const LANES: usize> $name<LANES>
         where
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 5304d11cd6e..24f77cb3e10 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -4,6 +4,7 @@
 macro_rules! impl_integer_vector {
     { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
+        impl_integer_reductions! { $name, $type }
 
         impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
 
diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs
index 71b5b295112..3866b9ca5c6 100644
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@@ -5,6 +5,7 @@
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
         impl_vector! { $name, $type }
+        impl_integer_reductions! { $name, $type }
 
         impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
 

From a7b82adb12a9bfbaaf4e446b4b17dcb35a546223 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 8 Mar 2021 00:48:18 -0500
Subject: [PATCH 05/17] Add tests

---
 crates/core_simd/tests/masks.rs      |  18 ++++
 crates/core_simd/tests/ops_macros.rs | 121 +++++++++++++++++++++++++++
 crates/test_helpers/src/biteq.rs     |   2 +-
 3 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 03a835b9c66..59da77de622 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -59,6 +59,24 @@ macro_rules! test_mask_api {
                 let mask = core_simd::$name::<8>::splat(false);
                 let _ = mask.test(8);
             }
+
+            #[test]
+            fn any() {
+                assert!(!core_simd::$name::<8>::splat(false).any());
+                assert!(core_simd::$name::<8>::splat(true).any());
+                let mut v = core_simd::$name::<8>::splat(false);
+                v.set(2, true);
+                assert!(v.any());
+            }
+
+            #[test]
+            fn all() {
+                assert!(!core_simd::$name::<8>::splat(false).all());
+                assert!(core_simd::$name::<8>::splat(true).all());
+                let mut v = core_simd::$name::<8>::splat(false);
+                v.set(2, true);
+                assert!(!v.all());
+            }
         }
     }
 }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index a70a8a9c48b..d9f705cf390 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -136,6 +136,83 @@ macro_rules! impl_binary_checked_op_test {
     };
 }
 
+#[macro_export]
+macro_rules! impl_common_integer_tests {
+    { $vector:ident, $scalar:ident } => {
+        test_helpers::test_lanes! {
+            fn wrapping_sum<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).wrapping_sum(),
+                        x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn wrapping_product<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).wrapping_product(),
+                        x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn and_lanes<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).and_lanes(),
+                        x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn or_lanes<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).or_lanes(),
+                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn xor_lanes<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).xor_lanes(),
+                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn max_lane<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).max_lane(),
+                        x.iter().copied().max().unwrap(),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn min_lane<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).min_lane(),
+                        x.iter().copied().min().unwrap(),
+                    );
+                    Ok(())
+                });
+            }
+        }
+    }
+}
+
 /// Implement tests for signed integers.
 #[macro_export]
 macro_rules! impl_signed_tests {
@@ -144,6 +221,8 @@ macro_rules! impl_signed_tests {
             type Vector<const LANES: usize> = core_simd::$vector<LANES>;
             type Scalar = $scalar;
 
+            impl_common_integer_tests! { Vector, Scalar }
+
             test_helpers::test_lanes! {
                 fn neg<const LANES: usize>() {
                     test_helpers::test_unary_elementwise(
@@ -241,6 +320,8 @@ macro_rules! impl_unsigned_tests {
             type Vector<const LANES: usize> = core_simd::$vector<LANES>;
             type Scalar = $scalar;
 
+            impl_common_integer_tests! { Vector, Scalar }
+
             test_helpers::test_lanes_panic! {
                 fn rem_zero_panic<const LANES: usize>() {
                     let a = Vector::<LANES>::splat(42);
@@ -397,6 +478,46 @@ macro_rules! impl_float_tests {
                         },
                     ).unwrap();
                 }
+
+                fn sum<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).sum(),
+                            x.iter().copied().fold(0 as Scalar, <Scalar as core::ops::Add>::add),
+                        );
+                        Ok(())
+                    });
+                }
+
+                fn product<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).product(),
+                            x.iter().copied().fold(1. as Scalar, <Scalar as core::ops::Mul>::mul),
+                        );
+                        Ok(())
+                    });
+                }
+
+                fn max_lane<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).max_lane(),
+                            x.iter().copied().fold(Scalar::NAN, Scalar::max),
+                        );
+                        Ok(())
+                    });
+                }
+
+                fn min_lane<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).min_lane(),
+                            x.iter().copied().fold(Scalar::NAN, Scalar::min),
+                        );
+                        Ok(())
+                    });
+                }
             }
         }
     }
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
index 4a41fe3a16e..00350e22418 100644
--- a/crates/test_helpers/src/biteq.rs
+++ b/crates/test_helpers/src/biteq.rs
@@ -95,7 +95,7 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
 
 #[macro_export]
 macro_rules! prop_assert_biteq {
-    { $a:expr, $b:expr } => {
+    { $a:expr, $b:expr $(,)? } => {
         {
             use $crate::biteq::BitEqWrapper;
             let a = $a;

From 193cd14b4a7a04d0713593d21f3630de012fd811 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Wed, 10 Mar 2021 23:47:43 -0500
Subject: [PATCH 06/17] Enable special handling of zero

---
 crates/core_simd/tests/ops_macros.rs | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index d9f705cf390..2b65d514623 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -501,20 +501,24 @@ macro_rules! impl_float_tests {
 
                 fn max_lane<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
-                        test_helpers::prop_assert_biteq! (
-                            Vector::<LANES>::from_array(x).max_lane(),
-                            x.iter().copied().fold(Scalar::NAN, Scalar::max),
-                        );
+                        let vmax = Vector::<LANES>::from_array(x).max_lane();
+                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
+                        // 0 and -0 are treated the same
+                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+                            test_helpers::prop_assert_biteq!(vmax, smax);
+                        }
                         Ok(())
                     });
                 }
 
                 fn min_lane<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
-                        test_helpers::prop_assert_biteq! (
-                            Vector::<LANES>::from_array(x).min_lane(),
-                            x.iter().copied().fold(Scalar::NAN, Scalar::min),
-                        );
+                        let vmax = Vector::<LANES>::from_array(x).min_lane();
+                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
+                        // 0 and -0 are treated the same
+                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+                            test_helpers::prop_assert_biteq!(vmax, smax);
+                        }
                         Ok(())
                     });
                 }

From 02608d44f7542981202792234540915484e0560d Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 11 Mar 2021 00:05:20 -0500
Subject: [PATCH 07/17] Fix mask ops

---
 crates/core_simd/src/masks/bitmask.rs | 6 +++---
 crates/core_simd/src/reduction.rs     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index 1d25db46742..b4d1b6d9557 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,9 +1,9 @@
 use crate::LanesAtMost32;
 
 /// A mask where each lane is represented by a single bit.
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Hash)]
 #[repr(transparent)]
-pub struct BitMask<const LANES: usize>(pub(crate) u64)
+pub struct BitMask<const LANES: usize>(u64)
 where
     BitMask<LANES>: LanesAtMost32;
 
@@ -14,7 +14,7 @@ where
     /// Construct a mask by setting all lanes to the given value.
     pub fn splat(value: bool) -> Self {
         if value {
-            Self(u64::MAX)
+            Self(u64::MAX >> (64 - LANES))
         } else {
             Self(u64::MIN)
         }
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 0c6d91a2bef..d314cc737ed 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -131,12 +131,12 @@ where
     /// Returns true if any lane is set, or false otherwise.
     #[inline]
     pub fn any(self) -> bool {
-        self.0 != 0
+        self != Self::splat(false)
     }
 
     /// Returns true if all lanes are set, or false otherwise.
     #[inline]
     pub fn all(self) -> bool {
-        self.0 == (!0) >> (64 - LANES)
+        self == Self::splat(true)
     }
 }

From 64f564866bf09f98ae7a044fa8ca98a53bbbff1f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 11 Mar 2021 00:27:47 -0500
Subject: [PATCH 08/17] Update documentation and fix i586 inaccuracy

---
 crates/core_simd/src/reduction.rs | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index d314cc737ed..684879021b4 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -58,22 +58,38 @@ macro_rules! impl_float_reductions {
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
-                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+                // f32 SIMD sum is inaccurate on i586
+                if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 {
+                    self.as_slice().iter().sum()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+                }
             }
 
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
-                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                // f32 SIMD product is inaccurate on i586
+                if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 {
+                    self.as_slice().iter().product()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                }
             }
 
             /// Returns the maximum lane in the vector.
+            ///
+            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
             pub fn max_lane(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_max(self) }
             }
 
             /// Returns the minimum lane in the vector.
+            ///
+            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
             pub fn min_lane(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_min(self) }

From 4b8cbd5385e8d6e851edb2d1e37ddbf843dda02a Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 11 Mar 2021 01:02:47 -0500
Subject: [PATCH 09/17] Fix i586 detection

---
 crates/core_simd/src/reduction.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 684879021b4..e59bf93baa3 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -55,11 +55,12 @@ macro_rules! impl_float_reductions {
         where
             Self: crate::LanesAtMost32
         {
+
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
                 // f32 SIMD sum is inaccurate on i586
-                if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 {
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
                     self.as_slice().iter().sum()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
@@ -70,7 +71,7 @@ macro_rules! impl_float_reductions {
             #[inline]
             pub fn product(self) -> $scalar {
                 // f32 SIMD product is inaccurate on i586
-                if cfg!(target_arch = "i586") && core::mem::size_of::<$scalar>() == 4 {
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
                     self.as_slice().iter().product()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }

From b51febbd348924a4cee970ef302dcaf5ff0fac18 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 12 Mar 2021 00:29:18 -0500
Subject: [PATCH 10/17] Revert i586 fix, fix test instead

---
 crates/core_simd/src/reduction.rs    | 14 ++------------
 crates/core_simd/tests/ops_macros.rs |  4 ++--
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index e59bf93baa3..177669ff444 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -59,23 +59,13 @@ macro_rules! impl_float_reductions {
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
-                // f32 SIMD sum is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
-                    self.as_slice().iter().sum()
-                } else {
-                    unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
-                }
+                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
             }
 
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
-                // f32 SIMD product is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
-                    self.as_slice().iter().product()
-                } else {
-                    unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
-                }
+                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
             }
 
             /// Returns the maximum lane in the vector.
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 2b65d514623..59e923ac5c1 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -483,7 +483,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).sum(),
-                            x.iter().copied().fold(0 as Scalar, <Scalar as core::ops::Add>::add),
+                            x.iter().sum(),
                         );
                         Ok(())
                     });
@@ -493,7 +493,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).product(),
-                            x.iter().copied().fold(1. as Scalar, <Scalar as core::ops::Mul>::mul),
+                            x.iter().product(),
                         );
                         Ok(())
                     });

From 3fae09bd08b4ffacd3f81cc6ec13772e99d29796 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 12 Mar 2021 20:09:31 -0500
Subject: [PATCH 11/17] Revert "Revert i586 fix, fix test instead"

This reverts commit 1ea2f128821339d8050ca936f24b71677352437e.
---
 crates/core_simd/src/reduction.rs    | 14 ++++++++++++--
 crates/core_simd/tests/ops_macros.rs |  4 ++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 177669ff444..e59bf93baa3 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -59,13 +59,23 @@ macro_rules! impl_float_reductions {
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
-                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+                // f32 SIMD sum is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
+                    self.as_slice().iter().sum()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+                }
             }
 
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
-                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                // f32 SIMD product is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
+                    self.as_slice().iter().product()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                }
             }
 
             /// Returns the maximum lane in the vector.
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 59e923ac5c1..2b65d514623 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -483,7 +483,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).sum(),
-                            x.iter().sum(),
+                            x.iter().copied().fold(0 as Scalar, <Scalar as core::ops::Add>::add),
                         );
                         Ok(())
                     });
@@ -493,7 +493,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).product(),
-                            x.iter().product(),
+                            x.iter().copied().fold(1. as Scalar, <Scalar as core::ops::Mul>::mul),
                         );
                         Ok(())
                     });

From 3cf970fc0997591cb1a0388874506e58a8c44baf Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 12 Mar 2021 20:10:51 -0500
Subject: [PATCH 12/17] Fix test sum/product implementation

---
 crates/core_simd/tests/ops_macros.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 2b65d514623..59e923ac5c1 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -483,7 +483,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).sum(),
-                            x.iter().copied().fold(0 as Scalar, <Scalar as core::ops::Add>::add),
+                            x.iter().sum(),
                         );
                         Ok(())
                     });
@@ -493,7 +493,7 @@ macro_rules! impl_float_tests {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
                             Vector::<LANES>::from_array(x).product(),
-                            x.iter().copied().fold(1. as Scalar, <Scalar as core::ops::Mul>::mul),
+                            x.iter().product(),
                         );
                         Ok(())
                     });

From e2fa502617175e90f47e0e50873774e512a3ce62 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 12 Mar 2021 20:31:30 -0500
Subject: [PATCH 13/17] Enable i586 workaround for both f32 and f64

---
 crates/core_simd/src/reduction.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index e59bf93baa3..a2b652189c8 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -59,8 +59,8 @@ macro_rules! impl_float_reductions {
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
-                // f32 SIMD sum is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
+                // LLVM sum is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_slice().iter().sum()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
@@ -70,8 +70,8 @@ macro_rules! impl_float_reductions {
             /// Produces the sum of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
-                // f32 SIMD product is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && core::mem::size_of::<$scalar>() == 4 {
+                // LLVM product is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_slice().iter().product()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }

From e12758670900aa079b4151bb262d7e4dc0f375c6 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 11 Apr 2021 10:59:05 -0400
Subject: [PATCH 14/17] Improve function names and docs

---
 crates/core_simd/src/reduction.rs    | 39 +++++++++++++++-------------
 crates/core_simd/tests/ops_macros.rs | 28 ++++++++++----------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index a2b652189c8..e728f8ad82a 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -4,45 +4,48 @@ macro_rules! impl_integer_reductions {
         where
             Self: crate::LanesAtMost32
         {
-            /// Produces the sum of the lanes of the vector, with wrapping addition.
+            /// Horizontal wrapping add.  Computes the sum of the lanes of the vector, with wrapping addition.
             #[inline]
             pub fn wrapping_sum(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
             }
 
-            /// Produces the sum of the lanes of the vector, with wrapping multiplication.
+            /// Horizontal wrapping multiply.  Computes the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
             pub fn wrapping_product(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
             }
 
-            /// Sequentially performs bitwise "and" between the lanes of the vector.
+            /// Horizontal bitwise "and".  Computes the cumulative bitwise "and" across the lanes of
+            /// the vector.
             #[inline]
-            pub fn and_lanes(self) -> $scalar {
+            pub fn horizontal_and(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_and(self) }
             }
 
-            /// Sequentially performs bitwise "or" between the lanes of the vector.
+            /// Horizontal bitwise "or".  Computes the cumulative bitwise "or" across the lanes of
+            /// the vector.
             #[inline]
-            pub fn or_lanes(self) -> $scalar {
+            pub fn horizontal_or(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_or(self) }
             }
 
-            /// Sequentially performs bitwise "xor" between the lanes of the vector.
+            /// Horizontal bitwise "xor".  Computes the cumulative bitwise "xor" across the lanes of
+            /// the vector.
             #[inline]
-            pub fn xor_lanes(self) -> $scalar {
+            pub fn horizontal_xor(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_xor(self) }
             }
 
-            /// Returns the maximum lane in the vector.
+            /// Horizontal maximum.  Computes the maximum lane in the vector.
             #[inline]
-            pub fn max_lane(self) -> $scalar {
+            pub fn horizontal_max(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_max(self) }
             }
 
-            /// Returns the minimum lane in the vector.
+            /// Horizontal minimum.  Computes the minimum lane in the vector.
             #[inline]
-            pub fn min_lane(self) -> $scalar {
+            pub fn horizontal_min(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_min(self) }
             }
         }
@@ -56,7 +59,7 @@ macro_rules! impl_float_reductions {
             Self: crate::LanesAtMost32
         {
 
-            /// Produces the sum of the lanes of the vector.
+            /// Horizontal add.  Computes the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
@@ -67,7 +70,7 @@ macro_rules! impl_float_reductions {
                 }
             }
 
-            /// Produces the sum of the lanes of the vector.
+            /// Horizontal multiply.  Computes the sum of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
@@ -78,21 +81,21 @@ macro_rules! impl_float_reductions {
                 }
             }
 
-            /// Returns the maximum lane in the vector.
+            /// Horizontal maximum.  Computes the maximum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
-            pub fn max_lane(self) -> $scalar {
+            pub fn horizontal_max(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_max(self) }
             }
 
-            /// Returns the minimum lane in the vector.
+            /// Horizontal minimum.  Computes the minimum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
-            pub fn min_lane(self) -> $scalar {
+            pub fn horizontal_min(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_min(self) }
             }
         }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 59e923ac5c1..7ce85b77254 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -160,50 +160,50 @@ macro_rules! impl_common_integer_tests {
                 });
             }
 
-            fn and_lanes<const LANES: usize>() {
+            fn horizontal_and<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).and_lanes(),
+                        $vector::<LANES>::from_array(x).horizontal_and(),
                         x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
                     );
                     Ok(())
                 });
             }
 
-            fn or_lanes<const LANES: usize>() {
+            fn horizontal_or<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).or_lanes(),
+                        $vector::<LANES>::from_array(x).horizontal_or(),
                         x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
                     );
                     Ok(())
                 });
             }
 
-            fn xor_lanes<const LANES: usize>() {
+            fn horizontal_xor<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).xor_lanes(),
+                        $vector::<LANES>::from_array(x).horizontal_xor(),
                         x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
                     );
                     Ok(())
                 });
             }
 
-            fn max_lane<const LANES: usize>() {
+            fn horizontal_max<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).max_lane(),
+                        $vector::<LANES>::from_array(x).horizontal_max(),
                         x.iter().copied().max().unwrap(),
                     );
                     Ok(())
                 });
             }
 
-            fn min_lane<const LANES: usize>() {
+            fn horizontal_min<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).min_lane(),
+                        $vector::<LANES>::from_array(x).horizontal_min(),
                         x.iter().copied().min().unwrap(),
                     );
                     Ok(())
@@ -499,9 +499,9 @@ macro_rules! impl_float_tests {
                     });
                 }
 
-                fn max_lane<const LANES: usize>() {
+                fn horizontal_max<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
-                        let vmax = Vector::<LANES>::from_array(x).max_lane();
+                        let vmax = Vector::<LANES>::from_array(x).horizontal_max();
                         let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
                         // 0 and -0 are treated the same
                         if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
@@ -511,9 +511,9 @@ macro_rules! impl_float_tests {
                     });
                 }
 
-                fn min_lane<const LANES: usize>() {
+                fn horizontal_min<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
-                        let vmax = Vector::<LANES>::from_array(x).min_lane();
+                        let vmax = Vector::<LANES>::from_array(x).horizontal_min();
                         let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
                         // 0 and -0 are treated the same
                         if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {

From 01d78aa21aee98ccf5b71a2ee9a136aa9e5f290c Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 17 Apr 2021 01:32:45 +0000
Subject: [PATCH 15/17] Update docs

---
 crates/core_simd/src/reduction.rs | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index e728f8ad82a..e1fc82e328a 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -4,46 +4,46 @@ macro_rules! impl_integer_reductions {
         where
             Self: crate::LanesAtMost32
         {
-            /// Horizontal wrapping add.  Computes the sum of the lanes of the vector, with wrapping addition.
+            /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
             pub fn wrapping_sum(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
             }
 
-            /// Horizontal wrapping multiply.  Computes the product of the lanes of the vector, with wrapping multiplication.
+            /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
             pub fn wrapping_product(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
             }
 
-            /// Horizontal bitwise "and".  Computes the cumulative bitwise "and" across the lanes of
+            /// Horizontal bitwise "and".  Returns the cumulative bitwise "and" across the lanes of
             /// the vector.
             #[inline]
             pub fn horizontal_and(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_and(self) }
             }
 
-            /// Horizontal bitwise "or".  Computes the cumulative bitwise "or" across the lanes of
+            /// Horizontal bitwise "or".  Returns the cumulative bitwise "or" across the lanes of
             /// the vector.
             #[inline]
             pub fn horizontal_or(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_or(self) }
             }
 
-            /// Horizontal bitwise "xor".  Computes the cumulative bitwise "xor" across the lanes of
+            /// Horizontal bitwise "xor".  Returns the cumulative bitwise "xor" across the lanes of
             /// the vector.
             #[inline]
             pub fn horizontal_xor(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_xor(self) }
             }
 
-            /// Horizontal maximum.  Computes the maximum lane in the vector.
+            /// Horizontal maximum.  Returns the maximum lane in the vector.
             #[inline]
             pub fn horizontal_max(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_max(self) }
             }
 
-            /// Horizontal minimum.  Computes the minimum lane in the vector.
+            /// Horizontal minimum.  Returns the minimum lane in the vector.
             #[inline]
             pub fn horizontal_min(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_min(self) }
@@ -59,7 +59,7 @@ macro_rules! impl_float_reductions {
             Self: crate::LanesAtMost32
         {
 
-            /// Horizontal add.  Computes the sum of the lanes of the vector.
+            /// Horizontal add.  Returns the sum of the lanes of the vector.
             #[inline]
             pub fn sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
@@ -70,7 +70,7 @@ macro_rules! impl_float_reductions {
                 }
             }
 
-            /// Horizontal multiply.  Computes the sum of the lanes of the vector.
+            /// Horizontal multiply.  Returns the product of the lanes of the vector.
             #[inline]
             pub fn product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
@@ -81,7 +81,7 @@ macro_rules! impl_float_reductions {
                 }
             }
 
-            /// Horizontal maximum.  Computes the maximum lane in the vector.
+            /// Horizontal maximum.  Returns the maximum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
@@ -90,7 +90,7 @@ macro_rules! impl_float_reductions {
                 unsafe { crate::intrinsics::simd_reduce_max(self) }
             }
 
-            /// Horizontal minimum.  Computes the minimum lane in the vector.
+            /// Horizontal minimum.  Returns the minimum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.

From 828b274ae75efb984ec6a848ea85868f30c587f9 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 19 Apr 2021 23:41:11 +0000
Subject: [PATCH 16/17] Rename sum, product to horizontal_{sum,product}

---
 crates/core_simd/src/reduction.rs    |  8 ++++----
 crates/core_simd/tests/ops_macros.rs | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index e1fc82e328a..86a34e4455d 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -6,13 +6,13 @@ macro_rules! impl_integer_reductions {
         {
             /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
-            pub fn wrapping_sum(self) -> $scalar {
+            pub fn horizontal_wrapping_sum(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
             }
 
             /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
-            pub fn wrapping_product(self) -> $scalar {
+            pub fn horizontal_wrapping_product(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
             }
 
@@ -61,7 +61,7 @@ macro_rules! impl_float_reductions {
 
             /// Horizontal add.  Returns the sum of the lanes of the vector.
             #[inline]
-            pub fn sum(self) -> $scalar {
+            pub fn horizontal_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_slice().iter().sum()
@@ -72,7 +72,7 @@ macro_rules! impl_float_reductions {
 
             /// Horizontal multiply.  Returns the product of the lanes of the vector.
             #[inline]
-            pub fn product(self) -> $scalar {
+            pub fn horizontal_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_slice().iter().product()
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 7ce85b77254..a1213e39e34 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -140,20 +140,20 @@ macro_rules! impl_binary_checked_op_test {
 macro_rules! impl_common_integer_tests {
     { $vector:ident, $scalar:ident } => {
         test_helpers::test_lanes! {
-            fn wrapping_sum<const LANES: usize>() {
+            fn horizontal_wrapping_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).wrapping_sum(),
+                        $vector::<LANES>::from_array(x).horizontal_wrapping_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
                     );
                     Ok(())
                 });
             }
 
-            fn wrapping_product<const LANES: usize>() {
+            fn horizontal_wrapping_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).wrapping_product(),
+                        $vector::<LANES>::from_array(x).horizontal_wrapping_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
                     );
                     Ok(())
@@ -479,20 +479,20 @@ macro_rules! impl_float_tests {
                     ).unwrap();
                 }
 
-                fn sum<const LANES: usize>() {
+                fn horizontal_sum<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
-                            Vector::<LANES>::from_array(x).sum(),
+                            Vector::<LANES>::from_array(x).horizontal_sum(),
                             x.iter().sum(),
                         );
                         Ok(())
                     });
                 }
 
-                fn product<const LANES: usize>() {
+                fn horizontal_product<const LANES: usize>() {
                     test_helpers::test_1(&|x| {
                         test_helpers::prop_assert_biteq! (
-                            Vector::<LANES>::from_array(x).product(),
+                            Vector::<LANES>::from_array(x).horizontal_product(),
                             x.iter().product(),
                         );
                         Ok(())

From 04ee1073237dc77b3742e7a1c0d3740c1df499c4 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 22 Apr 2021 22:41:12 +0000
Subject: [PATCH 17/17] Remove wrapping from sum/product fns

---
 crates/core_simd/src/reduction.rs    | 4 ++--
 crates/core_simd/tests/ops_macros.rs | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 86a34e4455d..382d366dd3d 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -6,13 +6,13 @@ macro_rules! impl_integer_reductions {
         {
             /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
-            pub fn horizontal_wrapping_sum(self) -> $scalar {
+            pub fn horizontal_sum(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
             }
 
             /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
-            pub fn horizontal_wrapping_product(self) -> $scalar {
+            pub fn horizontal_product(self) -> $scalar {
                 unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
             }
 
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index a1213e39e34..37f3b49a330 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -140,20 +140,20 @@ macro_rules! impl_binary_checked_op_test {
 macro_rules! impl_common_integer_tests {
     { $vector:ident, $scalar:ident } => {
         test_helpers::test_lanes! {
-            fn horizontal_wrapping_sum<const LANES: usize>() {
+            fn horizontal_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_wrapping_sum(),
+                        $vector::<LANES>::from_array(x).horizontal_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_wrapping_product<const LANES: usize>() {
+            fn horizontal_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_wrapping_product(),
+                        $vector::<LANES>::from_array(x).horizontal_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
                     );
                     Ok(())