Sync portable-simd to rust-lang/portable-simd@72df4c4505
parent c43129f837
commit 94c7da04b4
@@ -33,7 +33,7 @@ SIMD has a few special vocabulary terms you should know:

 * **Vertical:** When an operation is "vertical", each lane processes individually without regard to the other lanes in the same vector. For example, a "vertical add" between two vectors would add lane 0 in `a` with lane 0 in `b`, with the total in lane 0 of `out`, and then the same thing for lanes 1, 2, etc. Most SIMD operations are vertical operations, so if your problem is a vertical problem then you can probably solve it with SIMD.

 * **Horizontal:** When an operation is "horizontal", the lanes within a single vector interact in some way. A "horizontal add" might add up lane 0 of `a` with lane 1 of `a`, with the total in lane 0 of `out`.

 * **Reducing/Reduce:** When an operation is "reducing" (functions named `reduce_*`), the lanes within a single vector are merged using some operation such as addition, returning the merged value as a scalar. For instance, a reducing add would return the sum of all the lanes' values.

 * **Target Feature:** Rust calls a CPU architecture extension a `target_feature`. Proper SIMD requires various CPU extensions to be enabled (details below). Don't confuse this with `feature`, which is a Cargo crate concept.
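Illustration only (not part of the upstream diff): a minimal sketch of the vertical-vs-reducing distinction, assuming a nightly toolchain from around this sync, where `reduce_sum` is an inherent method gated on `#![feature(portable_simd)]`.

```rust
#![feature(portable_simd)]
use std::simd::u32x4;

fn main() {
    let a = u32x4::from_array([1, 2, 3, 4]);
    let b = u32x4::from_array([10, 20, 30, 40]);

    // Vertical add: lane i of `a` is combined with lane i of `b`.
    let v = a + b;
    assert_eq!(v.to_array(), [11, 22, 33, 44]);

    // Reducing add: the lanes of one vector are merged into a single scalar.
    // This commit renames this operation from `horizontal_sum` to `reduce_sum`.
    assert_eq!(v.reduce_sum(), 110);
}
```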
@@ -9,7 +9,7 @@ categories = ["hardware-support", "no-std"]
 license = "MIT OR Apache-2.0"
 
 [features]
-default = ["std", "generic_const_exprs"]
+default = []
 std = []
 generic_const_exprs = []
@@ -233,7 +233,7 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
-    let det = det.rotate_lanes_right::<2>() + det;
+    let det = det.reverse().rotate_lanes_right::<2>() + det;
 
-    if det.horizontal_sum() == 0. {
+    if det.reduce_sum() == 0. {
         return None;
     }
     // calculate the reciprocal
@@ -107,10 +107,10 @@ mod nbody {
         let mut e = 0.;
         for i in 0..N_BODIES {
             let bi = &bodies[i];
-            e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5;
+            e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5;
             for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
                 let dx = bi.x - bj.x;
-                e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt()
+                e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt()
             }
         }
         e
@@ -134,8 +134,8 @@ mod nbody {
         let mut mag = [0.0; N];
         for i in (0..N).step_by(2) {
             let d2s = f64x2::from_array([
-                (r[i] * r[i]).horizontal_sum(),
-                (r[i + 1] * r[i + 1]).horizontal_sum(),
+                (r[i] * r[i]).reduce_sum(),
+                (r[i + 1] * r[i + 1]).reduce_sum(),
             ]);
             let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
             mag[i] = dmags[0];
@@ -20,7 +20,7 @@ fn mult_av(v: &[f64], out: &mut [f64]) {
             sum += b / a;
             j += 2
         }
-        *out = sum.horizontal_sum();
+        *out = sum.reduce_sum();
     }
 }
@@ -38,7 +38,7 @@ fn mult_atv(v: &[f64], out: &mut [f64]) {
             sum += b / a;
             j += 2
         }
-        *out = sum.horizontal_sum();
+        *out = sum.reduce_sum();
     }
 }
@@ -66,3 +66,55 @@ where
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
     }
 }
+
+macro_rules! impl_ord_methods_vector {
+    { $type:ty } => {
+        impl<const LANES: usize> Simd<$type, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            /// Returns the lane-wise minimum with `other`.
+            #[must_use = "method returns a new vector and does not mutate the original value"]
+            #[inline]
+            pub fn min(self, other: Self) -> Self {
+                self.lanes_gt(other).select(other, self)
+            }
+
+            /// Returns the lane-wise maximum with `other`.
+            #[must_use = "method returns a new vector and does not mutate the original value"]
+            #[inline]
+            pub fn max(self, other: Self) -> Self {
+                self.lanes_lt(other).select(other, self)
+            }
+
+            /// Restrict each lane to a certain interval.
+            ///
+            /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
+            /// less than `min`. Otherwise returns `self`.
+            ///
+            /// # Panics
+            ///
+            /// Panics if `min > max` on any lane.
+            #[must_use = "method returns a new vector and does not mutate the original value"]
+            #[inline]
+            pub fn clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.lanes_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                self.max(min).min(max)
+            }
+        }
+    }
+}
+
+impl_ord_methods_vector!(i8);
+impl_ord_methods_vector!(i16);
+impl_ord_methods_vector!(i32);
+impl_ord_methods_vector!(i64);
+impl_ord_methods_vector!(isize);
+impl_ord_methods_vector!(u8);
+impl_ord_methods_vector!(u16);
+impl_ord_methods_vector!(u32);
+impl_ord_methods_vector!(u64);
+impl_ord_methods_vector!(usize);
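The `min`, `max`, and `clamp` methods added above are lane-wise rather than lexicographic, which the `*_is_not_lexicographic` tests later in this commit also check. A minimal sketch of that behaviour, assuming a nightly toolchain from around this sync where these are inherent methods on `Simd`:

```rust
#![feature(portable_simd)]
use std::simd::i16x2;

fn main() {
    let a = i16x2::splat(10);
    let b = i16x2::from_array([-4, 12]);

    // Each lane is compared independently, so this is not a lexicographic max/min.
    assert_eq!(a.max(b), i16x2::from_array([10, 12]));
    assert_eq!(a.min(b), i16x2::from_array([-4, 10]));

    // clamp restricts each lane to its own [min, max] interval.
    let x = i16x2::from_array([1, 10]);
    assert_eq!(x.clamp(i16x2::splat(0), i16x2::splat(9)), i16x2::from_array([1, 9]));
}
```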
@@ -18,7 +18,6 @@
 //!
 //! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
 
 // These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
 // mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
 // The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
@@ -130,6 +129,14 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
 
     // truncate integer vector to bitmask
     // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and
     // returns either an unsigned integer or array of `u8`.
     // Every element in the vector becomes a single bit in the returned bitmask.
     // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
     // The bit order of the result depends on the byte endianness. LSB-first for little
     // endian and MSB-first for big endian.
     //
     // UB if called on a vector with values other than 0 and -1.
     #[allow(unused)]
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
@@ -1,6 +1,5 @@
-#![cfg_attr(not(feature = "std"), no_std)]
+#![no_std]
 #![feature(
     const_fn_trait_bound,
     convert_float_to_int,
     decl_macro,
     intra_doc_pointers,
@@ -50,6 +50,9 @@ macro_rules! impl_integer_intrinsic {
 }
 
 impl_integer_intrinsic! {
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 1>
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 2>
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 4>
     unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
     unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
     unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
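These impls map mask lane counts onto concrete unsigned bitmask types. A hedged sketch of what the trait enables for callers (assuming the nightly `ToBitMask` API as it existed around this sync, with lane 0 mapping to the least significant bit):

```rust
#![feature(portable_simd)]
use std::simd::{mask32x4, ToBitMask};

fn main() {
    // Lanes: [true, false, false, true]
    let m = mask32x4::from_array([true, false, false, true]);

    // With four lanes the bitmask type is u8; lane 0 is the least significant bit.
    let bits: u8 = m.to_bitmask();
    assert_eq!(bits, 0b1001);
}
```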
@@ -10,8 +10,7 @@ macro_rules! impl_uint_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
         /// let x = Simd::from_array([2, 1, 0, MAX]);
         /// let max = Simd::splat(MAX);
@@ -31,8 +30,7 @@ macro_rules! impl_uint_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
         /// let x = Simd::from_array([2, 1, 0, MAX]);
         /// let max = Simd::splat(MAX);
@@ -58,8 +56,7 @@ macro_rules! impl_int_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
         /// let x = Simd::from_array([MIN, 0, 1, MAX]);
         /// let max = Simd::splat(MAX);
@@ -79,8 +76,7 @@ macro_rules! impl_int_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
         /// let x = Simd::from_array([MIN, -2, -1, MAX]);
         /// let max = Simd::splat(MAX);
@@ -100,8 +96,7 @@ macro_rules! impl_int_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
         /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
         /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
@@ -119,8 +114,7 @@ macro_rules! impl_int_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
         /// let xs = Simd::from_array([MIN, -2, 0, 3]);
         /// let unsat = xs.abs();
@@ -142,8 +136,7 @@ macro_rules! impl_int_arith {
         /// # Examples
         /// ```
         /// # #![feature(portable_simd)]
-        /// # #[cfg(feature = "std")] use core_simd::Simd;
-        /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+        /// # use core::simd::Simd;
         #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
         /// let x = Simd::from_array([MIN, -2, 3, MAX]);
         /// let unsat = -x;
@@ -11,30 +11,30 @@ macro_rules! impl_integer_reductions {
         where
             LaneCount<LANES>: SupportedLaneCount,
         {
-            /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
+            /// Reducing wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
-            pub fn horizontal_sum(self) -> $scalar {
+            pub fn reduce_sum(self) -> $scalar {
                 // Safety: `self` is an integer vector
                 unsafe { simd_reduce_add_ordered(self, 0) }
             }
 
-            /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
+            /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
-            pub fn horizontal_product(self) -> $scalar {
+            pub fn reduce_product(self) -> $scalar {
                 // Safety: `self` is an integer vector
                 unsafe { simd_reduce_mul_ordered(self, 1) }
             }
 
-            /// Horizontal maximum. Returns the maximum lane in the vector.
+            /// Reducing maximum. Returns the maximum lane in the vector.
             #[inline]
-            pub fn horizontal_max(self) -> $scalar {
+            pub fn reduce_max(self) -> $scalar {
                 // Safety: `self` is an integer vector
                 unsafe { simd_reduce_max(self) }
             }
 
-            /// Horizontal minimum. Returns the minimum lane in the vector.
+            /// Reducing minimum. Returns the minimum lane in the vector.
             #[inline]
-            pub fn horizontal_min(self) -> $scalar {
+            pub fn reduce_min(self) -> $scalar {
                 // Safety: `self` is an integer vector
                 unsafe { simd_reduce_min(self) }
             }
@@ -60,9 +60,9 @@ macro_rules! impl_float_reductions {
             LaneCount<LANES>: SupportedLaneCount,
         {
 
-            /// Horizontal add. Returns the sum of the lanes of the vector.
+            /// Reducing add. Returns the sum of the lanes of the vector.
             #[inline]
-            pub fn horizontal_sum(self) -> $scalar {
+            pub fn reduce_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_array().iter().sum()
@@ -72,9 +72,9 @@ macro_rules! impl_float_reductions {
                 }
             }
 
-            /// Horizontal multiply. Returns the product of the lanes of the vector.
+            /// Reducing multiply. Returns the product of the lanes of the vector.
             #[inline]
-            pub fn horizontal_product(self) -> $scalar {
+            pub fn reduce_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_array().iter().product()
@@ -84,22 +84,22 @@ macro_rules! impl_float_reductions {
                 }
            }
 
-            /// Horizontal maximum. Returns the maximum lane in the vector.
+            /// Reducing maximum. Returns the maximum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either. This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
-            pub fn horizontal_max(self) -> $scalar {
+            pub fn reduce_max(self) -> $scalar {
                 // Safety: `self` is a float vector
                 unsafe { simd_reduce_max(self) }
             }
 
-            /// Horizontal minimum. Returns the minimum lane in the vector.
+            /// Reducing minimum. Returns the minimum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
             /// return either. This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
-            pub fn horizontal_min(self) -> $scalar {
+            pub fn reduce_min(self) -> $scalar {
                 // Safety: `self` is a float vector
                 unsafe { simd_reduce_min(self) }
             }
@@ -116,10 +116,10 @@ where
     T: SimdElement + BitAnd<T, Output = T>,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of
+    /// Reducing bitwise "and". Returns the cumulative bitwise "and" across the lanes of
     /// the vector.
     #[inline]
-    pub fn horizontal_and(self) -> T {
+    pub fn reduce_and(self) -> T {
         unsafe { simd_reduce_and(self) }
     }
 }
@@ -130,10 +130,10 @@ where
     T: SimdElement + BitOr<T, Output = T>,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of
+    /// Reducing bitwise "or". Returns the cumulative bitwise "or" across the lanes of
     /// the vector.
     #[inline]
-    pub fn horizontal_or(self) -> T {
+    pub fn reduce_or(self) -> T {
         unsafe { simd_reduce_or(self) }
     }
 }
@@ -144,10 +144,10 @@ where
     T: SimdElement + BitXor<T, Output = T>,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of
+    /// Reducing bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of
     /// the vector.
     #[inline]
-    pub fn horizontal_xor(self) -> T {
+    pub fn reduce_xor(self) -> T {
         unsafe { simd_reduce_xor(self) }
     }
 }
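Taken together, the reduction hunks above rename every `horizontal_*` method to `reduce_*` without changing behaviour. A minimal before/after sketch for callers (assuming a nightly toolchain that already includes this sync):

```rust
#![feature(portable_simd)]
use std::simd::f32x4;

fn main() {
    let v = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);

    // Before this sync these were spelled horizontal_sum(), horizontal_max(), horizontal_min().
    assert_eq!(v.reduce_sum(), 10.0);
    assert_eq!(v.reduce_max(), 4.0);
    assert_eq!(v.reduce_min(), 1.0);
}
```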
@@ -14,8 +14,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
-    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+    /// # use core::simd::{Simd, Mask};
     /// let a = Simd::from_array([0, 1, 2, 3]);
     /// let b = Simd::from_array([4, 5, 6, 7]);
     /// let mask = Mask::from_array([true, false, false, true]);
@@ -45,8 +44,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Mask;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Mask;
+    /// # use core::simd::Mask;
     /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
     /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
     /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
@@ -12,8 +12,7 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 /// ## One source vector
 /// ```
 /// # #![feature(portable_simd)]
-/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle};
-/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle};
+/// # use core::simd::{Simd, simd_swizzle};
 /// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
 ///
 /// // Keeping the same size
@@ -28,8 +27,7 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 /// ## Two source vectors
 /// ```
 /// # #![feature(portable_simd)]
-/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle, Which};
-/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle, Which};
+/// # use core::simd::{Simd, simd_swizzle, Which};
 /// use Which::*;
 /// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
 /// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
@@ -273,8 +271,7 @@ where
     ///
     /// ```
     /// #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let a = Simd::from_array([0, 1, 2, 3]);
     /// let b = Simd::from_array([4, 5, 6, 7]);
     /// let (x, y) = a.interleave(b);
@@ -337,8 +334,7 @@ where
     ///
     /// ```
     /// #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let a = Simd::from_array([0, 4, 1, 5]);
     /// let b = Simd::from_array([2, 6, 3, 7]);
     /// let (x, y) = a.deinterleave(b);
@@ -153,8 +153,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
     /// let ints = floats.cast::<i32>();
     /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
@@ -180,8 +179,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -201,8 +199,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     ///
@@ -225,8 +222,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
-    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+    /// # use core::simd::{Simd, Mask};
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -260,8 +256,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
-    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+    /// # use core::simd::{Simd, Mask};
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -296,8 +291,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::Simd;
-    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// # use core::simd::Simd;
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -319,8 +313,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
-    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+    /// # use core::simd::{Simd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -354,8 +347,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
-    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+    /// # use core::simd::{Simd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -1,5 +1,32 @@
 #![feature(portable_simd)]
+use core_simd::i16x2;
 
 #[macro_use]
 mod ops_macros;
 impl_signed_tests! { i16 }
+
+#[test]
+fn max_is_not_lexicographic() {
+    let a = i16x2::splat(10);
+    let b = i16x2::from_array([-4, 12]);
+    assert_eq!(a.max(b), i16x2::from_array([10, 12]));
+}
+
+#[test]
+fn min_is_not_lexicographic() {
+    let a = i16x2::splat(10);
+    let b = i16x2::from_array([12, -4]);
+    assert_eq!(a.min(b), i16x2::from_array([10, -4]));
+}
+
+#[test]
+fn clamp_is_not_lexicographic() {
+    let a = i16x2::splat(10);
+    let lo = i16x2::from_array([-12, -4]);
+    let up = i16x2::from_array([-4, 12]);
+    assert_eq!(a.clamp(lo, up), i16x2::from_array([-4, 10]));
+
+    let x = i16x2::from_array([1, 10]);
+    let y = x.clamp(i16x2::splat(0), i16x2::splat(9));
+    assert_eq!(y, i16x2::from_array([1, 9]));
+}
@@ -94,70 +94,70 @@ macro_rules! impl_binary_checked_op_test {
 macro_rules! impl_common_integer_tests {
     { $vector:ident, $scalar:ident } => {
         test_helpers::test_lanes! {
-            fn horizontal_sum<const LANES: usize>() {
+            fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_sum(),
+                        $vector::<LANES>::from_array(x).reduce_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_product<const LANES: usize>() {
+            fn reduce_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_product(),
+                        $vector::<LANES>::from_array(x).reduce_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_and<const LANES: usize>() {
+            fn reduce_and<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_and(),
+                        $vector::<LANES>::from_array(x).reduce_and(),
                         x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_or<const LANES: usize>() {
+            fn reduce_or<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_or(),
+                        $vector::<LANES>::from_array(x).reduce_or(),
                         x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_xor<const LANES: usize>() {
+            fn reduce_xor<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_xor(),
+                        $vector::<LANES>::from_array(x).reduce_xor(),
                         x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_max<const LANES: usize>() {
+            fn reduce_max<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_max(),
+                        $vector::<LANES>::from_array(x).reduce_max(),
                         x.iter().copied().max().unwrap(),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_min<const LANES: usize>() {
+            fn reduce_min<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        $vector::<LANES>::from_array(x).horizontal_min(),
+                        $vector::<LANES>::from_array(x).reduce_min(),
                         x.iter().copied().min().unwrap(),
                     );
                     Ok(())
@@ -222,6 +222,35 @@ macro_rules! impl_signed_tests {
                 assert_eq!(a % b, Vector::<LANES>::splat(0));
             }
 
+            fn min<const LANES: usize>() {
+                let a = Vector::<LANES>::splat(Scalar::MIN);
+                let b = Vector::<LANES>::splat(0);
+                assert_eq!(a.min(b), a);
+                let a = Vector::<LANES>::splat(Scalar::MAX);
+                let b = Vector::<LANES>::splat(0);
+                assert_eq!(a.min(b), b);
+            }
+
+            fn max<const LANES: usize>() {
+                let a = Vector::<LANES>::splat(Scalar::MIN);
+                let b = Vector::<LANES>::splat(0);
+                assert_eq!(a.max(b), b);
+                let a = Vector::<LANES>::splat(Scalar::MAX);
+                let b = Vector::<LANES>::splat(0);
+                assert_eq!(a.max(b), a);
+            }
+
+            fn clamp<const LANES: usize>() {
+                let min = Vector::<LANES>::splat(Scalar::MIN);
+                let max = Vector::<LANES>::splat(Scalar::MAX);
+                let zero = Vector::<LANES>::splat(0);
+                let one = Vector::<LANES>::splat(1);
+                let negone = Vector::<LANES>::splat(-1);
+                assert_eq!(zero.clamp(min, max), zero);
+                assert_eq!(zero.clamp(min, one), zero);
+                assert_eq!(zero.clamp(one, max), one);
+                assert_eq!(zero.clamp(min, negone), negone);
+            }
         }
 
         test_helpers::test_lanes_panic! {
@@ -499,29 +528,29 @@ macro_rules! impl_float_tests {
                 })
             }
 
-            fn horizontal_sum<const LANES: usize>() {
+            fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        Vector::<LANES>::from_array(x).horizontal_sum(),
+                        Vector::<LANES>::from_array(x).reduce_sum(),
                         x.iter().sum(),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_product<const LANES: usize>() {
+            fn reduce_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (
-                        Vector::<LANES>::from_array(x).horizontal_product(),
+                        Vector::<LANES>::from_array(x).reduce_product(),
                         x.iter().product(),
                     );
                     Ok(())
                 });
             }
 
-            fn horizontal_max<const LANES: usize>() {
+            fn reduce_max<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
-                    let vmax = Vector::<LANES>::from_array(x).horizontal_max();
+                    let vmax = Vector::<LANES>::from_array(x).reduce_max();
                     let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
                     // 0 and -0 are treated the same
                     if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
@@ -531,9 +560,9 @@ macro_rules! impl_float_tests {
                 });
             }
 
-            fn horizontal_min<const LANES: usize>() {
+            fn reduce_min<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
-                    let vmax = Vector::<LANES>::from_array(x).horizontal_min();
+                    let vmax = Vector::<LANES>::from_array(x).reduce_min();
                     let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
                     // 0 and -0 are treated the same
                     if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
@@ -9,7 +9,6 @@ macro_rules! float_rounding_test {
         type Scalar = $scalar;
         type IntScalar = $int_scalar;
 
-        #[cfg(feature = "std")]
         test_helpers::test_lanes! {
             fn ceil<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
@@ -6,7 +6,7 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-core_simd = { path = "../core_simd" }
+core_simd = { path = "../core_simd", default-features = false }
 
 [features]
 default = ["as_crate"]
@@ -77,11 +77,21 @@ impl<T: core::fmt::Debug + DefaultStrategy, const LANES: usize> DefaultStrategy
     }
 }
 
+#[cfg(not(miri))]
+fn make_runner() -> proptest::test_runner::TestRunner {
+    Default::default()
+}
+#[cfg(miri)]
+fn make_runner() -> proptest::test_runner::TestRunner {
+    // Only run a few tests on Miri
+    proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4))
+}
+
 /// Test a function that takes a single value.
 pub fn test_1<A: core::fmt::Debug + DefaultStrategy>(
     f: &dyn Fn(A) -> proptest::test_runner::TestCaseResult,
 ) {
-    let mut runner = proptest::test_runner::TestRunner::default();
+    let mut runner = make_runner();
     runner.run(&A::default_strategy(), f).unwrap();
 }
 
@@ -89,7 +99,7 @@ pub fn test_1<A: core::fmt::Debug + DefaultStrategy>(
 pub fn test_2<A: core::fmt::Debug + DefaultStrategy, B: core::fmt::Debug + DefaultStrategy>(
     f: &dyn Fn(A, B) -> proptest::test_runner::TestCaseResult,
 ) {
-    let mut runner = proptest::test_runner::TestRunner::default();
+    let mut runner = make_runner();
     runner
         .run(&(A::default_strategy(), B::default_strategy()), |(a, b)| {
             f(a, b)
@@ -105,7 +115,7 @@ pub fn test_3<
 >(
     f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult,
 ) {
-    let mut runner = proptest::test_runner::TestRunner::default();
+    let mut runner = make_runner();
     runner
         .run(
             &(
@@ -361,24 +371,28 @@ macro_rules! test_lanes {
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+            #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
             fn lanes_8() {
                 implementation::<8>();
             }
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+            #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
             fn lanes_16() {
                 implementation::<16>();
            }
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+            #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
             fn lanes_32() {
                 implementation::<32>();
             }
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+            #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
             fn lanes_64() {
                 implementation::<64>();
             }