From 11c43c0c160539b6d040539b668e0142769537a5 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 21 Jul 2023 15:33:09 -0400
Subject: [PATCH 01/59] Fix is_subnormal on architectures that flush subnormals
 to zero

---
 crates/core_simd/src/elements/float.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index 501c1c5ddd3..d700011ff9c 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -336,7 +336,10 @@ fn is_finite(self) -> Self::Mask {
 
             #[inline]
             fn is_subnormal(self) -> Self::Mask {
-                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+                // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
+                // so this comparison must be done with integers.
+                let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
+                not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
             }
 
             #[inline]

From dc0ba7836528b7d8720b868e331a378ceaf8fa95 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 21 Jul 2023 15:44:15 -0400
Subject: [PATCH 02/59] Don't require strict equality when subnormals are
 flushed

---
 crates/test_helpers/Cargo.toml   |  7 +++----
 crates/test_helpers/src/biteq.rs |  2 ++
 crates/test_helpers/src/lib.rs   | 13 +++++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml
index 1d2bc8b519a..bd481db6bac 100644
--- a/crates/test_helpers/Cargo.toml
+++ b/crates/test_helpers/Cargo.toml
@@ -4,10 +4,9 @@ version = "0.1.0"
 edition = "2021"
 publish = false
 
-[dependencies.proptest]
-version = "0.10"
-default-features = false
-features = ["alloc"]
+[dependencies]
+float_eq = "1.0"
+proptest = { version = "0.10", default-features = false, features = ["alloc"] }
 
 [features]
 all_lane_counts = []
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
index 7d91260d838..515eaf1c048 100644
--- a/crates/test_helpers/src/biteq.rs
+++ b/crates/test_helpers/src/biteq.rs
@@ -40,6 +40,8 @@ impl BitEq for $type {
             fn biteq(&self, other: &Self) -> bool {
                 if self.is_nan() && other.is_nan() {
                     true // exact nan bits don't matter
+                } else if crate::flush_subnormals::<Self>() {
+                    self.to_bits() == other.to_bits() || float_eq::float_eq!(self, other, abs <= 2. * <$type>::EPSILON)
                 } else {
                     self.to_bits() == other.to_bits()
                 }
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index b26cdc311a2..1b98bccf706 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -6,6 +6,19 @@
 #[macro_use]
 pub mod biteq;
 
+/// Indicates if subnormal floats are flushed to zero.
+pub fn flush_subnormals<T>() -> bool {
+    let is_f32 = core::mem::size_of::<T>() == 4;
+    let ppc_flush = is_f32
+        && cfg!(all(
+            target_arch = "powerpc64",
+            target_endian = "big",
+            not(target_feature = "vsx")
+        ));
+    let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
+    ppc_flush || arm_flush
+}
+
 /// Specifies the default strategy for testing a type.
 ///
 /// This strategy should be what "makes sense" to test.

From 38c7ba09dd6c81ff76477a7e54b561c07f1d1db0 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 00:41:21 -0400
Subject: [PATCH 03/59] Handle subnormal numbers exactly

---
 crates/core_simd/tests/ops_macros.rs  |  23 ++++--
 crates/test_helpers/Cargo.toml        |   1 -
 crates/test_helpers/src/biteq.rs      |  34 +++++++-
 crates/test_helpers/src/lib.rs        | 108 ++++++++++++++++++++++----
 crates/test_helpers/src/subnormals.rs |  39 ++++++++++
 5 files changed, 181 insertions(+), 24 deletions(-)
 create mode 100644 crates/test_helpers/src/subnormals.rs

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 3a02f3f01e1..1f5d9488ad0 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
     { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
         test_helpers::test_lanes! {
             fn $fn<const LANES: usize>() {
-                test_helpers::test_unary_elementwise(
+                test_helpers::test_unary_elementwise_flush_subnormals(
                     &<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                     &$scalar_fn,
                     &|_| true,
@@ -31,7 +31,7 @@ mod $fn {
 
             test_helpers::test_lanes! {
                 fn normal<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                         &$scalar_fn,
                         &|_, _| true,
@@ -39,7 +39,7 @@ fn normal<const LANES: usize>() {
                 }
 
                 fn assign<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                         &$scalar_fn,
                         &|_, _| true,
@@ -433,7 +433,7 @@ fn recip<const LANES: usize>() {
                 }
 
                 fn to_degrees<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_degrees,
                         &Scalar::to_degrees,
                         &|_| true,
@@ -441,7 +441,7 @@ fn to_degrees<const LANES: usize>() {
                 }
 
                 fn to_radians<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_radians,
                         &Scalar::to_radians,
                         &|_| true,
@@ -512,6 +512,7 @@ fn simd_max<const LANES: usize>() {
 
                 fn simd_clamp<const LANES: usize>() {
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+                        use test_helpers::subnormals::FlushSubnormals;
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {
                                 core::mem::swap(min, max);
@@ -528,8 +529,18 @@ fn simd_clamp<const LANES: usize>() {
                         for i in 0..LANES {
                             result_scalar[i] = value[i].clamp(min[i], max[i]);
                         }
+                        let mut result_scalar_flush = [Scalar::default(); LANES];
+                        for i in 0..LANES {
+                            result_scalar_flush[i] = value[i];
+                            if FlushSubnormals::flush(value[i]) < FlushSubnormals::flush(min[i]) {
+                                result_scalar_flush[i] = min[i];
+                            }
+                            if FlushSubnormals::flush(value[i]) > FlushSubnormals::flush(max[i]) {
+                                result_scalar_flush[i] = max[i];
+                            }
+                        }
                         let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
-                        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+                        test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
                         Ok(())
                     })
                 }
diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml
index bd481db6bac..23dae7c9338 100644
--- a/crates/test_helpers/Cargo.toml
+++ b/crates/test_helpers/Cargo.toml
@@ -5,7 +5,6 @@ edition = "2021"
 publish = false
 
 [dependencies]
-float_eq = "1.0"
 proptest = { version = "0.10", default-features = false, features = ["alloc"] }
 
 [features]
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
index 515eaf1c048..cbc20cda0d6 100644
--- a/crates/test_helpers/src/biteq.rs
+++ b/crates/test_helpers/src/biteq.rs
@@ -40,8 +40,6 @@ impl BitEq for $type {
             fn biteq(&self, other: &Self) -> bool {
                 if self.is_nan() && other.is_nan() {
                     true // exact nan bits don't matter
-                } else if crate::flush_subnormals::<Self>() {
-                    self.to_bits() == other.to_bits() || float_eq::float_eq!(self, other, abs <= 2. * <$type>::EPSILON)
                 } else {
                     self.to_bits() == other.to_bits()
                 }
@@ -115,6 +113,27 @@ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
     }
 }
 
+#[doc(hidden)]
+pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
+
+impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
+    fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
+        self.0.biteq(other.0) || self.0.biteq(other.1)
+    }
+}
+
+impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.0.biteq(self.1) {
+            self.0.fmt(f)
+        } else {
+            self.0.fmt(f)?;
+            write!(f, " or ")?;
+            self.1.fmt(f)
+        }
+    }
+}
+
 #[macro_export]
 macro_rules! prop_assert_biteq {
     { $a:expr, $b:expr $(,)? } => {
@@ -124,5 +143,14 @@ macro_rules! prop_assert_biteq {
             let b = $b;
             proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
         }
-    }
+    };
+    { $a:expr, $b:expr, $c:expr $(,)? } => {
+        {
+            use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
+            let a = $a;
+            let b = $b;
+            let c = $c;
+            proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
+        }
+    };
 }
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index 1b98bccf706..63308a2ca33 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -6,18 +6,8 @@
 #[macro_use]
 pub mod biteq;
 
-/// Indicates if subnormal floats are flushed to zero.
-pub fn flush_subnormals<T>() -> bool {
-    let is_f32 = core::mem::size_of::<T>() == 4;
-    let ppc_flush = is_f32
-        && cfg!(all(
-            target_arch = "powerpc64",
-            target_endian = "big",
-            not(target_feature = "vsx")
-        ));
-    let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
-    ppc_flush || arm_flush
-}
+pub mod subnormals;
+use subnormals::FlushSubnormals;
 
 /// Specifies the default strategy for testing a type.
 ///
@@ -164,7 +154,6 @@ pub fn test_3<
 }
 
 /// Test a unary vector function against a unary scalar function, applied elementwise.
-#[inline(never)]
 pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
     fv: &dyn Fn(Vector) -> VectorResult,
     fs: &dyn Fn(Scalar) -> ScalarResult,
@@ -190,6 +179,48 @@ pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const
     });
 }
 
+/// Test a unary vector function against a unary scalar function, applied elementwise.
+///
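+/// The vector result must match (via `prop_assert_biteq!`) either the plain scalar result or the scalar result computed with `FlushSubnormals::flush` applied to the input and the output.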
+pub fn test_unary_elementwise_flush_subnormals<
+    Scalar,
+    ScalarResult,
+    Vector,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector) -> VectorResult,
+    fs: &dyn Fn(Scalar) -> ScalarResult,
+    check: &dyn Fn([Scalar; LANES]) -> bool,
+) where
+    Scalar: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    let flush = |x: Scalar| FlushSubnormals::flush(fs(FlushSubnormals::flush(x)));
+    test_1(&|x: [Scalar; LANES]| {
+        proptest::prop_assume!(check(x));
+        let result_v: [ScalarResult; LANES] = fv(x.into()).into();
+        let result_s: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .map(fs)
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        let result_sf: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .map(flush)
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s, result_sf);
+        Ok(())
+    });
+}
+
 /// Test a unary vector function against a unary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
@@ -217,7 +248,6 @@ pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
 }
 
 /// Test a binary vector function against a binary scalar function, applied elementwise.
-#[inline(never)]
 pub fn test_binary_elementwise<
     Scalar1,
     Scalar2,
@@ -254,6 +284,56 @@ pub fn test_binary_elementwise<
     });
 }
 
+/// Test a binary vector function against a binary scalar function, applied elementwise.
+///
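+/// The vector result must match (via `prop_assert_biteq!`) either the plain scalar result or the scalar result computed with `FlushSubnormals::flush` applied to both inputs and the output.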
+pub fn test_binary_elementwise_flush_subnormals<
+    Scalar1,
+    Scalar2,
+    ScalarResult,
+    Vector1,
+    Vector2,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+    Scalar1: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Scalar2: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    let flush = |x: Scalar1, y: Scalar2| {
+        FlushSubnormals::flush(fs(FlushSubnormals::flush(x), FlushSubnormals::flush(y)))
+    };
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_v: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
+        let result_s: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| fs(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        let result_sf: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| flush(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s, result_sf);
+        Ok(())
+    });
+}
+
 /// Test a binary vector-scalar function against a binary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_binary_scalar_rhs_elementwise<
diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs
new file mode 100644
index 00000000000..122304f96db
--- /dev/null
+++ b/crates/test_helpers/src/subnormals.rs
@@ -0,0 +1,39 @@
+pub trait FlushSubnormals: Sized {
+    fn flush(self) -> Self {
+        self
+    }
+}
+
+impl<T> FlushSubnormals for *const T {}
+impl<T> FlushSubnormals for *mut T {}
+
+macro_rules! impl_float {
+    { $($ty:ty),* } => {
+        $(
+        impl FlushSubnormals for $ty {
+            fn flush(self) -> Self {
+                let is_f32 = core::mem::size_of::<Self>() == 4;
+                let ppc_flush = is_f32 && cfg!(all(target_arch = "powerpc64", target_endian = "big", not(target_feature = "vsx")));
+                let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
+                let flush = ppc_flush || arm_flush;
+                if flush && self.is_subnormal() {
+                    <$ty>::copysign(0., self)
+                } else {
+                    self
+                }
+            }
+        }
+        )*
+    }
+}
+
+macro_rules! impl_else {
+    { $($ty:ty),* } => {
+        $(
+        impl FlushSubnormals for $ty {}
+        )*
+    }
+}
+
+impl_float! { f32, f64 }
+impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }

From 8c89a7240c668b92cbd66d8afc1d939ae45141c4 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 00:44:48 -0400
Subject: [PATCH 04/59] Enable more cross tests

---
 .github/workflows/ci.yml | 34 ++++++++++------------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1ff377fce34..5ae654bef3b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -171,36 +171,19 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      # TODO: Sadly, we cant configure target-feature in a meaningful way
-      # because `cross` doesn't tell qemu to enable any non-default cpu
-      # features, nor does it give us a way to do so.
-      #
-      # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
-      # This is a lot more complex... but in practice it's likely that we can just
-      # snarf the docker config from around [here][1000-dockerfiles].
-      #
-      # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
-      # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
 
       matrix:
         target:
-          - i586-unknown-linux-gnu
-          # 32-bit arm has a few idiosyncracies like having subnormal flushing
-          # to zero on by default. Ideally we'd set
           - armv7-unknown-linux-gnueabihf
           - aarch64-unknown-linux-gnu
-          # Note: The issue above means neither of these mips targets will use
-          # MSA (mips simd) but MIPS uses a nonstandard binary representation
-          # for NaNs which makes it worth testing on despite that.
+          - powerpc-unknown-linux-gnu
+          - powerpc64-unknown-linux-gnu
+          - powerpc64le-unknown-linux-gnu
+          - riscv64gc-unknown-linux-gnu
+          # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
-          - riscv64gc-unknown-linux-gnu
-          # TODO this test works, but it appears to time out
-          # - powerpc-unknown-linux-gnu
-          # TODO this test is broken, but it appears to be a problem with QEMU, not us.
-          # - powerpc64le-unknown-linux-gnu
-          # TODO enable this once a new version of cross is released
-          # - powerpc64-unknown-linux-gnu
+        target_feature: ["", "+native"]
 
     steps:
       - uses: actions/checkout@v2
@@ -217,11 +200,14 @@ jobs:
         # being part of the tarball means we can't just use the download/latest
         # URL :(
         run: |
-          CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
+          CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
           mkdir -p "$HOME/.bin"
           curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
           echo "$HOME/.bin" >> $GITHUB_PATH
 
+      - name: Configure RUSTFLAGS
+        run: echo "-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
+
       - name: Test (debug)
         run: cross test --verbose --target=${{ matrix.target }}
 

From 36c8bf363fa8f5afe6e56641ae7fd4ae65bb1deb Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 08:55:55 -0400
Subject: [PATCH 05/59] Improve cross tests

---
 .github/workflows/ci.yml | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5ae654bef3b..7758409a92d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -167,7 +167,7 @@ jobs:
             RUSTFLAGS: ${{ matrix.rustflags }}
 
   cross-tests:
-    name: "${{ matrix.target }} (via cross)"
+    name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -175,6 +175,7 @@ jobs:
       matrix:
         target:
           - armv7-unknown-linux-gnueabihf
+          - thumbv7neon-unknown-linux-gnueabihf
           - aarch64-unknown-linux-gnu
           - powerpc-unknown-linux-gnu
           - powerpc64-unknown-linux-gnu
@@ -183,7 +184,11 @@ jobs:
           # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
-        target_feature: ["", "+native"]
+        target_feature: "default"
+        include:
+          - { target: powerpc64-unknown-linux-gnu, target_feature: "native" }
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" }
+          - { target: riscv64gc-unknown-linux-gnu, target_feature: "native" }
 
     steps:
       - uses: actions/checkout@v2
@@ -206,7 +211,18 @@ jobs:
           echo "$HOME/.bin" >> $GITHUB_PATH
 
       - name: Configure RUSTFLAGS
-        run: echo "-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
+        shell: bash
+        run: |
+          case "${{ matrix.target_feature }}" in
+            default)
+              echo "RUSTFLAGS=" >> $GITHUB_ENV;;
+            native)
+              echo "RUSTFLAGS=-Ctarget-cpu=native" >> $GITHUB_ENV
+              ;;
+            *)
+              echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
+              ;;
+          esac
 
       - name: Test (debug)
         run: cross test --verbose --target=${{ matrix.target }}

From 40f04353d8fa40484a1df8ca15091b07413b5655 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 08:58:37 -0400
Subject: [PATCH 06/59] Fix workflow

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7758409a92d..73d38b41a38 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -184,7 +184,7 @@ jobs:
           # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
-        target_feature: "default"
+        target_feature: [default]
         include:
           - { target: powerpc64-unknown-linux-gnu, target_feature: "native" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" }

From 415b50f6715c7967fc83c5a05dd6917a344f0b5f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 09:16:42 -0400
Subject: [PATCH 07/59] Flush subnormals in f32::fract test

---
 crates/core_simd/tests/round.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
index aacf7bd3bcc..191c39e2370 100644
--- a/crates/core_simd/tests/round.rs
+++ b/crates/core_simd/tests/round.rs
@@ -43,7 +43,7 @@ fn trunc<const LANES: usize>() {
                 }
 
                 fn fract<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::fract,
                         &Scalar::fract,
                         &|_| true,

From 1948b02e40293c56e9498a5e553d29d48fcc3836 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 09:35:18 -0400
Subject: [PATCH 08/59] Don't use native cpu in cross

---
 .github/workflows/ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 73d38b41a38..d36cad638af 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -186,9 +186,9 @@ jobs:
           # - mips64-unknown-linux-gnuabi64
         target_feature: [default]
         include:
-          - { target: powerpc64-unknown-linux-gnu, target_feature: "native" }
-          - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" }
-          - { target: riscv64gc-unknown-linux-gnu, target_feature: "native" }
+          - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+          - { target: riscv64gc-unknown-linux-gnu, target_feature: "+zvl128b" }
 
     steps:
       - uses: actions/checkout@v2

From 94f20143a9d6f69f5472565c6694d05277112ad3 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 09:48:26 -0400
Subject: [PATCH 09/59] Enable v extension on riscv

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d36cad638af..89c355e1190 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -188,7 +188,7 @@ jobs:
         include:
           - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
-          - { target: riscv64gc-unknown-linux-gnu, target_feature: "+zvl128b" }
+          - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
     steps:
       - uses: actions/checkout@v2

From 5c6405ba8956d5a0252789b7152fb0c5b684b67f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 10:24:32 -0400
Subject: [PATCH 10/59] Disable riscv v extension

---
 .github/workflows/ci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 89c355e1190..c0429a1332c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -188,7 +188,8 @@ jobs:
         include:
           - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
-          - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
+          # We should test this, but cross currently can't run it
+          # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
     steps:
       - uses: actions/checkout@v2

From 49e92a2918f9f3d95ad8a0060a2d63d6f7b52950 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 14:18:16 -0400
Subject: [PATCH 11/59] Improve powerpc subnormal flushing check

---
 .github/workflows/ci.yml              | 4 +---
 crates/test_helpers/src/subnormals.rs | 6 +++++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c0429a1332c..42172968341 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -186,6 +186,7 @@ jobs:
           # - mips64-unknown-linux-gnuabi64
         target_feature: [default]
         include:
+          - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
           - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
           # We should test this, but cross currently can't run it
@@ -217,9 +218,6 @@ jobs:
           case "${{ matrix.target_feature }}" in
             default)
               echo "RUSTFLAGS=" >> $GITHUB_ENV;;
-            native)
-              echo "RUSTFLAGS=-Ctarget-cpu=native" >> $GITHUB_ENV
-              ;;
             *)
               echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
               ;;
diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs
index 122304f96db..d46e8524116 100644
--- a/crates/test_helpers/src/subnormals.rs
+++ b/crates/test_helpers/src/subnormals.rs
@@ -13,7 +13,11 @@ macro_rules! impl_float {
         impl FlushSubnormals for $ty {
             fn flush(self) -> Self {
                 let is_f32 = core::mem::size_of::<Self>() == 4;
-                let ppc_flush = is_f32 && cfg!(all(target_arch = "powerpc64", target_endian = "big", not(target_feature = "vsx")));
+                let ppc_flush = is_f32 && cfg!(all(
+                    any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")),
+                    target_feature = "altivec",
+                    not(target_feature = "vsx"),
+                ));
                 let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
                 let flush = ppc_flush || arm_flush;
                 if flush && self.is_subnormal() {

From e73d02929abe6c24b1223a007333d7799e50bb57 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 16:02:00 -0400
Subject: [PATCH 12/59] Specify emulated CPUs

---
 .github/workflows/ci.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 42172968341..42e2ba55c28 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -187,8 +187,8 @@ jobs:
         target_feature: [default]
         include:
           - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
-          - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
-          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+          - { target: powerpc64-unknown-linux-gnu, target_feature: "+power10-vector" }
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+power10-vector" }
           # We should test this, but cross currently can't run it
           # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
@@ -212,6 +212,12 @@ jobs:
           curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
           echo "$HOME/.bin" >> $GITHUB_PATH
 
+      - name: Configure Emulated CPUs
+        run: |
+          echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
+          echo "CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -cpu power10" >> $GITHUB_ENV
+          echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
+
       - name: Configure RUSTFLAGS
         shell: bash
         run: |

From d07ce3cef9223c918ae2381fd46ff53ce0cf38b4 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 17:56:21 -0400
Subject: [PATCH 13/59] Account for possible qemu bug

---
 crates/test_helpers/src/lib.rs        |  6 +++--
 crates/test_helpers/src/subnormals.rs | 35 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index 63308a2ca33..d032ded576d 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(stdsimd, powerpc_target_feature)]
+
 pub mod array;
 
 #[cfg(target_arch = "wasm32")]
@@ -198,7 +200,7 @@ pub fn test_unary_elementwise_flush_subnormals<
     Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
     VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
 {
-    let flush = |x: Scalar| FlushSubnormals::flush(fs(FlushSubnormals::flush(x)));
+    let flush = |x: Scalar| subnormals::flush(fs(subnormals::flush_in(x)));
     test_1(&|x: [Scalar; LANES]| {
         proptest::prop_assume!(check(x));
         let result_v: [ScalarResult; LANES] = fv(x.into()).into();
@@ -308,7 +310,7 @@ pub fn test_binary_elementwise_flush_subnormals<
     VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
 {
     let flush = |x: Scalar1, y: Scalar2| {
-        FlushSubnormals::flush(fs(FlushSubnormals::flush(x), FlushSubnormals::flush(y)))
+        subnormals::flush(fs(subnormals::flush_in(x), subnormals::flush_in(y)))
     };
     test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
         proptest::prop_assume!(check(x, y));
diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs
index d46e8524116..585b80bb6c7 100644
--- a/crates/test_helpers/src/subnormals.rs
+++ b/crates/test_helpers/src/subnormals.rs
@@ -41,3 +41,38 @@ impl FlushSubnormals for $ty {}
 
 impl_float! { f32, f64 }
 impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
+
+/// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs.
+/// https://gitlab.com/qemu-project/qemu/-/issues/1779
+#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))]
+fn in_buggy_qemu() -> bool {
+    use std::sync::OnceLock;
+    static BUGGY: OnceLock<bool> = OnceLock::new();
+
+    fn add(x: f32, y: f32) -> f32 {
+        use core::arch::powerpc::*;
+        let array: [f32; 4] =
+            unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) };
+        array[0]
+    }
+
+    *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative())
+}
+
+#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+    if in_buggy_qemu() {
+        x
+    } else {
+        x.flush()
+    }
+}
+
+#[cfg(not(all(target_arch = "powerpc", target_feature = "altivec")))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+    x.flush()
+}
+
+pub fn flush<T: FlushSubnormals>(x: T) -> T {
+    x.flush()
+}

From ad747af0bb3f2249ec2a87b9f25f22b238dc700a Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 18:33:33 -0400
Subject: [PATCH 14/59] Apply workaround to ppc64

---
 crates/test_helpers/src/subnormals.rs | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs
index 585b80bb6c7..ec0f1fb24b9 100644
--- a/crates/test_helpers/src/subnormals.rs
+++ b/crates/test_helpers/src/subnormals.rs
@@ -44,13 +44,20 @@ impl FlushSubnormals for $ty {}
 
 /// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs.
 /// https://gitlab.com/qemu-project/qemu/-/issues/1779
-#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))]
+#[cfg(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+))]
 fn in_buggy_qemu() -> bool {
     use std::sync::OnceLock;
     static BUGGY: OnceLock<bool> = OnceLock::new();
 
     fn add(x: f32, y: f32) -> f32 {
+        #[cfg(target_arch = "powerpc")]
         use core::arch::powerpc::*;
+        #[cfg(target_arch = "powerpc64")]
+        use core::arch::powerpc64::*;
+
         let array: [f32; 4] =
             unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) };
         array[0]
@@ -59,7 +66,10 @@ fn add(x: f32, y: f32) -> f32 {
     *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative())
 }
 
-#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))]
+#[cfg(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+))]
 pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
     if in_buggy_qemu() {
         x
@@ -68,7 +78,10 @@ pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
     }
 }
 
-#[cfg(not(all(target_arch = "powerpc", target_feature = "altivec")))]
+#[cfg(not(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+)))]
 pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
     x.flush()
 }

From ca12492584e77fbb2d982942c9cf2843b8f99487 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 18:35:04 -0400
Subject: [PATCH 15/59] Revert some CI changes

---
 .github/workflows/ci.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 42e2ba55c28..4dd334a1344 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -187,8 +187,8 @@ jobs:
         target_feature: [default]
         include:
           - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
-          - { target: powerpc64-unknown-linux-gnu, target_feature: "+power10-vector" }
-          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+power10-vector" }
+          - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
           # We should test this, but cross currently can't run it
           # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
@@ -215,8 +215,7 @@ jobs:
       - name: Configure Emulated CPUs
         run: |
           echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
-          echo "CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -cpu power10" >> $GITHUB_ENV
-          echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
+          # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
 
       - name: Configure RUSTFLAGS
         shell: bash

From 52d6397da7d75deb32efa6636839ed4bcc6b2fdc Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 19:22:30 -0400
Subject: [PATCH 16/59] Flush subnormals in reduce tests

---
 crates/core_simd/tests/ops_macros.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 1f5d9488ad0..22265b8cf86 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -96,9 +96,11 @@ macro_rules! impl_common_integer_tests {
         test_helpers::test_lanes! {
             fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+                        flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
                     );
                     Ok(())
                 });
@@ -106,9 +108,11 @@ fn reduce_sum<const LANES: usize>() {
 
             fn reduce_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+                        flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
                     );
                     Ok(())
                 });

From bd4e6616f36a47f74059803fbfd0b8ddeff1b46f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 20:50:49 -0400
Subject: [PATCH 17/59] Adjust clamp test

---
 crates/core_simd/tests/ops_macros.rs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 22265b8cf86..7e705892b53 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -516,7 +516,7 @@ fn simd_max<const LANES: usize>() {
 
                 fn simd_clamp<const LANES: usize>() {
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
-                        use test_helpers::subnormals::FlushSubnormals;
+                        use test_helpers::subnormals::flush_in;
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {
                                 core::mem::swap(min, max);
@@ -535,13 +535,14 @@ fn simd_clamp<const LANES: usize>() {
                         }
                         let mut result_scalar_flush = [Scalar::default(); LANES];
                         for i in 0..LANES {
-                            result_scalar_flush[i] = value[i];
-                            if FlushSubnormals::flush(value[i]) < FlushSubnormals::flush(min[i]) {
-                                result_scalar_flush[i] = min[i];
+                            let mut value = flush_in(value[i]);
+                            if value < flush_in(min[i]) {
+                                value = min[i];
                             }
-                            if FlushSubnormals::flush(value[i]) > FlushSubnormals::flush(max[i]) {
-                                result_scalar_flush[i] = max[i];
+                            if value > flush_in(max[i]) {
+                                value = max[i];
                             }
+                            result_scalar_flush[i] = value
                         }
                         let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
                         test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);

From 616cb6e9e6de58dd9409d815078451ee550fb946 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 23:00:51 -0400
Subject: [PATCH 18/59] Disable simd_clamp test on ppc64

---
 crates/core_simd/tests/ops_macros.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 7e705892b53..a6d67986728 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -514,6 +514,8 @@ fn simd_max<const LANES: usize>() {
                     assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.));
                 }
 
+                #[cfg(not(all(target_arch = "powerpc64", target_feature = "vsx")))]
+                // https://gitlab.com/qemu-project/qemu/-/issues/1780
                 fn simd_clamp<const LANES: usize>() {
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
                         use test_helpers::subnormals::flush_in;

From fd712fe3f39470160baf95ff8eb6fb00ebf31453 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 22 Jul 2023 23:38:20 -0400
Subject: [PATCH 19/59] Fix test skip

---
 crates/core_simd/tests/ops_macros.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index a6d67986728..7b309df22cb 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -514,9 +514,11 @@ fn simd_max<const LANES: usize>() {
                     assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.));
                 }
 
-                #[cfg(not(all(target_arch = "powerpc64", target_feature = "vsx")))]
-                // https://gitlab.com/qemu-project/qemu/-/issues/1780
                 fn simd_clamp<const LANES: usize>() {
+                    if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
+                        // https://gitlab.com/qemu-project/qemu/-/issues/1780
+                        return;
+                    }
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
                         use test_helpers::subnormals::flush_in;
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {

From dbcbc3e4c50e4127e7034aef962f4241143e6c79 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 00:07:28 -0400
Subject: [PATCH 20/59] Disable misbehaving targets

---
 .github/workflows/ci.yml | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4dd334a1344..ca1ab996a7b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -175,20 +175,24 @@ jobs:
       matrix:
         target:
           - armv7-unknown-linux-gnueabihf
-          - thumbv7neon-unknown-linux-gnueabihf
-          - aarch64-unknown-linux-gnu
+          - thumbv7neon-unknown-linux-gnueabihf # includes neon by default
+          - aarch64-unknown-linux-gnu           # includes neon by default
           - powerpc-unknown-linux-gnu
-          - powerpc64-unknown-linux-gnu
-          - powerpc64le-unknown-linux-gnu
+          - powerpc64le-unknown-linux-gnu       # includes altivec by default
           - riscv64gc-unknown-linux-gnu
           # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
+          # non-nightly since https://github.com/rust-lang/rust/pull/113274
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
+          # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
+          # - powerpc64-unknown-linux-gnu
         target_feature: [default]
         include:
-          - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
           - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+          # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
+          # This target is somewhat redundant, since ppc64le has altivec as well.
+          # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
           # We should test this, but cross currently can't run it
           # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 

From cb461aceb3da91c116f13cdc5a3574bdf028923a Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 01:02:26 -0400
Subject: [PATCH 21/59] Only flush comparison in test

---
 crates/core_simd/tests/ops_macros.rs |  6 +++---
 crates/test_helpers/src/lib.rs       | 29 ++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 7b309df22cb..8386850cb90 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -539,11 +539,11 @@ fn simd_clamp<const LANES: usize>() {
                         }
                         let mut result_scalar_flush = [Scalar::default(); LANES];
                         for i in 0..LANES {
-                            let mut value = flush_in(value[i]);
-                            if value < flush_in(min[i]) {
+                            let mut value = value[i];
+                            if flush_in(value) < flush_in(min[i]) {
                                 value = min[i];
                             }
-                            if value > flush_in(max[i]) {
+                            if flush_in(value) > flush_in(max[i]) {
                                 value = max[i];
                             }
                             result_scalar_flush[i] = value
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index d032ded576d..b80c745aaf2 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -336,6 +336,35 @@ pub fn test_binary_elementwise_flush_subnormals<
     });
 }
 
+/// Test a binary vector mask function against a binary scalar predicate, applied elementwise.
+#[inline(never)]
+pub fn test_binary_mask_elementwise<Scalar1, Scalar2, Vector1, Vector2, Mask, const LANES: usize>(
+    fv: &dyn Fn(Vector1, Vector2) -> Mask,
+    fs: &dyn Fn(Scalar1, Scalar2) -> bool,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+    Scalar1: Copy + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + core::fmt::Debug + DefaultStrategy,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy,
+{
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_v: [bool; LANES] = fv(x.into(), y.into()).into();
+        let result_s: [bool; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| fs(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s);
+        Ok(())
+    });
+}
+
 /// Test a binary vector-scalar function against a binary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_binary_scalar_rhs_elementwise<

From baa5791a453dccf5d09404e601cc6403c6a9cd3b Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 01:03:43 -0400
Subject: [PATCH 22/59] Document odd test behavior

---
 crates/core_simd/tests/ops_macros.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 8386850cb90..f6ded66e9fc 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -539,6 +539,7 @@ fn simd_clamp<const LANES: usize>() {
                         }
                         let mut result_scalar_flush = [Scalar::default(); LANES];
                         for i in 0..LANES {
+                            // Comparisons flush-to-zero, but return value selection is _not_ flushed.
                             let mut value = value[i];
                             if flush_in(value) < flush_in(min[i]) {
                                 value = min[i];

From 5e5745318a7efdfd3f927102550ba4697c4f5863 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 10:15:05 -0400
Subject: [PATCH 23/59] Disable big endian ppc64

---
 .github/workflows/ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca1ab996a7b..ed1589be4f1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -188,7 +188,6 @@ jobs:
           # - powerpc64-unknown-linux-gnu
         target_feature: [default]
         include:
-          - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" }
           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
           # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
           # This target is somewhat redundant, since ppc64le has altivec as well.

From 6e8d21ee760d4672ed6c374c9be1687c531499fb Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 13:21:56 -0400
Subject: [PATCH 24/59] Define portability

---
 crates/core_simd/src/core_simd_docs.md | 33 ++++++++++++++++++++++++++
 crates/core_simd/src/mod.rs            |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md
index 15e8ed0253e..3d2e737e2af 100644
--- a/crates/core_simd/src/core_simd_docs.md
+++ b/crates/core_simd/src/core_simd_docs.md
@@ -2,3 +2,36 @@ Portable SIMD module.
 
 This module offers a portable abstraction for SIMD operations
 that is not bound to any particular hardware architecture.
+
+# What is "portable"?
+
+This module provides a SIMD implementation that is fast and predictable on any target.
+
+### Portable SIMD works on every target
+
+Unlike target-specific SIMD in `std::arch`, portable SIMD compiles for every target.
+In this regard, it is just like "regular" Rust.
+
+### Portable SIMD is consistent between targets
+
+A program using portable SIMD can expect identical behavior on any target.
+In most regards, [`Simd<T, N>`] can be thought of as a parallelized `[T; N]` and operates like a sequence of `T`.
+
+This has one notable exception: a handful of older architectures (e.g. `armv7` and `powerpc`) flush [subnormal](`f32::is_subnormal`) `f32` values to zero.
+On these architectures, subnormal `f32` input values are replaced with zeros, and any operation producing subnormal `f32` values produces zeros instead.
+This doesn't affect most architectures or programs.
+
+### Operations use the best instructions available
+
+Operations provided by this module compile to the best available SIMD instructions.
+
+Portable SIMD is not a low-level vendor library, and operations in portable SIMD _do not_ necessarily map to a single instruction.
+Instead, they map to a reasonable implementation of the operation for the target.
+
+Consistency between targets is not compromised to use faster or fewer instructions.
+In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent.
+For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`) can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`].
+When necessary, [`Simd<T, N>`] can be converted to the types provided by `std::arch` to make use of target-specific functions.
+
+Many targets simply don't have SIMD, or don't support SIMD for a particular element type.
+In those cases, regular scalar operations are generated instead.
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index f9891a3b7c1..dd954b7cc48 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -21,8 +21,9 @@
 mod vector;
 mod vendor;
 
-#[doc = include_str!("core_simd_docs.md")]
 pub mod simd {
+    #![doc = include_str!("core_simd_docs.md")]
+
     pub mod prelude;
 
     pub(crate) use crate::core_simd::intrinsics;

From 927139d1e711a9fd276632616feea393693258f4 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 14:15:20 -0400
Subject: [PATCH 25/59] Add scalar shifts

---
 crates/core_simd/src/ops.rs              |  1 +
 crates/core_simd/src/ops/shift_scalar.rs | 58 ++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 crates/core_simd/src/ops/shift_scalar.rs

diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index b007456cf2c..63a96106283 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -6,6 +6,7 @@
 
 mod assign;
 mod deref;
+mod shift_scalar;
 mod unary;
 
 impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs
new file mode 100644
index 00000000000..77aac656395
--- /dev/null
+++ b/crates/core_simd/src/ops/shift_scalar.rs
@@ -0,0 +1,58 @@
+// Shifts are unusual among operators in that the right-hand side is typically a scalar.
+// Here, we implement shifts for scalar RHS arguments.
+
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+
+macro_rules! impl_splatted_shifts {
+    { impl $trait:ident :: $trait_fn:ident for $ty:ty } => {
+        impl<const N: usize> core::ops::$trait<$ty> for Simd<$ty, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            type Output = Self;
+            fn $trait_fn(self, rhs: $ty) -> Self::Output {
+                self.$trait_fn(Simd::splat(rhs))
+            }
+        }
+
+        impl<const N: usize> core::ops::$trait<&$ty> for Simd<$ty, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            type Output = Self;
+            fn $trait_fn(self, rhs: &$ty) -> Self::Output {
+                self.$trait_fn(Simd::splat(*rhs))
+            }
+        }
+
+        impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            type Output = Simd<$ty, N>;
+            fn $trait_fn(self, rhs: $ty) -> Self::Output {
+                self.$trait_fn(Simd::splat(rhs))
+            }
+        }
+
+        impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N>
+        where
+            LaneCount<N>: SupportedLaneCount,
+        {
+            type Output = Simd<$ty, N>;
+            fn $trait_fn(self, rhs: &$ty) -> Self::Output {
+                self.$trait_fn(Simd::splat(*rhs))
+            }
+        }
+    };
+    { $($ty:ty),* } => {
+        $(
+        impl_splatted_shifts! { impl Shl::shl for $ty }
+        impl_splatted_shifts! { impl Shr::shr for $ty }
+        )*
+    }
+}
+
+// In the past there were inference issues when generically splatting arguments.
+// Enumerate them instead.
+impl_splatted_shifts! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }

From 5c97c0db2457872ef83a2b30c9d30f24963a1752 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 27 Jul 2023 16:26:22 -0400
Subject: [PATCH 26/59] Add wrapping negation

---
 crates/core_simd/src/elements/uint.rs | 16 ++++++++++++++--
 crates/core_simd/tests/ops_macros.rs  | 10 ++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
index 3926c395ec9..c8bf24998dd 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/elements/uint.rs
@@ -16,6 +16,12 @@ pub trait SimdUint: Copy + Sealed {
     #[must_use]
     fn cast<T: SimdCast>(self) -> Self::Cast<T>;
 
+    /// Wrapping negation.
+    ///
+    /// Like [`u32::wrapping_neg`], all applications of this function will wrap, with the exception
+    /// of `-0`.
+    fn wrapping_neg(self) -> Self;
+
     /// Lanewise saturating add.
     ///
     /// # Examples
@@ -74,7 +80,7 @@ pub trait SimdUint: Copy + Sealed {
 }
 
 macro_rules! impl_trait {
-    { $($ty:ty),* } => {
+    { $($ty:ident ($signed:ident)),* } => {
         $(
         impl<const LANES: usize> Sealed for Simd<$ty, LANES>
         where
@@ -95,6 +101,12 @@ fn cast<T: SimdCast>(self) -> Self::Cast<T> {
                 unsafe { intrinsics::simd_as(self) }
             }
 
+            #[inline]
+            fn wrapping_neg(self) -> Self {
+                use crate::simd::SimdInt;
+                (-self.cast::<$signed>()).cast()
+            }
+
             #[inline]
             fn saturating_add(self, second: Self) -> Self {
                 // Safety: `self` is a vector
@@ -153,4 +165,4 @@ fn reduce_xor(self) -> Self::Scalar {
     }
 }
 
-impl_trait! { u8, u16, u32, u64, usize }
+impl_trait! { u8 (i8), u16 (i16), u32 (i32), u64 (i64), usize (isize) }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 3a02f3f01e1..ee0d3ce2f5a 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -327,6 +327,16 @@ fn rem_zero_panic<const LANES: usize>() {
                 }
             }
 
+            test_helpers::test_lanes! {
+                fn wrapping_neg<const LANES: usize>() {
+                    test_helpers::test_unary_elementwise(
+                        &Vector::<LANES>::wrapping_neg,
+                        &Scalar::wrapping_neg,
+                        &|_| true,
+                    );
+                }
+            }
+
             impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
             impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
             impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);

From e51ee248c33d8c1662c61fcfcb8cf1843979007f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 28 Jul 2023 21:26:14 -0400
Subject: [PATCH 27/59] Add tests

---
 crates/core_simd/tests/ops_macros.rs | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 3a02f3f01e1..dfc0e1a3708 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -94,6 +94,36 @@ fn assign<const LANES: usize>() {
 macro_rules! impl_common_integer_tests {
     { $vector:ident, $scalar:ident } => {
         test_helpers::test_lanes! {
+            fn shr<const LANES: usize>() {
+                use core::ops::Shr;
+                let shr = |x: $scalar, y: $scalar| x.wrapping_shr(y as _);
+                test_helpers::test_binary_elementwise(
+                    &<$vector::<LANES> as Shr<$vector::<LANES>>>::shr,
+                    &shr,
+                    &|_, _| true,
+                );
+                test_helpers::test_binary_scalar_rhs_elementwise(
+                    &<$vector::<LANES> as Shr<$scalar>>::shr,
+                    &shr,
+                    &|_, _| true,
+                );
+            }
+
+            fn shl<const LANES: usize>() {
+                use core::ops::Shl;
+                let shl = |x: $scalar, y: $scalar| x.wrapping_shl(y as _);
+                test_helpers::test_binary_elementwise(
+                    &<$vector::<LANES> as Shl<$vector::<LANES>>>::shl,
+                    &shl,
+                    &|_, _| true,
+                );
+                test_helpers::test_binary_scalar_rhs_elementwise(
+                    &<$vector::<LANES> as Shl<$scalar>>::shl,
+                    &shl,
+                    &|_, _| true,
+                );
+            }
+
             fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
                     test_helpers::prop_assert_biteq! (

From 3da60554e59f0e9ffe85c717253dd0c3c681296b Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 28 Jul 2023 21:38:58 -0400
Subject: [PATCH 28/59] Add footnote

---
 crates/core_simd/src/core_simd_docs.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md
index 3d2e737e2af..8acdeb04427 100644
--- a/crates/core_simd/src/core_simd_docs.md
+++ b/crates/core_simd/src/core_simd_docs.md
@@ -30,8 +30,10 @@ Instead, they map to a reasonable implementation of the operation for the target
 
 Consistency between targets is not compromised to use faster or fewer instructions.
 In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent.
-For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`) can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`].
+For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`].
 When necessary, [`Simd<T, N>`] can be converted to the types provided by `std::arch` to make use of target-specific functions.
 
 Many targets simply don't have SIMD, or don't support SIMD for a particular element type.
 In those cases, regular scalar operations are generated instead.
+
+[^1]: `_mm_min_ps(x, y)` is equivalent to `x.simd_lt(y).select(x, y)`

From 8101074e2e1b8fb3a7469446746625b7febb7f33 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 30 Jul 2023 15:53:32 -0400
Subject: [PATCH 29/59] Add various integer ops

---
 crates/core_simd/src/elements/int.rs  | 59 ++++++++++++++++++++++++++-
 crates/core_simd/src/elements/uint.rs | 53 ++++++++++++++++++++++++
 crates/core_simd/src/intrinsics.rs    |  6 +++
 crates/core_simd/src/to_bytes.rs      | 44 ++++++++++++++++++++
 crates/core_simd/tests/ops_macros.rs  | 48 ++++++++++++++++++++++
 crates/core_simd/tests/to_bytes.rs    | 16 ++++++--
 6 files changed, 220 insertions(+), 6 deletions(-)

diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index 6db89ff9a65..6992b679515 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -191,10 +191,29 @@ pub trait SimdInt: Copy + Sealed {
 
     /// Returns the cumulative bitwise "xor" across the lanes of the vector.
     fn reduce_xor(self) -> Self::Scalar;
+
+    /// Reverses the byte order of each element.
+    fn swap_bytes(self) -> Self;
+
+    /// Reverses the order of bits in each element.
+    /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
+    fn reverse_bits(self) -> Self;
+
+    /// Returns the number of leading zeros in the binary representation of each element.
+    fn leading_zeros(self) -> Self;
+
+    /// Returns the number of trailing zeros in the binary representation of each element.
+    fn trailing_zeros(self) -> Self;
+
+    /// Returns the number of leading ones in the binary representation of each element.
+    fn leading_ones(self) -> Self;
+
+    /// Returns the number of trailing ones in the binary representation of each element.
+    fn trailing_ones(self) -> Self;
 }
 
 macro_rules! impl_trait {
-    { $($ty:ty),* } => {
+    { $($ty:ident ($unsigned:ident)),* } => {
         $(
         impl<const LANES: usize> Sealed for Simd<$ty, LANES>
         where
@@ -307,9 +326,45 @@ fn reduce_xor(self) -> Self::Scalar {
                 // Safety: `self` is an integer vector
                 unsafe { intrinsics::simd_reduce_xor(self) }
             }
+
+            #[inline]
+            fn swap_bytes(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_bswap(self) }
+            }
+
+            #[inline]
+            fn reverse_bits(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_bitreverse(self) }
+            }
+
+            #[inline]
+            fn leading_zeros(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_ctlz(self) }
+            }
+
+            #[inline]
+            fn trailing_zeros(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_cttz(self) }
+            }
+
+            #[inline]
+            fn leading_ones(self) -> Self {
+                use crate::simd::SimdUint;
+                self.cast::<$unsigned>().leading_ones().cast()
+            }
+
+            #[inline]
+            fn trailing_ones(self) -> Self {
+                use crate::simd::SimdUint;
+                self.cast::<$unsigned>().trailing_ones().cast()
+            }
         }
         )*
     }
 }
 
-impl_trait! { i8, i16, i32, i64, isize }
+impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) }
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
index 3926c395ec9..7490340e4f4 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/elements/uint.rs
@@ -71,6 +71,25 @@ pub trait SimdUint: Copy + Sealed {
 
     /// Returns the cumulative bitwise "xor" across the lanes of the vector.
     fn reduce_xor(self) -> Self::Scalar;
+
+    /// Reverses the byte order of each element.
+    fn swap_bytes(self) -> Self;
+
+    /// Reverses the order of bits in each element.
+    /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
+    fn reverse_bits(self) -> Self;
+
+    /// Returns the number of leading zeros in the binary representation of each element.
+    fn leading_zeros(self) -> Self;
+
+    /// Returns the number of trailing zeros in the binary representation of each element.
+    fn trailing_zeros(self) -> Self;
+
+    /// Returns the number of leading ones in the binary representation of each element.
+    fn leading_ones(self) -> Self;
+
+    /// Returns the number of trailing ones in the binary representation of each element.
+    fn trailing_ones(self) -> Self;
 }
 
 macro_rules! impl_trait {
@@ -148,6 +167,40 @@ fn reduce_xor(self) -> Self::Scalar {
                 // Safety: `self` is an integer vector
                 unsafe { intrinsics::simd_reduce_xor(self) }
             }
+
+            #[inline]
+            fn swap_bytes(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_bswap(self) }
+            }
+
+            #[inline]
+            fn reverse_bits(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_bitreverse(self) }
+            }
+
+            #[inline]
+            fn leading_zeros(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_ctlz(self) }
+            }
+
+            #[inline]
+            fn trailing_zeros(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_cttz(self) }
+            }
+
+            #[inline]
+            fn leading_ones(self) -> Self {
+                (!self).leading_zeros()
+            }
+
+            #[inline]
+            fn trailing_ones(self) -> Self {
+                (!self).trailing_zeros()
+            }
         }
         )*
     }
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index dd6698e2ba5..b27893bc729 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -160,4 +160,10 @@
 
     /// convert an exposed address back to a pointer
     pub(crate) fn simd_from_exposed_addr<T, U>(addr: T) -> U;
+
+    // Integer operations
+    pub(crate) fn simd_bswap<T>(x: T) -> T;
+    pub(crate) fn simd_bitreverse<T>(x: T) -> T;
+    pub(crate) fn simd_ctlz<T>(x: T) -> T;
+    pub(crate) fn simd_cttz<T>(x: T) -> T;
 }
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index b36b1a347b2..563b0c95a8a 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -1,3 +1,5 @@
+use crate::simd::SimdUint;
+
 macro_rules! impl_to_bytes {
     { $ty:ty, $size:literal } => {
         impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
@@ -12,12 +14,54 @@ pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
                 unsafe { core::mem::transmute_copy(&self) }
             }
 
+            /// Return the memory representation of this integer as a byte array in big-endian
+            /// (network) byte order.
+            pub fn to_be_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+                let bytes = self.to_ne_bytes();
+                if cfg!(target_endian = "big") {
+                    bytes
+                } else {
+                    bytes.swap_bytes()
+                }
+            }
+
+            /// Return the memory representation of this integer as a byte array in little-endian
+            /// byte order.
+            pub fn to_le_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+                let bytes = self.to_ne_bytes();
+                if cfg!(target_endian = "little") {
+                    bytes
+                } else {
+                    bytes.swap_bytes()
+                }
+            }
+
             /// Create a native endian integer value from its memory representation as a byte array
             /// in native endianness.
             pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
                 // Safety: transmuting between vectors is safe
                 unsafe { core::mem::transmute_copy(&bytes) }
             }
+
+            /// Create an integer value from its representation as a byte array in big endian.
+            pub fn from_be_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+                let bytes = if cfg!(target_endian = "big") {
+                    bytes
+                } else {
+                    bytes.swap_bytes()
+                };
+                Self::from_ne_bytes(bytes)
+            }
+
+            /// Create an integer value from its representation as a byte array in little endian.
+            pub fn from_le_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+                let bytes = if cfg!(target_endian = "little") {
+                    bytes
+                } else {
+                    bytes.swap_bytes()
+                };
+                Self::from_ne_bytes(bytes)
+            }
         }
     }
 }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index dfc0e1a3708..bd1856e1bcc 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -193,6 +193,54 @@ fn reduce_min<const LANES: usize>() {
                     Ok(())
                 });
             }
+
+            fn swap_bytes<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::swap_bytes,
+                    &$scalar::swap_bytes,
+                    &|_| true,
+                )
+            }
+
+            fn reverse_bits<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::reverse_bits,
+                    &$scalar::reverse_bits,
+                    &|_| true,
+                )
+            }
+
+            fn leading_zeros<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::leading_zeros,
+                    &|x| x.leading_zeros() as $scalar,
+                    &|_| true,
+                )
+            }
+
+            fn trailing_zeros<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::leading_zeros,
+                    &|x| x.trailing_zeros() as $scalar,
+                    &|_| true,
+                )
+            }
+
+            fn leading_ones<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::leading_ones,
+                    &|x| x.leading_ones() as $scalar,
+                    &|_| true,
+                )
+            }
+
+            fn trailing_ones<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::leading_ones,
+                    &|x| x.trailing_ones() as $scalar,
+                    &|_| true,
+                )
+            }
         }
     }
 }
diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs
index be0ee4349c5..7dd740d65dd 100644
--- a/crates/core_simd/tests/to_bytes.rs
+++ b/crates/core_simd/tests/to_bytes.rs
@@ -7,8 +7,16 @@
 #[test]
 fn byte_convert() {
     let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]);
-    let bytes = int.to_ne_bytes();
-    assert_eq!(int[0].to_ne_bytes(), bytes[..4]);
-    assert_eq!(int[1].to_ne_bytes(), bytes[4..]);
-    assert_eq!(Simd::<u32, 2>::from_ne_bytes(bytes), int);
+    let ne_bytes = int.to_ne_bytes();
+    let be_bytes = int.to_be_bytes();
+    let le_bytes = int.to_le_bytes();
+    assert_eq!(int[0].to_ne_bytes(), ne_bytes[..4]);
+    assert_eq!(int[1].to_ne_bytes(), ne_bytes[4..]);
+    assert_eq!(int[0].to_be_bytes(), be_bytes[..4]);
+    assert_eq!(int[1].to_be_bytes(), be_bytes[4..]);
+    assert_eq!(int[0].to_le_bytes(), le_bytes[..4]);
+    assert_eq!(int[1].to_le_bytes(), le_bytes[4..]);
+    assert_eq!(Simd::<u32, 2>::from_ne_bytes(ne_bytes), int);
+    assert_eq!(Simd::<u32, 2>::from_be_bytes(be_bytes), int);
+    assert_eq!(Simd::<u32, 2>::from_le_bytes(le_bytes), int);
 }

From b1245ffb1277ad4274f600607f9058281baf3bc6 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 30 Jul 2023 16:20:20 -0400
Subject: [PATCH 30/59] Fix bad copy-paste

---
 crates/core_simd/tests/ops_macros.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index bd1856e1bcc..23e914e64b5 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -220,7 +220,7 @@ fn leading_zeros<const LANES: usize>() {
 
             fn trailing_zeros<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
-                    &$vector::<LANES>::leading_zeros,
+                    &$vector::<LANES>::trailing_zeros,
                     &|x| x.trailing_zeros() as $scalar,
                     &|_| true,
                 )
@@ -236,7 +236,7 @@ fn leading_ones<const LANES: usize>() {
 
             fn trailing_ones<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
-                    &$vector::<LANES>::leading_ones,
+                    &$vector::<LANES>::trailing_ones,
                     &|x| x.trailing_ones() as $scalar,
                     &|_| true,
                 )

From c948b703ff57f25a1a41be5e03553065454080b9 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Tue, 1 Aug 2023 07:58:44 -0400
Subject: [PATCH 31/59] Simplify signed leading_ones/trailing_ones

---
 crates/core_simd/src/elements/int.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index 6992b679515..e81ed7bf601 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -213,7 +213,7 @@ pub trait SimdInt: Copy + Sealed {
 }
 
 macro_rules! impl_trait {
-    { $($ty:ident ($unsigned:ident)),* } => {
+    { $($ty:ty),* } => {
         $(
         impl<const LANES: usize> Sealed for Simd<$ty, LANES>
         where
@@ -353,18 +353,16 @@ fn trailing_zeros(self) -> Self {
 
             #[inline]
             fn leading_ones(self) -> Self {
-                use crate::simd::SimdUint;
-                self.cast::<$unsigned>().leading_ones().cast()
+                (!self).leading_zeros()
             }
 
             #[inline]
             fn trailing_ones(self) -> Self {
-                use crate::simd::SimdUint;
-                self.cast::<$unsigned>().trailing_ones().cast()
+                (!self).trailing_zeros()
             }
         }
         )*
     }
 }
 
-impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) }
+impl_trait! { i8, i16, i32, i64, isize }

From 29392c091b8c22a42ff5e8074d164c28e1122113 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Sat, 26 Aug 2023 12:00:05 +0000
Subject: [PATCH 32/59] Commit Cargo.lock

As of rust-lang/cargo#8728 it is now recommended to always check in
Cargo.lock. This will help with reproducibility and will avoid the need
for cg_clif's build system to keep its own copy of Cargo.lock for
vendoring. It will also allow tidy to run on the portable-simd
workspace.
---
 .gitignore |   1 -
 Cargo.lock | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 304 insertions(+), 1 deletion(-)
 create mode 100644 Cargo.lock

diff --git a/.gitignore b/.gitignore
index 96ef6c0b944..ea8c4bf7f35 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1 @@
 /target
-Cargo.lock
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 00000000000..46312c09657
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,304 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bumpalo"
+version = "3.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+
+[[package]]
+name = "byteorder"
+version = "1.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "console_error_panic_hook"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "core_simd"
+version = "0.1.0"
+dependencies = [
+ "proptest",
+ "std_float",
+ "test_helpers",
+ "wasm-bindgen",
+ "wasm-bindgen-test",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "log"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+
+[[package]]
+name = "num-traits"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "proptest"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f"
+dependencies = [
+ "bitflags",
+ "byteorder",
+ "num-traits",
+ "rand",
+ "rand_chacha",
+ "rand_xorshift",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+ "rand_hc",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
+[[package]]
+name = "std_float"
+version = "0.1.0"
+dependencies = [
+ "core_simd",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "test_helpers"
+version = "0.1.0"
+dependencies = [
+ "proptest",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+
+[[package]]
+name = "wasm-bindgen-test"
+version = "0.3.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671"
+dependencies = [
+ "console_error_panic_hook",
+ "js-sys",
+ "scoped-tls",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-bindgen-test-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-test-macro"
+version = "0.3.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]

From d08b2a50a3a06edd6c2f412a4ffbeaec53507d2f Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Sat, 26 Aug 2023 12:32:10 +0000
Subject: [PATCH 33/59] Allow internal_features lint

---
 crates/core_simd/src/lib.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index fde406bda70..2d68e4cce85 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -18,6 +18,7 @@
 #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
 #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
 #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
+#![allow(internal_features)]
 #![unstable(feature = "portable_simd", issue = "86656")]
 //! Portable SIMD module.
 

From 0a1e7453204383a109f4854cc588fd3b35c990c6 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 10 Sep 2023 18:20:47 -0400
Subject: [PATCH 34/59] Return unsigned integers from some signed integer
 functions

---
 crates/core_simd/src/elements/int.rs | 37 +++++++++++++++-------------
 crates/core_simd/tests/ops_macros.rs |  8 +++---
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index e81ed7bf601..c341c59545c 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -1,6 +1,7 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SupportedLaneCount,
+    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SimdUint,
+    SupportedLaneCount,
 };
 
 /// Operations on SIMD vectors of signed integers.
@@ -11,6 +12,9 @@ pub trait SimdInt: Copy + Sealed {
     /// Scalar type contained by this SIMD vector type.
     type Scalar;
 
+    /// A SIMD vector of unsigned integers with the same element size.
+    type Unsigned;
+
     /// A SIMD vector with a different element type.
     type Cast<T: SimdElement>;
 
@@ -200,20 +204,20 @@ pub trait SimdInt: Copy + Sealed {
     fn reverse_bits(self) -> Self;
 
     /// Returns the number of leading zeros in the binary representation of each element.
-    fn leading_zeros(self) -> Self;
+    fn leading_zeros(self) -> Self::Unsigned;
 
     /// Returns the number of trailing zeros in the binary representation of each element.
-    fn trailing_zeros(self) -> Self;
+    fn trailing_zeros(self) -> Self::Unsigned;
 
     /// Returns the number of leading ones in the binary representation of each element.
-    fn leading_ones(self) -> Self;
+    fn leading_ones(self) -> Self::Unsigned;
 
     /// Returns the number of trailing ones in the binary representation of each element.
-    fn trailing_ones(self) -> Self;
+    fn trailing_ones(self) -> Self::Unsigned;
 }
 
 macro_rules! impl_trait {
-    { $($ty:ty),* } => {
+    { $($ty:ident ($unsigned:ident)),* } => {
         $(
         impl<const LANES: usize> Sealed for Simd<$ty, LANES>
         where
@@ -227,6 +231,7 @@ impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
         {
             type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
             type Scalar = $ty;
+            type Unsigned = Simd<$unsigned, LANES>;
             type Cast<T: SimdElement> = Simd<T, LANES>;
 
             #[inline]
@@ -340,29 +345,27 @@ fn reverse_bits(self) -> Self {
             }
 
             #[inline]
-            fn leading_zeros(self) -> Self {
-                // Safety: `self` is an integer vector
-                unsafe { intrinsics::simd_ctlz(self) }
+            fn leading_zeros(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().leading_zeros()
             }
 
             #[inline]
-            fn trailing_zeros(self) -> Self {
-                // Safety: `self` is an integer vector
-                unsafe { intrinsics::simd_cttz(self) }
+            fn trailing_zeros(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().trailing_zeros()
             }
 
             #[inline]
-            fn leading_ones(self) -> Self {
-                (!self).leading_zeros()
+            fn leading_ones(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().leading_ones()
             }
 
             #[inline]
-            fn trailing_ones(self) -> Self {
-                (!self).trailing_zeros()
+            fn trailing_ones(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().trailing_ones()
             }
         }
         )*
     }
 }
 
-impl_trait! { i8, i16, i32, i64, isize }
+impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 23e914e64b5..135f3ecf7b2 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -213,7 +213,7 @@ fn reverse_bits<const LANES: usize>() {
             fn leading_zeros<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
                     &$vector::<LANES>::leading_zeros,
-                    &|x| x.leading_zeros() as $scalar,
+                    &|x| x.leading_zeros() as _,
                     &|_| true,
                 )
             }
@@ -221,7 +221,7 @@ fn leading_zeros<const LANES: usize>() {
             fn trailing_zeros<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
                     &$vector::<LANES>::trailing_zeros,
-                    &|x| x.trailing_zeros() as $scalar,
+                    &|x| x.trailing_zeros() as _,
                     &|_| true,
                 )
             }
@@ -229,7 +229,7 @@ fn trailing_zeros<const LANES: usize>() {
             fn leading_ones<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
                     &$vector::<LANES>::leading_ones,
-                    &|x| x.leading_ones() as $scalar,
+                    &|x| x.leading_ones() as _,
                     &|_| true,
                 )
             }
@@ -237,7 +237,7 @@ fn leading_ones<const LANES: usize>() {
             fn trailing_ones<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
                     &$vector::<LANES>::trailing_ones,
-                    &|x| x.trailing_ones() as $scalar,
+                    &|x| x.trailing_ones() as _,
                     &|_| true,
                 )
             }

From eb3c050405cf9fe8342225b328e245dc02dd8b48 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 23 Sep 2023 13:30:21 -0400
Subject: [PATCH 35/59] Fix lints

---
 crates/core_simd/src/masks/to_bitmask.rs | 1 +
 crates/core_simd/src/ops/shift_scalar.rs | 4 ++++
 crates/core_simd/src/to_bytes.rs         | 6 ++++++
 crates/core_simd/src/vendor.rs           | 2 +-
 crates/core_simd/src/vendor/x86.rs       | 2 +-
 5 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index fc7d6b781f2..8e724c9de8c 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -74,6 +74,7 @@ impl ToBitMask<BitMask=u64> for Mask<_, 64>
 
 /// Returns the minimum number of bytes in a bitmask with `lanes` lanes.
 #[cfg(feature = "generic_const_exprs")]
+#[allow(clippy::missing_inline_in_public_items)]
 pub const fn bitmask_len(lanes: usize) -> usize {
     (lanes + 7) / 8
 }
diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs
index 77aac656395..f5115a5a5e9 100644
--- a/crates/core_simd/src/ops/shift_scalar.rs
+++ b/crates/core_simd/src/ops/shift_scalar.rs
@@ -10,6 +10,7 @@ impl<const N: usize> core::ops::$trait<$ty> for Simd<$ty, N>
             LaneCount<N>: SupportedLaneCount,
         {
             type Output = Self;
+            #[inline]
             fn $trait_fn(self, rhs: $ty) -> Self::Output {
                 self.$trait_fn(Simd::splat(rhs))
             }
@@ -20,6 +21,7 @@ impl<const N: usize> core::ops::$trait<&$ty> for Simd<$ty, N>
             LaneCount<N>: SupportedLaneCount,
         {
             type Output = Self;
+            #[inline]
             fn $trait_fn(self, rhs: &$ty) -> Self::Output {
                 self.$trait_fn(Simd::splat(*rhs))
             }
@@ -30,6 +32,7 @@ impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N>
             LaneCount<N>: SupportedLaneCount,
         {
             type Output = Simd<$ty, N>;
+            #[inline]
             fn $trait_fn(self, rhs: $ty) -> Self::Output {
                 self.$trait_fn(Simd::splat(rhs))
             }
@@ -40,6 +43,7 @@ impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N>
             LaneCount<N>: SupportedLaneCount,
         {
             type Output = Simd<$ty, N>;
+            #[inline]
             fn $trait_fn(self, rhs: &$ty) -> Self::Output {
                 self.$trait_fn(Simd::splat(*rhs))
             }
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index 563b0c95a8a..5f1374fd5a5 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -9,6 +9,7 @@ impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
         {
             /// Return the memory representation of this integer as a byte array in native byte
             /// order.
+            #[inline]
             pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
                 // Safety: transmuting between vectors is safe
                 unsafe { core::mem::transmute_copy(&self) }
@@ -16,6 +17,7 @@ pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
 
             /// Return the memory representation of this integer as a byte array in big-endian
             /// (network) byte order.
+            #[inline]
             pub fn to_be_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
                 let bytes = self.to_ne_bytes();
                 if cfg!(target_endian = "big") {
@@ -27,6 +29,7 @@ pub fn to_be_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
 
             /// Return the memory representation of this integer as a byte array in little-endian
             /// byte order.
+            #[inline]
             pub fn to_le_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
                 let bytes = self.to_ne_bytes();
                 if cfg!(target_endian = "little") {
@@ -38,12 +41,14 @@ pub fn to_le_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
 
             /// Create a native endian integer value from its memory representation as a byte array
             /// in native endianness.
+            #[inline]
             pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
                 // Safety: transmuting between vectors is safe
                 unsafe { core::mem::transmute_copy(&bytes) }
             }
 
             /// Create an integer value from its representation as a byte array in big endian.
+            #[inline]
             pub fn from_be_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
                 let bytes = if cfg!(target_endian = "big") {
                     bytes
@@ -54,6 +59,7 @@ pub fn from_be_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self
             }
 
             /// Create an integer value from its representation as a byte array in little endian.
+            #[inline]
             pub fn from_le_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
                 let bytes = if cfg!(target_endian = "little") {
                     bytes
diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs
index 9fb70218c95..6223bedb4e1 100644
--- a/crates/core_simd/src/vendor.rs
+++ b/crates/core_simd/src/vendor.rs
@@ -21,7 +21,7 @@ fn from(value: $from) -> $to {
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 mod x86;
 
-#[cfg(any(target_arch = "wasm32"))]
+#[cfg(target_arch = "wasm32")]
 mod wasm32;
 
 #[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
diff --git a/crates/core_simd/src/vendor/x86.rs b/crates/core_simd/src/vendor/x86.rs
index 0dd47015ed2..66aaf90eef5 100644
--- a/crates/core_simd/src/vendor/x86.rs
+++ b/crates/core_simd/src/vendor/x86.rs
@@ -1,6 +1,6 @@
 use crate::simd::*;
 
-#[cfg(any(target_arch = "x86"))]
+#[cfg(target_arch = "x86")]
 use core::arch::x86::*;
 
 #[cfg(target_arch = "x86_64")]

From 4825b2a64d765317066948867e8714674419359b Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 1 Oct 2023 21:29:03 -0400
Subject: [PATCH 36/59] Fix lint

---
 crates/std_float/src/lib.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
index 4bd4d4c05e3..bb2b5a2dbba 100644
--- a/crates/std_float/src/lib.rs
+++ b/crates/std_float/src/lib.rs
@@ -2,7 +2,8 @@
 #![cfg_attr(
     feature = "as_crate",
     feature(platform_intrinsics),
-    feature(portable_simd)
+    feature(portable_simd),
+    allow(internal_features)
 )]
 #[cfg(not(feature = "as_crate"))]
 use core::simd;

From a93ded542652cdff67e8b222c91a401d8e905777 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 1 Oct 2023 21:28:03 -0400
Subject: [PATCH 37/59] Remove generic_const_exprs

---
 crates/core_simd/Cargo.toml              |   1 -
 crates/core_simd/src/lib.rs              |   2 -
 crates/core_simd/src/masks.rs            |   5 +-
 crates/core_simd/src/masks/bitmask.rs    |   2 -
 crates/core_simd/src/masks/full_masks.rs |  25 +---
 crates/core_simd/src/masks/to_bitmask.rs |  64 ++++-----
 crates/core_simd/src/mod.rs              |   5 +-
 crates/core_simd/src/to_bytes.rs         | 163 +++++++++++++++--------
 crates/core_simd/tests/masks.rs          |   1 -
 crates/core_simd/tests/to_bytes.rs       |   6 +-
 10 files changed, 156 insertions(+), 118 deletions(-)

diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml
index d1a3a515a7e..b4a8fd70f4c 100644
--- a/crates/core_simd/Cargo.toml
+++ b/crates/core_simd/Cargo.toml
@@ -12,7 +12,6 @@ license = "MIT OR Apache-2.0"
 default = ["as_crate"]
 as_crate = []
 std = []
-generic_const_exprs = []
 all_lane_counts = []
 
 [target.'cfg(target_arch = "wasm32")'.dev-dependencies]
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 2d68e4cce85..dd3c546e014 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -14,8 +14,6 @@
     strict_provenance,
     ptr_metadata
 )]
-#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
-#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
 #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
 #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
 #![allow(internal_features)]
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index fea687bdc1a..b6af9f83581 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -13,10 +13,7 @@
 mod mask_impl;
 
 mod to_bitmask;
-pub use to_bitmask::ToBitMask;
-
-#[cfg(feature = "generic_const_exprs")]
-pub use to_bitmask::{bitmask_len, ToBitMaskArray};
+pub use to_bitmask::{ToBitMask, ToBitMaskArray};
 
 use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
 use core::cmp::Ordering;
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index 20465ba9b07..a7df6304bc7 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -119,7 +119,6 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
     pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
@@ -129,7 +128,6 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
         unsafe { core::mem::transmute_copy(&self.0) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 1d13c45b8e7..4b36adece71 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -1,12 +1,9 @@
 //! Masks that take up full SIMD vector registers.
 
-use super::MaskElement;
+use super::{to_bitmask::ToBitMaskArray, MaskElement};
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 
-#[cfg(feature = "generic_const_exprs")]
-use crate::simd::ToBitMaskArray;
-
 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
 where
@@ -145,23 +142,19 @@ pub fn convert<U>(self) -> Mask<U, LANES>
         unsafe { Mask(intrinsics::simd_cast(self.0)) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
     pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
     where
         super::Mask<T, LANES>: ToBitMaskArray,
-        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
     {
-        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
-
-        // Safety: N is the correct bitmask size
+        // Safety: Bytes is the right size array
         unsafe {
             // Compute the bitmask
-            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+            let bitmask: <super::Mask<T, LANES> as ToBitMaskArray>::BitMaskArray =
                 intrinsics::simd_bitmask(self.0);
 
-            // Transmute to the return type, previously asserted to be the same size
+            // Transmute to the return type
             let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
 
             // LLVM assumes bit order should match endianness
@@ -175,17 +168,13 @@ pub fn convert<U>(self) -> Mask<U, LANES>
         }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
     where
         super::Mask<T, LANES>: ToBitMaskArray,
-        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
     {
-        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
-
-        // Safety: N is the correct bitmask size
+        // Safety: Bytes is the right size array
         unsafe {
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {
@@ -194,8 +183,8 @@ pub fn convert<U>(self) -> Mask<U, LANES>
                 }
             }
 
-            // Transmute to the bitmask type, previously asserted to be the same size
-            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+            // Transmute to the bitmask
+            let bitmask: <super::Mask<T, LANES> as ToBitMaskArray>::BitMaskArray =
                 core::mem::transmute_copy(&bitmask);
 
             // Compute the regular mask
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index 8e724c9de8c..7041d15164d 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -30,19 +30,18 @@ pub trait ToBitMask: Sealed {
 /// Converts masks to and from byte array bitmasks.
 ///
 /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
-#[cfg(feature = "generic_const_exprs")]
 pub trait ToBitMaskArray: Sealed {
-    /// The length of the bitmask array.
-    const BYTES: usize;
+    /// The bitmask array.
+    type BitMaskArray;
 
     /// Converts a mask to a bitmask.
-    fn to_bitmask_array(self) -> [u8; Self::BYTES];
+    fn to_bitmask_array(self) -> Self::BitMaskArray;
 
     /// Converts a bitmask to a mask.
-    fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self;
+    fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self;
 }
 
-macro_rules! impl_integer_intrinsic {
+macro_rules! impl_integer {
     { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
         $(
         impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
@@ -62,7 +61,27 @@ fn from_bitmask(bitmask: $int) -> Self {
     }
 }
 
-impl_integer_intrinsic! {
+macro_rules! impl_array {
+    { $(impl ToBitMaskArray<Bytes=$int:literal> for Mask<_, $lanes:literal>)* } => {
+        $(
+        impl<T: MaskElement> ToBitMaskArray for Mask<T, $lanes> {
+            type BitMaskArray = [u8; $int];
+
+            #[inline]
+            fn to_bitmask_array(self) -> Self::BitMaskArray {
+                self.0.to_bitmask_array()
+            }
+
+            #[inline]
+            fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self {
+                Self(mask_impl::Mask::from_bitmask_array(bitmask))
+            }
+        }
+        )*
+    }
+}
+
+impl_integer! {
     impl ToBitMask<BitMask=u8> for Mask<_, 1>
     impl ToBitMask<BitMask=u8> for Mask<_, 2>
     impl ToBitMask<BitMask=u8> for Mask<_, 4>
@@ -72,27 +91,12 @@ impl ToBitMask<BitMask=u32> for Mask<_, 32>
     impl ToBitMask<BitMask=u64> for Mask<_, 64>
 }
 
-/// Returns the minimum number of bytes in a bitmask with `lanes` lanes.
-#[cfg(feature = "generic_const_exprs")]
-#[allow(clippy::missing_inline_in_public_items)]
-pub const fn bitmask_len(lanes: usize) -> usize {
-    (lanes + 7) / 8
-}
-
-#[cfg(feature = "generic_const_exprs")]
-impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
-where
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    const BYTES: usize = bitmask_len(LANES);
-
-    #[inline]
-    fn to_bitmask_array(self) -> [u8; Self::BYTES] {
-        self.0.to_bitmask_array()
-    }
-
-    #[inline]
-    fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self {
-        Mask(mask_impl::Mask::from_bitmask_array(bitmask))
-    }
+impl_array! {
+    impl ToBitMaskArray<Bytes=1> for Mask<_, 1>
+    impl ToBitMaskArray<Bytes=1> for Mask<_, 2>
+    impl ToBitMaskArray<Bytes=1> for Mask<_, 4>
+    impl ToBitMaskArray<Bytes=1> for Mask<_, 8>
+    impl ToBitMaskArray<Bytes=2> for Mask<_, 16>
+    impl ToBitMaskArray<Bytes=4> for Mask<_, 32>
+    impl ToBitMaskArray<Bytes=8> for Mask<_, 64>
 }
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index dd954b7cc48..f489ae36de4 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -3,9 +3,6 @@
 
 pub(crate) mod intrinsics;
 
-#[cfg(feature = "generic_const_exprs")]
-mod to_bytes;
-
 mod alias;
 mod cast;
 mod elements;
@@ -18,6 +15,7 @@
 mod ord;
 mod select;
 mod swizzle_dyn;
+mod to_bytes;
 mod vector;
 mod vendor;
 
@@ -37,5 +35,6 @@ pub mod simd {
     pub use crate::core_simd::ord::*;
     pub use crate::core_simd::swizzle::*;
     pub use crate::core_simd::swizzle_dyn::*;
+    pub use crate::core_simd::to_bytes::ToBytes;
     pub use crate::core_simd::vector::*;
 }
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index 5f1374fd5a5..5fe4a77d50d 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -1,73 +1,127 @@
-use crate::simd::SimdUint;
+use crate::simd::{LaneCount, Simd, SimdElement, SimdFloat, SimdInt, SimdUint, SupportedLaneCount};
+
+mod sealed {
+    use super::*;
+    pub trait Sealed {}
+    impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
+}
+use sealed::Sealed;
+
+/// Convert SIMD vectors to vectors of bytes
+pub trait ToBytes: Sealed {
+    /// This type, reinterpreted as bytes.
+    type Bytes;
+
+    /// Return the memory representation of this integer as a byte array in native byte
+    /// order.
+    fn to_ne_bytes(self) -> Self::Bytes;
+
+    /// Return the memory representation of this integer as a byte array in big-endian
+    /// (network) byte order.
+    fn to_be_bytes(self) -> Self::Bytes;
+
+    /// Return the memory representation of this integer as a byte array in little-endian
+    /// byte order.
+    fn to_le_bytes(self) -> Self::Bytes;
+
+    /// Create a native endian integer value from its memory representation as a byte array
+    /// in native endianness.
+    fn from_ne_bytes(bytes: Self::Bytes) -> Self;
+
+    /// Create an integer value from its representation as a byte array in big endian.
+    fn from_be_bytes(bytes: Self::Bytes) -> Self;
+
+    /// Create an integer value from its representation as a byte array in little endian.
+    fn from_le_bytes(bytes: Self::Bytes) -> Self;
+}
+
+macro_rules! swap_bytes {
+    { f32, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
+    { f64, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
+    { $ty:ty, $x:expr } => { $x.swap_bytes() }
+}
 
 macro_rules! impl_to_bytes {
-    { $ty:ty, $size:literal } => {
-        impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
-        where
-            crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount,
-            crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount,
-        {
-            /// Return the memory representation of this integer as a byte array in native byte
-            /// order.
+    { $ty:tt, $size:tt } => {
+        impl_to_bytes! { $ty, $size * 1 }
+        impl_to_bytes! { $ty, $size * 2 }
+        impl_to_bytes! { $ty, $size * 4 }
+        impl_to_bytes! { $ty, $size * 8 }
+        impl_to_bytes! { $ty, $size * 16 }
+        impl_to_bytes! { $ty, $size * 32 }
+        impl_to_bytes! { $ty, $size * 64 }
+    };
+
+    // multiply element size by number of elements
+    { $ty:tt, 1 * $elems:literal } => { impl_to_bytes! { @impl [$ty; $elems], $elems } };
+    { $ty:tt, $size:literal * 1 } => { impl_to_bytes! { @impl [$ty; 1], $size } };
+    { $ty:tt, 2 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 4  } };
+    { $ty:tt, 2 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 8  } };
+    { $ty:tt, 2 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 16 } };
+    { $ty:tt, 2 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 32 } };
+    { $ty:tt, 2 * 32 } => { impl_to_bytes! { @impl [$ty; 32], 64 } };
+    { $ty:tt, 4 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 8  } };
+    { $ty:tt, 4 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 16 } };
+    { $ty:tt, 4 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 32 } };
+    { $ty:tt, 4 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 64 } };
+    { $ty:tt, 8 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 16 } };
+    { $ty:tt, 8 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 32 } };
+    { $ty:tt, 8 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 64 } };
+
+    // unsupported number of lanes
+    { $ty:ty, $a:literal * $b:literal } => { };
+
+    { @impl [$ty:tt; $elem:literal], $bytes:literal } => {
+        impl ToBytes for Simd<$ty, $elem> {
+            type Bytes = Simd<u8, $bytes>;
+
             #[inline]
-            pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+            fn to_ne_bytes(self) -> Self::Bytes {
                 // Safety: transmuting between vectors is safe
-                unsafe { core::mem::transmute_copy(&self) }
+                unsafe { core::mem::transmute(self) }
             }
 
-            /// Return the memory representation of this integer as a byte array in big-endian
-            /// (network) byte order.
             #[inline]
-            pub fn to_be_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
-                let bytes = self.to_ne_bytes();
+            fn to_be_bytes(mut self) -> Self::Bytes {
+                if !cfg!(target_endian = "big") {
+                    self = swap_bytes!($ty, self);
+                }
+                self.to_ne_bytes()
+            }
+
+            #[inline]
+            fn to_le_bytes(mut self) -> Self::Bytes {
+                if !cfg!(target_endian = "little") {
+                    self = swap_bytes!($ty, self);
+                }
+                self.to_ne_bytes()
+            }
+
+            #[inline]
+            fn from_ne_bytes(bytes: Self::Bytes) -> Self {
+                // Safety: transmuting between vectors is safe
+                unsafe { core::mem::transmute(bytes) }
+            }
+
+            #[inline]
+            fn from_be_bytes(bytes: Self::Bytes) -> Self {
+                let ret = Self::from_ne_bytes(bytes);
                 if cfg!(target_endian = "big") {
-                    bytes
+                    ret
                 } else {
-                    bytes.swap_bytes()
+                    swap_bytes!($ty, ret)
                 }
             }
 
-            /// Return the memory representation of this integer as a byte array in little-endian
-            /// byte order.
             #[inline]
-            pub fn to_le_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
-                let bytes = self.to_ne_bytes();
+            fn from_le_bytes(bytes: Self::Bytes) -> Self {
+                let ret = Self::from_ne_bytes(bytes);
                 if cfg!(target_endian = "little") {
-                    bytes
+                    ret
                 } else {
-                    bytes.swap_bytes()
+                    swap_bytes!($ty, ret)
                 }
             }
-
-            /// Create a native endian integer value from its memory representation as a byte array
-            /// in native endianness.
-            #[inline]
-            pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
-                // Safety: transmuting between vectors is safe
-                unsafe { core::mem::transmute_copy(&bytes) }
-            }
-
-            /// Create an integer value from its representation as a byte array in big endian.
-            #[inline]
-            pub fn from_be_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
-                let bytes = if cfg!(target_endian = "big") {
-                    bytes
-                } else {
-                    bytes.swap_bytes()
-                };
-                Self::from_ne_bytes(bytes)
-            }
-
-            /// Create an integer value from its representation as a byte array in little endian.
-            #[inline]
-            pub fn from_le_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
-                let bytes = if cfg!(target_endian = "little") {
-                    bytes
-                } else {
-                    bytes.swap_bytes()
-                };
-                Self::from_ne_bytes(bytes)
-            }
         }
     }
 }
@@ -89,3 +143,6 @@ pub fn from_le_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self
 impl_to_bytes! { isize, 4 }
 #[cfg(target_pointer_width = "64")]
 impl_to_bytes! { isize, 8 }
+
+impl_to_bytes! { f32, 4 }
+impl_to_bytes! { f64, 8 }
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 9f8bad1c36c..7c1d4c7dd3f 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -125,7 +125,6 @@ fn cast_impl<T: core_simd::simd::MaskElement>()
                 cast_impl::<isize>();
             }
 
-            #[cfg(feature = "generic_const_exprs")]
             #[test]
             fn roundtrip_bitmask_array_conversion() {
                 use core_simd::simd::ToBitMaskArray;
diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs
index 7dd740d65dd..66a7981cdc3 100644
--- a/crates/core_simd/tests/to_bytes.rs
+++ b/crates/core_simd/tests/to_bytes.rs
@@ -1,8 +1,6 @@
-#![feature(portable_simd, generic_const_exprs, adt_const_params)]
-#![allow(incomplete_features)]
-#![cfg(feature = "generic_const_exprs")]
+#![feature(portable_simd)]
 
-use core_simd::simd::Simd;
+use core_simd::simd::{Simd, ToBytes};
 
 #[test]
 fn byte_convert() {

From b070f0f657bbe4e8a24c3731fe6a230fda64cdd0 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 1 Oct 2023 23:34:48 -0400
Subject: [PATCH 38/59] Fix cargo features in CI and enable them for testing

---
 .github/workflows/ci.yml | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ed1589be4f1..90543044ea8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -238,7 +238,7 @@ jobs:
         run: cross test --verbose --target=${{ matrix.target }} --release
 
   features:
-    name: "Check cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
+    name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -249,12 +249,8 @@ jobs:
         features:
           - ""
           - "--features std"
-          - "--features generic_const_exprs"
-          - "--features std --features generic_const_exprs"
           - "--features all_lane_counts"
-          - "--features all_lane_counts --features std"
-          - "--features all_lane_counts --features generic_const_exprs"
-          - "--features all_lane_counts --features std --features generic_const_exprs"
+          - "--all-features"
 
     steps:
       - uses: actions/checkout@v2
@@ -266,9 +262,9 @@ jobs:
         run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
       - name: Check build
         if: ${{ matrix.simd == '' }}
-        run: RUSTFLAGS="-Dwarnings" cargo check --all-targets --no-default-features ${{ matrix.features }}
+        run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
       - name: Check AVX
         if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
         run: |
           echo "Found AVX features: $CPU_FEATURE"
-          RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo check --all-targets --no-default-features ${{ matrix.features }}
+          RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}

From b411cb401d97d128f87c47f3f58f615fa041d879 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 2 Oct 2023 16:15:37 -0400
Subject: [PATCH 39/59] Simplify macro

---
 crates/core_simd/src/to_bytes.rs | 43 +++++++++-----------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index 5fe4a77d50d..07a3efea01a 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -42,38 +42,18 @@ macro_rules! swap_bytes {
 }
 
 macro_rules! impl_to_bytes {
-    { $ty:tt, $size:tt } => {
-        impl_to_bytes! { $ty, $size * 1 }
-        impl_to_bytes! { $ty, $size * 2 }
-        impl_to_bytes! { $ty, $size * 4 }
-        impl_to_bytes! { $ty, $size * 8 }
-        impl_to_bytes! { $ty, $size * 16 }
-        impl_to_bytes! { $ty, $size * 32 }
-        impl_to_bytes! { $ty, $size * 64 }
-    };
+    { $ty:tt, 1  } => { impl_to_bytes! { $ty, 1  * [1, 2, 4, 8, 16, 32, 64] } };
+    { $ty:tt, 2  } => { impl_to_bytes! { $ty, 2  * [1, 2, 4, 8, 16, 32] } };
+    { $ty:tt, 4  } => { impl_to_bytes! { $ty, 4  * [1, 2, 4, 8, 16] } };
+    { $ty:tt, 8  } => { impl_to_bytes! { $ty, 8  * [1, 2, 4, 8] } };
+    { $ty:tt, 16 } => { impl_to_bytes! { $ty, 16 * [1, 2, 4] } };
+    { $ty:tt, 32 } => { impl_to_bytes! { $ty, 32 * [1, 2] } };
+    { $ty:tt, 64 } => { impl_to_bytes! { $ty, 64 * [1] } };
 
-    // multiply element size by number of elements
-    { $ty:tt, 1 * $elems:literal } => { impl_to_bytes! { @impl [$ty; $elems], $elems } };
-    { $ty:tt, $size:literal * 1 } => { impl_to_bytes! { @impl [$ty; 1], $size } };
-    { $ty:tt, 2 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 4  } };
-    { $ty:tt, 2 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 8  } };
-    { $ty:tt, 2 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 16 } };
-    { $ty:tt, 2 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 32 } };
-    { $ty:tt, 2 * 32 } => { impl_to_bytes! { @impl [$ty; 32], 64 } };
-    { $ty:tt, 4 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 8  } };
-    { $ty:tt, 4 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 16 } };
-    { $ty:tt, 4 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 32 } };
-    { $ty:tt, 4 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 64 } };
-    { $ty:tt, 8 * 2  } => { impl_to_bytes! { @impl [$ty; 2], 16 } };
-    { $ty:tt, 8 * 4  } => { impl_to_bytes! { @impl [$ty; 4], 32 } };
-    { $ty:tt, 8 * 8  } => { impl_to_bytes! { @impl [$ty; 8], 64 } };
-
-    // unsupported number of lanes
-    { $ty:ty, $a:literal * $b:literal } => { };
-
-    { @impl [$ty:tt; $elem:literal], $bytes:literal } => {
-        impl ToBytes for Simd<$ty, $elem> {
-            type Bytes = Simd<u8, $bytes>;
+    { $ty:tt, $size:literal * [$($elems:literal),*] } => {
+        $(
+        impl ToBytes for Simd<$ty, $elems> {
+            type Bytes = Simd<u8, { $size * $elems }>;
 
             #[inline]
             fn to_ne_bytes(self) -> Self::Bytes {
@@ -123,6 +103,7 @@ fn from_le_bytes(bytes: Self::Bytes) -> Self {
                 }
             }
         }
+        )*
     }
 }
 

From afe28b13e73da85c3b8e711e24dc709829a142b9 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 2 Oct 2023 16:27:18 -0400
Subject: [PATCH 40/59] Add various bounds

---
 crates/core_simd/src/masks/to_bitmask.rs | 11 ++++++++++-
 crates/core_simd/src/to_bytes.rs         |  9 ++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index 7041d15164d..12cb1771ce1 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -1,5 +1,6 @@
 use super::{mask_impl, Mask, MaskElement};
 use crate::simd::{LaneCount, SupportedLaneCount};
+use core::borrow::{Borrow, BorrowMut};
 
 mod sealed {
     pub trait Sealed {}
@@ -32,7 +33,15 @@ pub trait ToBitMask: Sealed {
 /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
 pub trait ToBitMaskArray: Sealed {
     /// The bitmask array.
-    type BitMaskArray;
+    type BitMaskArray: Copy
+        + Unpin
+        + Send
+        + Sync
+        + AsRef<[u8]>
+        + AsMut<[u8]>
+        + Borrow<[u8]>
+        + BorrowMut<[u8]>
+        + 'static;
 
     /// Converts a mask to a bitmask.
     fn to_bitmask_array(self) -> Self::BitMaskArray;
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index 07a3efea01a..3c93fe47404 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -10,7 +10,14 @@ impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: S
 /// Convert SIMD vectors to vectors of bytes
 pub trait ToBytes: Sealed {
     /// This type, reinterpreted as bytes.
-    type Bytes;
+    type Bytes: Copy
+        + Unpin
+        + Send
+        + Sync
+        + AsRef<[u8]>
+        + AsMut<[u8]>
+        + SimdUint<Scalar = u8>
+        + 'static;
 
     /// Return the memory representation of this integer as a byte array in native byte
     /// order.

From 6a3c45eea827681ef1e8895f7714226ead61037e Mon Sep 17 00:00:00 2001
From: David Tolnay <dtolnay@gmail.com>
Date: Sat, 14 Oct 2023 14:00:45 -0700
Subject: [PATCH 41/59] Eliminate use of #[cfg_attr(not(doc),
 repr(transparent))]

---
 crates/core_simd/src/masks.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index fea687bdc1a..e04448a50be 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -88,7 +88,7 @@ unsafe impl MaskElement for $ty {}
 /// The layout of this type is unspecified, and may change between platforms
 /// and/or Rust versions, and code should not assume that it is equivalent to
 /// `[T; LANES]`.
-#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435
+#[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
 where
     T: MaskElement,

From 596aabe5c7dd00a037a8aa5fd41b929010ebb7ae Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 23 Sep 2023 00:19:36 -0400
Subject: [PATCH 42/59] Add num, ptr, and cmp modules

---
 crates/core_simd/examples/dot_product.rs      |  2 +-
 crates/core_simd/examples/matrix_inversion.rs |  6 ++++--
 crates/core_simd/examples/nbody.rs            |  2 +-
 crates/core_simd/examples/spectral_norm.rs    |  2 +-
 crates/core_simd/src/core_simd_docs.md        |  2 +-
 crates/core_simd/src/masks.rs                 |  4 +++-
 crates/core_simd/src/mod.rs                   | 12 +++++------
 crates/core_simd/src/ops.rs                   |  2 +-
 crates/core_simd/src/simd/cmp.rs              |  7 +++++++
 crates/core_simd/src/{ => simd/cmp}/eq.rs     |  4 +++-
 crates/core_simd/src/{ => simd/cmp}/ord.rs    |  5 ++++-
 .../src/{elements.rs => simd/num.rs}          |  6 ++----
 .../src/{elements => simd/num}/float.rs       | 14 ++++++-------
 .../src/{elements => simd/num}/int.rs         | 20 +++++++++----------
 .../src/{elements => simd/num}/uint.rs        |  6 +++---
 crates/core_simd/src/simd/prelude.rs          |  6 ++++--
 crates/core_simd/src/simd/ptr.rs              | 11 ++++++++++
 .../src/{elements => simd/ptr}/const_ptr.rs   |  4 +++-
 .../src/{elements => simd/ptr}/mut_ptr.rs     |  4 +++-
 crates/core_simd/src/swizzle_dyn.rs           |  4 ++--
 crates/core_simd/src/to_bytes.rs              |  5 ++++-
 crates/core_simd/src/vector.rs                | 18 +++++++++--------
 crates/core_simd/tests/cast.rs                |  2 +-
 crates/core_simd/tests/ops_macros.rs          | 12 +++++------
 crates/core_simd/tests/pointers.rs            |  5 ++++-
 crates/core_simd/tests/round.rs               |  2 +-
 crates/std_float/src/lib.rs                   |  2 +-
 27 files changed, 104 insertions(+), 65 deletions(-)
 create mode 100644 crates/core_simd/src/simd/cmp.rs
 rename crates/core_simd/src/{ => simd/cmp}/eq.rs (96%)
 rename crates/core_simd/src/{ => simd/cmp}/ord.rs (98%)
 rename crates/core_simd/src/{elements.rs => simd/num.rs} (63%)
 rename crates/core_simd/src/{elements => simd/num}/float.rs (98%)
 rename crates/core_simd/src/{elements => simd/num}/int.rs (96%)
 rename crates/core_simd/src/{elements => simd/num}/uint.rs (98%)
 create mode 100644 crates/core_simd/src/simd/ptr.rs
 rename crates/core_simd/src/{elements => simd/ptr}/const_ptr.rs (97%)
 rename crates/core_simd/src/{elements => simd/ptr}/mut_ptr.rs (97%)

diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs
index 391f08f55a0..e5815888bb7 100644
--- a/crates/core_simd/examples/dot_product.rs
+++ b/crates/core_simd/examples/dot_product.rs
@@ -6,7 +6,7 @@
 #![feature(slice_as_chunks)]
 // Add these imports to use the stdsimd library
 #![feature(portable_simd)]
-use core_simd::simd::*;
+use core_simd::simd::prelude::*;
 
 // This is your barebones dot product implementation:
 // Take 2 vectors, multiply them element wise and *then*
diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs
index 39f530f68f5..5176623c160 100644
--- a/crates/core_simd/examples/matrix_inversion.rs
+++ b/crates/core_simd/examples/matrix_inversion.rs
@@ -2,8 +2,10 @@
 // Code ported from the `packed_simd` crate
 // Run this code with `cargo test --example matrix_inversion`
 #![feature(array_chunks, portable_simd)]
-use core_simd::simd::*;
-use Which::*;
+use core_simd::simd::{
+    prelude::*,
+    Which::{self, *},
+};
 
 // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
 #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs
index df38a00967f..154e24c460e 100644
--- a/crates/core_simd/examples/nbody.rs
+++ b/crates/core_simd/examples/nbody.rs
@@ -5,7 +5,7 @@
 /// Taken from the `packed_simd` crate
 /// Run this benchmark with `cargo test --example nbody`
 mod nbody {
-    use core_simd::simd::*;
+    use core_simd::simd::prelude::*;
     #[allow(unused)] // False positive?
     use std_float::StdFloat;
 
diff --git a/crates/core_simd/examples/spectral_norm.rs b/crates/core_simd/examples/spectral_norm.rs
index d576bd0ccee..bc7934c2522 100644
--- a/crates/core_simd/examples/spectral_norm.rs
+++ b/crates/core_simd/examples/spectral_norm.rs
@@ -1,6 +1,6 @@
 #![feature(portable_simd)]
 
-use core_simd::simd::*;
+use core_simd::simd::prelude::*;
 
 fn a(i: usize, j: usize) -> f64 {
     ((i + j) * (i + j + 1) / 2 + i + 1) as f64
diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md
index 8acdeb04427..fa93155ff5e 100644
--- a/crates/core_simd/src/core_simd_docs.md
+++ b/crates/core_simd/src/core_simd_docs.md
@@ -30,7 +30,7 @@ Instead, they map to a reasonable implementation of the operation for the target
 
 Consistency between targets is not compromised to use faster or fewer instructions.
 In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent.
-For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`].
+For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`].
 When necessary, [`Simd<T, N>`] can be converted to the types provided by `std::arch` to make use of target-specific functions.
 
 Many targets simply don't have SIMD, or don't support SIMD for a particular element type.
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 13ae5088fb9..0a04cf66757 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -15,7 +15,9 @@
 mod to_bitmask;
 pub use to_bitmask::{ToBitMask, ToBitMaskArray};
 
-use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use crate::simd::{
+    cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount,
+};
 use core::cmp::Ordering;
 use core::{fmt, mem};
 
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index f489ae36de4..6fd458d24e7 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -5,14 +5,11 @@
 
 mod alias;
 mod cast;
-mod elements;
-mod eq;
 mod fmt;
 mod iter;
 mod lane_count;
 mod masks;
 mod ops;
-mod ord;
 mod select;
 mod swizzle_dyn;
 mod to_bytes;
@@ -24,15 +21,18 @@ pub mod simd {
 
     pub mod prelude;
 
+    pub mod num;
+
+    pub mod ptr;
+
+    pub mod cmp;
+
     pub(crate) use crate::core_simd::intrinsics;
 
     pub use crate::core_simd::alias::*;
     pub use crate::core_simd::cast::*;
-    pub use crate::core_simd::elements::*;
-    pub use crate::core_simd::eq::*;
     pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
     pub use crate::core_simd::masks::*;
-    pub use crate::core_simd::ord::*;
     pub use crate::core_simd::swizzle::*;
     pub use crate::core_simd::swizzle_dyn::*;
     pub use crate::core_simd::to_bytes::ToBytes;
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index 63a96106283..d1b4a504884 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::ops::{Add, Mul};
 use core::ops::{BitAnd, BitOr, BitXor};
 use core::ops::{Div, Rem, Sub};
diff --git a/crates/core_simd/src/simd/cmp.rs b/crates/core_simd/src/simd/cmp.rs
new file mode 100644
index 00000000000..a8d81dbf20f
--- /dev/null
+++ b/crates/core_simd/src/simd/cmp.rs
@@ -0,0 +1,7 @@
+//! Traits for comparing and ordering vectors.
+
+mod eq;
+mod ord;
+
+pub use eq::*;
+pub use ord::*;
diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs
similarity index 96%
rename from crates/core_simd/src/eq.rs
rename to crates/core_simd/src/simd/cmp/eq.rs
index 80763c07272..627ceba3c6f 100644
--- a/crates/core_simd/src/eq.rs
+++ b/crates/core_simd/src/simd/cmp/eq.rs
@@ -1,5 +1,7 @@
 use crate::simd::{
-    intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdElement, SimdMutPtr, SupportedLaneCount,
+    intrinsics,
+    ptr::{SimdConstPtr, SimdMutPtr},
+    LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
 };
 
 /// Parallel `PartialEq`.
diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs
similarity index 98%
rename from crates/core_simd/src/ord.rs
rename to crates/core_simd/src/simd/cmp/ord.rs
index b2455190e82..509f907785c 100644
--- a/crates/core_simd/src/ord.rs
+++ b/crates/core_simd/src/simd/cmp/ord.rs
@@ -1,5 +1,8 @@
 use crate::simd::{
-    intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdMutPtr, SimdPartialEq, SupportedLaneCount,
+    cmp::SimdPartialEq,
+    intrinsics,
+    ptr::{SimdConstPtr, SimdMutPtr},
+    LaneCount, Mask, Simd, SupportedLaneCount,
 };
 
 /// Parallel `PartialOrd`.
diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/simd/num.rs
similarity index 63%
rename from crates/core_simd/src/elements.rs
rename to crates/core_simd/src/simd/num.rs
index dc7f52a4d57..22a4802ec6c 100644
--- a/crates/core_simd/src/elements.rs
+++ b/crates/core_simd/src/simd/num.rs
@@ -1,15 +1,13 @@
-mod const_ptr;
+//! Traits for vectors with numeric elements.
+
 mod float;
 mod int;
-mod mut_ptr;
 mod uint;
 
 mod sealed {
     pub trait Sealed {}
 }
 
-pub use const_ptr::*;
 pub use float::*;
 pub use int::*;
-pub use mut_ptr::*;
 pub use uint::*;
diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/simd/num/float.rs
similarity index 98%
rename from crates/core_simd/src/elements/float.rs
rename to crates/core_simd/src/simd/num/float.rs
index d700011ff9c..affc01d111f 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/simd/num/float.rs
@@ -1,7 +1,7 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialEq, SimdPartialOrd,
-    SupportedLaneCount,
+    cmp::{SimdPartialEq, SimdPartialOrd},
+    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
 };
 
 /// Operations on SIMD vectors of floats.
@@ -28,7 +28,7 @@ pub trait SimdFloat: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{SimdFloat, SimdInt, Simd};
+    /// # use simd::prelude::*;
     /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
     /// let ints = floats.cast::<i32>();
     /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
@@ -162,7 +162,7 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{f32x2, SimdFloat};
+    /// # use simd::prelude::*;
     /// let v = f32x2::from_array([1., 2.]);
     /// assert_eq!(v.reduce_sum(), 3.);
     /// ```
@@ -176,7 +176,7 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{f32x2, SimdFloat};
+    /// # use simd::prelude::*;
     /// let v = f32x2::from_array([3., 4.]);
     /// assert_eq!(v.reduce_product(), 12.);
     /// ```
@@ -195,7 +195,7 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{f32x2, SimdFloat};
+    /// # use simd::prelude::*;
     /// let v = f32x2::from_array([1., 2.]);
     /// assert_eq!(v.reduce_max(), 2.);
     ///
@@ -222,7 +222,7 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{f32x2, SimdFloat};
+    /// # use simd::prelude::*;
     /// let v = f32x2::from_array([3., 7.]);
     /// assert_eq!(v.reduce_min(), 3.);
     ///
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/simd/num/int.rs
similarity index 96%
rename from crates/core_simd/src/elements/int.rs
rename to crates/core_simd/src/simd/num/int.rs
index c341c59545c..d1f8e856a53 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/simd/num/int.rs
@@ -1,6 +1,6 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SimdUint,
+    cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
     SupportedLaneCount,
 };
 
@@ -32,7 +32,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdInt};
+    /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, 0, 1, MAX]);
     /// let max = Simd::splat(MAX);
@@ -50,7 +50,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdInt};
+    /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, -2, -1, MAX]);
     /// let max = Simd::splat(MAX);
@@ -68,7 +68,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdInt};
+    /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
     /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
     /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
@@ -83,7 +83,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdInt};
+    /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
     /// let xs = Simd::from_array([MIN, -2, 0, 3]);
     /// let unsat = xs.abs();
@@ -101,7 +101,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdInt};
+    /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, -2, 3, MAX]);
     /// let unsat = -x;
@@ -131,7 +131,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{i32x4, SimdInt};
+    /// # use simd::prelude::*;
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_sum(), 10);
     ///
@@ -149,7 +149,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{i32x4, SimdInt};
+    /// # use simd::prelude::*;
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_product(), 24);
     ///
@@ -167,7 +167,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{i32x4, SimdInt};
+    /// # use simd::prelude::*;
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_max(), 4);
     /// ```
@@ -181,7 +181,7 @@ pub trait SimdInt: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{i32x4, SimdInt};
+    /// # use simd::prelude::*;
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_min(), 1);
     /// ```
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/simd/num/uint.rs
similarity index 98%
rename from crates/core_simd/src/elements/uint.rs
rename to crates/core_simd/src/simd/num/uint.rs
index c33059f7d4e..7eadd2050b9 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/simd/num/uint.rs
@@ -29,7 +29,7 @@ pub trait SimdUint: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdUint};
+    /// # use simd::prelude::*;
     /// use core::u32::MAX;
     /// let x = Simd::from_array([2, 1, 0, MAX]);
     /// let max = Simd::splat(MAX);
@@ -47,7 +47,7 @@ pub trait SimdUint: Copy + Sealed {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdUint};
+    /// # use simd::prelude::*;
     /// use core::u32::MAX;
     /// let x = Simd::from_array([2, 1, 0, MAX]);
     /// let max = Simd::splat(MAX);
@@ -122,7 +122,7 @@ fn cast<T: SimdCast>(self) -> Self::Cast<T> {
 
             #[inline]
             fn wrapping_neg(self) -> Self {
-                use crate::simd::SimdInt;
+                use crate::simd::num::SimdInt;
                 (-self.cast::<$signed>()).cast()
             }
 
diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs
index e8fdc932d49..4b7c744c013 100644
--- a/crates/core_simd/src/simd/prelude.rs
+++ b/crates/core_simd/src/simd/prelude.rs
@@ -7,8 +7,10 @@
 
 #[doc(no_inline)]
 pub use super::{
-    simd_swizzle, Mask, Simd, SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq,
-    SimdPartialOrd, SimdUint,
+    cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd},
+    num::{SimdFloat, SimdInt, SimdUint},
+    ptr::{SimdConstPtr, SimdMutPtr},
+    simd_swizzle, Mask, Simd,
 };
 
 #[rustfmt::skip]
diff --git a/crates/core_simd/src/simd/ptr.rs b/crates/core_simd/src/simd/ptr.rs
new file mode 100644
index 00000000000..3f8e6669118
--- /dev/null
+++ b/crates/core_simd/src/simd/ptr.rs
@@ -0,0 +1,11 @@
+//! Traits for vectors of pointers.
+
+mod const_ptr;
+mod mut_ptr;
+
+mod sealed {
+    pub trait Sealed {}
+}
+
+pub use const_ptr::*;
+pub use mut_ptr::*;
diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs
similarity index 97%
rename from crates/core_simd/src/elements/const_ptr.rs
rename to crates/core_simd/src/simd/ptr/const_ptr.rs
index f215f9a61d0..f82def1d377 100644
--- a/crates/core_simd/src/elements/const_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -1,5 +1,7 @@
 use super::sealed::Sealed;
-use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};
+use crate::simd::{
+    cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
+};
 
 /// Operations on SIMD vectors of constant pointers.
 pub trait SimdConstPtr: Copy + Sealed {
diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs
similarity index 97%
rename from crates/core_simd/src/elements/mut_ptr.rs
rename to crates/core_simd/src/simd/ptr/mut_ptr.rs
index 4bdc6a14ce4..283054dc8ce 100644
--- a/crates/core_simd/src/elements/mut_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -1,5 +1,7 @@
 use super::sealed::Sealed;
-use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};
+use crate::simd::{
+    cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
+};
 
 /// Operations on SIMD vectors of mutable pointers.
 pub trait SimdMutPtr: Copy + Sealed {
diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs
index ce621792534..bd8a38e350d 100644
--- a/crates/core_simd/src/swizzle_dyn.rs
+++ b/crates/core_simd/src/swizzle_dyn.rs
@@ -86,7 +86,7 @@ pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
 #[inline]
 #[allow(clippy::let_and_return)]
 unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
-    use crate::simd::SimdPartialOrd;
+    use crate::simd::cmp::SimdPartialOrd;
     #[cfg(target_arch = "x86")]
     use core::arch::x86;
     #[cfg(target_arch = "x86_64")]
@@ -149,7 +149,7 @@ fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
     // On x86, make sure the top bit is set.
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
     let idxs = {
-        use crate::simd::SimdPartialOrd;
+        use crate::simd::cmp::SimdPartialOrd;
         idxs.simd_lt(Simd::splat(N as u8))
             .select(idxs, Simd::splat(u8::MAX))
     };
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index 3c93fe47404..dd01929551c 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -1,4 +1,7 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SimdFloat, SimdInt, SimdUint, SupportedLaneCount};
+use crate::simd::{
+    num::{SimdFloat, SimdInt, SimdUint},
+    LaneCount, Simd, SimdElement, SupportedLaneCount,
+};
 
 mod sealed {
     use super::*;
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 9aa7bacfce9..70188337444 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -1,6 +1,8 @@
 use crate::simd::{
-    intrinsics, LaneCount, Mask, MaskElement, SimdConstPtr, SimdMutPtr, SimdPartialOrd,
-    SupportedLaneCount, Swizzle,
+    cmp::SimdPartialOrd,
+    intrinsics,
+    ptr::{SimdConstPtr, SimdMutPtr},
+    LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
 };
 use core::convert::{TryFrom, TryInto};
 
@@ -394,7 +396,7 @@ pub fn gather_select(
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdPartialOrd, Mask};
+    /// # use simd::{Simd, cmp::SimdPartialOrd, Mask};
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -434,7 +436,7 @@ pub unsafe fn gather_select_unchecked(
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdConstPtr};
+    /// # use simd::prelude::*;
     /// let values = [6, 2, 4, 9];
     /// let offsets = Simd::from_array([1, 0, 0, 3]);
     /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets);
@@ -467,7 +469,7 @@ pub unsafe fn gather_ptr(source: Simd<*const T, N>) -> Self
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Mask, Simd, SimdConstPtr};
+    /// # use simd::prelude::*;
     /// let values = [6, 2, 4, 9];
     /// let enable = Mask::from_array([true, true, false, true]);
     /// let offsets = Simd::from_array([1, 0, 0, 3]);
@@ -550,7 +552,7 @@ pub fn scatter_select(self, slice: &mut [T], enable: Mask<isize, N>, idxs: Simd<
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdPartialOrd, Mask};
+    /// # use simd::{Simd, cmp::SimdPartialOrd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -604,7 +606,7 @@ pub unsafe fn scatter_select_unchecked(
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Simd, SimdMutPtr};
+    /// # use simd::{Simd, ptr::SimdMutPtr};
     /// let mut values = [0; 4];
     /// let offset = Simd::from_array([3, 2, 1, 0]);
     /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset);
@@ -631,7 +633,7 @@ pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, N>) {
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "as_crate")] use core_simd::simd;
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{Mask, Simd, SimdMutPtr};
+    /// # use simd::{Mask, Simd, ptr::SimdMutPtr};
     /// let mut values = [0; 4];
     /// let offset = Simd::from_array([3, 2, 1, 0]);
     /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset);
diff --git a/crates/core_simd/tests/cast.rs b/crates/core_simd/tests/cast.rs
index 00545936ea2..185e1945faa 100644
--- a/crates/core_simd/tests/cast.rs
+++ b/crates/core_simd/tests/cast.rs
@@ -3,7 +3,7 @@ macro_rules! cast_types {
     ($start:ident, $($target:ident),*) => {
         mod $start {
             #[allow(unused)]
-            use core_simd::simd::{Simd, SimdInt, SimdUint, SimdFloat};
+            use core_simd::simd::prelude::*;
             type Vector<const N: usize> = Simd<$start, N>;
             $(
                 mod $target {
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 40aba2fd6cb..50faba04991 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -254,7 +254,7 @@ fn trailing_ones<const LANES: usize>() {
 macro_rules! impl_signed_tests {
     { $scalar:tt } => {
         mod $scalar {
-            use core_simd::simd::SimdInt;
+            use core_simd::simd::num::SimdInt;
             type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 
@@ -306,7 +306,7 @@ fn rem_min_may_overflow<const LANES: usize>() {
                 }
 
                 fn simd_min<const LANES: usize>() {
-                    use core_simd::simd::SimdOrd;
+                    use core_simd::simd::cmp::SimdOrd;
                     let a = Vector::<LANES>::splat(Scalar::MIN);
                     let b = Vector::<LANES>::splat(0);
                     assert_eq!(a.simd_min(b), a);
@@ -316,7 +316,7 @@ fn simd_min<const LANES: usize>() {
                 }
 
                 fn simd_max<const LANES: usize>() {
-                    use core_simd::simd::SimdOrd;
+                    use core_simd::simd::cmp::SimdOrd;
                     let a = Vector::<LANES>::splat(Scalar::MIN);
                     let b = Vector::<LANES>::splat(0);
                     assert_eq!(a.simd_max(b), b);
@@ -326,7 +326,7 @@ fn simd_max<const LANES: usize>() {
                 }
 
                 fn simd_clamp<const LANES: usize>() {
-                    use core_simd::simd::SimdOrd;
+                    use core_simd::simd::cmp::SimdOrd;
                     let min = Vector::<LANES>::splat(Scalar::MIN);
                     let max = Vector::<LANES>::splat(Scalar::MAX);
                     let zero = Vector::<LANES>::splat(0);
@@ -395,7 +395,7 @@ fn rem_neg_one_no_panic<const LANES: usize>() {
 macro_rules! impl_unsigned_tests {
     { $scalar:tt } => {
         mod $scalar {
-            use core_simd::simd::SimdUint;
+            use core_simd::simd::num::SimdUint;
             type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 
@@ -440,7 +440,7 @@ fn wrapping_neg<const LANES: usize>() {
 macro_rules! impl_float_tests {
     { $scalar:tt, $int_scalar:tt } => {
         mod $scalar {
-            use core_simd::simd::SimdFloat;
+            use core_simd::simd::num::SimdFloat;
             type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 
diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs
index 0ae8f83b8b9..a90ff928ced 100644
--- a/crates/core_simd/tests/pointers.rs
+++ b/crates/core_simd/tests/pointers.rs
@@ -1,6 +1,9 @@
 #![feature(portable_simd, strict_provenance)]
 
-use core_simd::simd::{Simd, SimdConstPtr, SimdMutPtr};
+use core_simd::simd::{
+    ptr::{SimdConstPtr, SimdMutPtr},
+    Simd,
+};
 
 macro_rules! common_tests {
     { $constness:ident } => {
diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
index 191c39e2370..847766ec41e 100644
--- a/crates/core_simd/tests/round.rs
+++ b/crates/core_simd/tests/round.rs
@@ -53,7 +53,7 @@ fn fract<const LANES: usize>() {
 
             test_helpers::test_lanes! {
                 fn to_int_unchecked<const LANES: usize>() {
-                    use core_simd::simd::SimdFloat;
+                    use core_simd::simd::num::SimdFloat;
                     // The maximum integer that can be represented by the equivalently sized float has
                     // all of the mantissa digits set to 1, pushed up to the MSB.
                     const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
index bb2b5a2dbba..1fef17242ca 100644
--- a/crates/std_float/src/lib.rs
+++ b/crates/std_float/src/lib.rs
@@ -149,7 +149,7 @@ fn fract(self) -> Self {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use simd::*;
+    use simd::prelude::*;
 
     #[test]
     fn everything_works() {

From 4fc3ce733d647deb2c537856eb142c6208e2b9f1 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 1 Oct 2023 12:31:39 -0400
Subject: [PATCH 43/59] Simplify Swizzle trait and condense all swizzles into
 this trait

---
 crates/core_simd/examples/matrix_inversion.rs |  13 +-
 crates/core_simd/src/lib.rs                   |   1 +
 crates/core_simd/src/swizzle.rs               | 349 ++++++++----------
 crates/core_simd/src/vector.rs                |   4 +-
 crates/core_simd/tests/swizzle.rs             |   4 +-
 5 files changed, 174 insertions(+), 197 deletions(-)

diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs
index 5176623c160..faf4a44467d 100644
--- a/crates/core_simd/examples/matrix_inversion.rs
+++ b/crates/core_simd/examples/matrix_inversion.rs
@@ -2,10 +2,7 @@
 // Code ported from the `packed_simd` crate
 // Run this code with `cargo test --example matrix_inversion`
 #![feature(array_chunks, portable_simd)]
-use core_simd::simd::{
-    prelude::*,
-    Which::{self, *},
-};
+use core_simd::simd::prelude::*;
 
 // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
 #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
@@ -166,10 +163,10 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
     let m_2 = f32x4::from_array(m[2]);
     let m_3 = f32x4::from_array(m[3]);
 
-    const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
-    const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
-    const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
-    const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
+    const SHUFFLE01: [usize; 4] = [0, 1, 4, 5];
+    const SHUFFLE02: [usize; 4] = [0, 2, 4, 6];
+    const SHUFFLE13: [usize; 4] = [1, 3, 5, 7];
+    const SHUFFLE23: [usize; 4] = [2, 3, 6, 7];
 
     let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01);
     let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01);
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index dd3c546e014..64ba9705ef5 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -5,6 +5,7 @@
     const_mut_refs,
     convert_float_to_int,
     decl_macro,
+    inline_const,
     intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index 68f20516cf5..fb257e34cf9 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -1,17 +1,15 @@
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
 
-/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors.
+/// Constructs a new SIMD vector by copying elements from selected elements in other vectors.
 ///
-/// When swizzling one vector, lanes are selected by a `const` array of `usize`,
-/// like [`Swizzle`].
+/// When swizzling one vector, elements are selected like [`Swizzle::swizzle`].
 ///
-/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`],
-/// like [`Swizzle2`].
+/// When swizzling two vectors, elements are selected like [`Swizzle::concat_swizzle`].
 ///
 /// # Examples
 ///
-/// With a single SIMD vector, the const array specifies lane indices in that vector:
+/// With a single SIMD vector, the const array specifies element indices in that vector:
 /// ```
 /// # #![feature(portable_simd)]
 /// # use core::simd::{u32x2, u32x4, simd_swizzle};
@@ -21,25 +19,27 @@
 /// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]);
 /// assert_eq!(r.to_array(), [13, 10, 11, 12]);
 ///
-/// // Changing the number of lanes
+/// // Changing the number of elements
 /// let r: u32x2 = simd_swizzle!(v, [3, 1]);
 /// assert_eq!(r.to_array(), [13, 11]);
 /// ```
 ///
-/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index:
+/// With two input SIMD vectors, the const array specifies element indices in the concatenation of
+/// those vectors:
 /// ```
 /// # #![feature(portable_simd)]
-/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which};
-/// use Which::{First, Second};
+/// # #[cfg(feature = "as_crate")] use core_simd::simd;
+/// # #[cfg(not(feature = "as_crate"))] use core::simd;
+/// # use simd::{u32x2, u32x4, simd_swizzle};
 /// let a = u32x4::from_array([0, 1, 2, 3]);
 /// let b = u32x4::from_array([4, 5, 6, 7]);
 ///
 /// // Keeping the same size
-/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+/// let r: u32x4 = simd_swizzle!(a, b, [0, 1, 6, 7]);
 /// assert_eq!(r.to_array(), [0, 1, 6, 7]);
 ///
-/// // Changing the number of lanes
-/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]);
+/// // Changing the number of elements
+/// let r: u32x2 = simd_swizzle!(a, b, [0, 4]);
 /// assert_eq!(r.to_array(), [0, 4]);
 /// ```
 #[allow(unused_macros)]
@@ -50,7 +50,7 @@
         {
             use $crate::simd::Swizzle;
             struct Impl;
-            impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
+            impl Swizzle<{$index.len()}> for Impl {
                 const INDEX: [usize; {$index.len()}] = $index;
             }
             Impl::swizzle($vector)
@@ -60,127 +60,117 @@ impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
         $first:expr, $second:expr, $index:expr $(,)?
     ) => {
         {
-            use $crate::simd::{Which, Swizzle2};
+            use $crate::simd::Swizzle;
             struct Impl;
-            impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl {
-                const INDEX: [Which; {$index.len()}] = $index;
+            impl Swizzle<{$index.len()}> for Impl {
+                const INDEX: [usize; {$index.len()}] = $index;
             }
-            Impl::swizzle2($first, $second)
+            Impl::concat_swizzle($first, $second)
         }
     }
 }
 
-/// Specifies a lane index into one of two SIMD vectors.
-///
-/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle].
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Which {
-    /// Index of a lane in the first input SIMD vector.
-    First(usize),
-    /// Index of a lane in the second input SIMD vector.
-    Second(usize),
-}
-
 /// Create a vector from the elements of another vector.
-pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
-    /// Map from the lanes of the input vector to the output vector.
-    const INDEX: [usize; OUTPUT_LANES];
+pub trait Swizzle<const N: usize> {
+    /// Map from the elements of the input vector to the output vector.
+    const INDEX: [usize; N];
 
-    /// Create a new vector from the lanes of `vector`.
+    /// Create a new vector from the elements of `vector`.
     ///
     /// Lane `i` of the output is `vector[Self::INDEX[i]]`.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
+    fn swizzle<T, const M: usize>(vector: Simd<T, M>) -> Simd<T, N>
     where
         T: SimdElement,
-        LaneCount<INPUT_LANES>: SupportedLaneCount,
-        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+        LaneCount<N>: SupportedLaneCount,
+        LaneCount<M>: SupportedLaneCount,
     {
-        // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
-        unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
+        // Safety: `vector` is a vector, and the index is a const array of u32.
+        unsafe {
+            intrinsics::simd_shuffle(
+                vector,
+                vector,
+                const {
+                    let mut output = [0; N];
+                    let mut i = 0;
+                    while i < N {
+                        let index = Self::INDEX[i];
+                        assert!(index as u32 as usize == index);
+                        assert!(index < M, "source element index exceeds input vector length");
+                        output[i] = index as u32;
+                        i += 1;
+                    }
+                    output
+                },
+            )
+        }
     }
-}
 
-/// Create a vector from the elements of two other vectors.
-pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
-    /// Map from the lanes of the input vectors to the output vector
-    const INDEX: [Which; OUTPUT_LANES];
-
-    /// Create a new vector from the lanes of `first` and `second`.
+    /// Create a new vector from the elements of `first` and `second`.
     ///
-    /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
-    /// `Second(j)`.
+    /// Lane `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
+    /// `first` and `second`.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    fn swizzle2<T>(
-        first: Simd<T, INPUT_LANES>,
-        second: Simd<T, INPUT_LANES>,
-    ) -> Simd<T, OUTPUT_LANES>
+    fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> Simd<T, N>
     where
         T: SimdElement,
-        LaneCount<INPUT_LANES>: SupportedLaneCount,
-        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+        LaneCount<N>: SupportedLaneCount,
+        LaneCount<M>: SupportedLaneCount,
     {
-        // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
-        unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
+        // Safety: `first` and `second` are vectors, and the index is a const array of u32.
+        unsafe {
+            intrinsics::simd_shuffle(
+                first,
+                second,
+                const {
+                    let mut output = [0; N];
+                    let mut i = 0;
+                    while i < N {
+                        let index = Self::INDEX[i];
+                        assert!(index as u32 as usize == index);
+                        assert!(index < 2 * M, "source element index exceeds input vector length");
+                        output[i] = index as u32;
+                        i += 1;
+                    }
+                    output
+                },
+            )
+        }
     }
-}
 
-/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
-/// This trait hides `INDEX_IMPL` from the public API.
-trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
-    const INDEX_IMPL: [u32; OUTPUT_LANES];
-}
+    /// Create a new mask from the elements of `first` and `second`.
+    ///
+    /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
+    /// `first` and `second`.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original inputs"]
+    fn swizzle_mask<T, const M: usize>(vector: Mask<T, M>) -> Mask<T, N>
+    where
+        T: MaskElement,
+        LaneCount<N>: SupportedLaneCount,
+        LaneCount<M>: SupportedLaneCount,
+    {
+        // SAFETY: all elements of this mask come from another mask
+        unsafe { Mask::from_int_unchecked(Self::swizzle(vector.to_int())) }
+    }
 
-impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
-    for T
-where
-    T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
-{
-    const INDEX_IMPL: [u32; OUTPUT_LANES] = {
-        let mut output = [0; OUTPUT_LANES];
-        let mut i = 0;
-        while i < OUTPUT_LANES {
-            let index = Self::INDEX[i];
-            assert!(index as u32 as usize == index);
-            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
-            output[i] = index as u32;
-            i += 1;
-        }
-        output
-    };
-}
-
-/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
-/// This trait hides `INDEX_IMPL` from the public API.
-trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
-    const INDEX_IMPL: [u32; OUTPUT_LANES];
-}
-
-impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
-    for T
-where
-    T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
-{
-    const INDEX_IMPL: [u32; OUTPUT_LANES] = {
-        let mut output = [0; OUTPUT_LANES];
-        let mut i = 0;
-        while i < OUTPUT_LANES {
-            let (offset, index) = match Self::INDEX[i] {
-                Which::First(index) => (false, index),
-                Which::Second(index) => (true, index),
-            };
-            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
-
-            // lanes are indexed by the first vector, then second vector
-            let index = if offset { index + INPUT_LANES } else { index };
-            assert!(index as u32 as usize == index);
-            output[i] = index as u32;
-            i += 1;
-        }
-        output
-    };
+    /// Create a new mask from the elements of `first` and `second`.
+    ///
+    /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
+    /// `first` and `second`.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original inputs"]
+    fn concat_swizzle_mask<T, const M: usize>(first: Mask<T, M>, second: Mask<T, M>) -> Mask<T, N>
+    where
+        T: MaskElement,
+        LaneCount<N>: SupportedLaneCount,
+        LaneCount<M>: SupportedLaneCount,
+    {
+        // SAFETY: all elements of this mask come from another mask
+        unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) }
+    }
 }
 
 impl<T, const LANES: usize> Simd<T, LANES>
@@ -188,24 +178,22 @@ impl<T, const LANES: usize> Simd<T, LANES>
     T: SimdElement,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Reverse the order of the lanes in the vector.
+    /// Reverse the order of the elements in the vector.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn reverse(self) -> Self {
-        const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
-            let mut index = [0; LANES];
-            let mut i = 0;
-            while i < LANES {
-                index[i] = LANES - i - 1;
-                i += 1;
-            }
-            index
-        }
-
         struct Reverse;
 
-        impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
-            const INDEX: [usize; LANES] = reverse_index::<LANES>();
+        impl<const N: usize> Swizzle<N> for Reverse {
+            const INDEX: [usize; N] = const {
+                let mut index = [0; N];
+                let mut i = 0;
+                while i < N {
+                    index[i] = N - i - 1;
+                    i += 1;
+                }
+                index
+            };
         }
 
         Reverse::swizzle(self)
@@ -217,21 +205,19 @@ impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
-        const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
-            let offset = OFFSET % LANES;
-            let mut index = [0; LANES];
-            let mut i = 0;
-            while i < LANES {
-                index[i] = (i + offset) % LANES;
-                i += 1;
-            }
-            index
-        }
-
         struct Rotate<const OFFSET: usize>;
 
-        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
-            const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+        impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
+            const INDEX: [usize; N] = const {
+                let offset = OFFSET % N;
+                let mut index = [0; N];
+                let mut i = 0;
+                while i < N {
+                    index[i] = (i + offset) % N;
+                    i += 1;
+                }
+                index
+            };
         }
 
         Rotate::<OFFSET>::swizzle(self)
@@ -243,21 +229,19 @@ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<O
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
-        const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
-            let offset = LANES - OFFSET % LANES;
-            let mut index = [0; LANES];
-            let mut i = 0;
-            while i < LANES {
-                index[i] = (i + offset) % LANES;
-                i += 1;
-            }
-            index
-        }
-
         struct Rotate<const OFFSET: usize>;
 
-        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
-            const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+        impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
+            const INDEX: [usize; N] = const {
+                let offset = N - OFFSET % N;
+                let mut index = [0; N];
+                let mut i = 0;
+                while i < N {
+                    index[i] = (i + offset) % N;
+                    i += 1;
+                }
+                index
+            };
         }
 
         Rotate::<OFFSET>::swizzle(self)
@@ -265,7 +249,7 @@ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<O
 
     /// Interleave two vectors.
     ///
-    /// The resulting vectors contain lanes taken alternatively from `self` and `other`, first
+    /// The resulting vectors contain elements taken alternatively from `self` and `other`, first
     /// filling the first result, and then the second.
     ///
     /// The reverse of this operation is [`Simd::deinterleave`].
@@ -282,18 +266,13 @@ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<O
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn interleave(self, other: Self) -> (Self, Self) {
-        const fn interleave<const LANES: usize>(high: bool) -> [Which; LANES] {
-            let mut idx = [Which::First(0); LANES];
+        const fn interleave<const N: usize>(high: bool) -> [usize; N] {
+            let mut idx = [0; N];
             let mut i = 0;
-            while i < LANES {
-                // Treat the source as a concatenated vector
-                let dst_index = if high { i + LANES } else { i };
-                let src_index = dst_index / 2 + (dst_index % 2) * LANES;
-                idx[i] = if src_index < LANES {
-                    Which::First(src_index)
-                } else {
-                    Which::Second(src_index % LANES)
-                };
+            while i < N {
+                let dst_index = if high { i + N } else { i };
+                let src_index = dst_index / 2 + (dst_index % 2) * N;
+                idx[i] = src_index;
                 i += 1;
             }
             idx
@@ -302,24 +281,27 @@ pub fn interleave(self, other: Self) -> (Self, Self) {
         struct Lo;
         struct Hi;
 
-        impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
-            const INDEX: [Which; LANES] = interleave::<LANES>(false);
+        impl<const N: usize> Swizzle<N> for Lo {
+            const INDEX: [usize; N] = interleave::<N>(false);
         }
 
-        impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
-            const INDEX: [Which; LANES] = interleave::<LANES>(true);
+        impl<const N: usize> Swizzle<N> for Hi {
+            const INDEX: [usize; N] = interleave::<N>(true);
         }
 
-        (Lo::swizzle2(self, other), Hi::swizzle2(self, other))
+        (
+            Lo::concat_swizzle(self, other),
+            Hi::concat_swizzle(self, other),
+        )
     }
 
     /// Deinterleave two vectors.
     ///
-    /// The first result takes every other lane of `self` and then `other`, starting with
-    /// the first lane.
+    /// The first result takes every other element of `self` and then `other`, starting with
+    /// the first element.
     ///
-    /// The second result takes every other lane of `self` and then `other`, starting with
-    /// the second lane.
+    /// The second result takes every other element of `self` and then `other`, starting with
+    /// the second element.
     ///
     /// The reverse of this operation is [`Simd::interleave`].
     ///
@@ -335,17 +317,11 @@ impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn deinterleave(self, other: Self) -> (Self, Self) {
-        const fn deinterleave<const LANES: usize>(second: bool) -> [Which; LANES] {
-            let mut idx = [Which::First(0); LANES];
+        const fn deinterleave<const N: usize>(second: bool) -> [usize; N] {
+            let mut idx = [0; N];
             let mut i = 0;
-            while i < LANES {
-                // Treat the source as a concatenated vector
-                let src_index = i * 2 + second as usize;
-                idx[i] = if src_index < LANES {
-                    Which::First(src_index)
-                } else {
-                    Which::Second(src_index % LANES)
-                };
+            while i < N {
+                idx[i] = i * 2 + second as usize;
                 i += 1;
             }
             idx
@@ -354,14 +330,17 @@ pub fn deinterleave(self, other: Self) -> (Self, Self) {
         struct Even;
         struct Odd;
 
-        impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
-            const INDEX: [Which; LANES] = deinterleave::<LANES>(false);
+        impl<const N: usize> Swizzle<N> for Even {
+            const INDEX: [usize; N] = deinterleave::<N>(false);
         }
 
-        impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
-            const INDEX: [Which; LANES] = deinterleave::<LANES>(true);
+        impl<const N: usize> Swizzle<N> for Odd {
+            const INDEX: [usize; N] = deinterleave::<N>(true);
         }
 
-        (Even::swizzle2(self, other), Odd::swizzle2(self, other))
+        (
+            Even::concat_swizzle(self, other),
+            Odd::concat_swizzle(self, other),
+        )
     }
 }
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 70188337444..18a0bb0a77e 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -144,10 +144,10 @@ pub fn splat(value: T) -> Self {
         // This is preferred over `[value; N]`, since it's explicitly a splat:
         // https://github.com/rust-lang/rust/issues/97804
         struct Splat;
-        impl<const N: usize> Swizzle<1, N> for Splat {
+        impl<const N: usize> Swizzle<N> for Splat {
             const INDEX: [usize; N] = [0; N];
         }
-        Splat::swizzle(Simd::<T, 1>::from([value]))
+        Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
     }
 
     /// Returns an array reference containing the entire SIMD vector.
diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs
index 8cd7c33e823..46aaf748ad8 100644
--- a/crates/core_simd/tests/swizzle.rs
+++ b/crates/core_simd/tests/swizzle.rs
@@ -11,10 +11,10 @@
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn swizzle() {
     struct Index;
-    impl Swizzle<4, 4> for Index {
+    impl Swizzle<4> for Index {
         const INDEX: [usize; 4] = [2, 1, 3, 0];
     }
-    impl Swizzle<4, 2> for Index {
+    impl Swizzle<2> for Index {
         const INDEX: [usize; 2] = [1, 1];
     }
 

From 7a7faf6cc6849a48a4ba2c82a64be3855e1460fa Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 1 Oct 2023 13:59:11 -0400
Subject: [PATCH 44/59] Fix formatting of swizzle index assertions

---
 crates/core_simd/src/swizzle.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index fb257e34cf9..48aebba91fd 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -97,7 +97,10 @@ fn swizzle<T, const M: usize>(vector: Simd<T, M>) -> Simd<T, N>
                     while i < N {
                         let index = Self::INDEX[i];
                         assert!(index as u32 as usize == index);
-                        assert!(index < M, "source element index exceeds input vector length");
+                        assert!(
+                            index < M,
+                            "source element index exceeds input vector length"
+                        );
                         output[i] = index as u32;
                         i += 1;
                     }
@@ -130,7 +133,10 @@ fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> S
                     while i < N {
                         let index = Self::INDEX[i];
                         assert!(index as u32 as usize == index);
-                        assert!(index < 2 * M, "source element index exceeds input vector length");
+                        assert!(
+                            index < 2 * M,
+                            "source element index exceeds input vector length"
+                        );
                         output[i] = index as u32;
                         i += 1;
                     }

From 6e0de1983ca1861ca900c9a1b63b7b62e8babd02 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 19:15:22 -0400
Subject: [PATCH 45/59] Fix swizzle_mask parameter name and doc comment

---
 crates/core_simd/src/swizzle.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index 48aebba91fd..ed4bd72b9a5 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -146,20 +146,20 @@ fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> S
         }
     }
 
-    /// Create a new mask from the elements of `first` and `second`.
+    /// Create a new mask from the elements of `mask`.
     ///
     /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
     /// `first` and `second`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original inputs"]
-    fn swizzle_mask<T, const M: usize>(vector: Mask<T, M>) -> Mask<T, N>
+    fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
     where
         T: MaskElement,
         LaneCount<N>: SupportedLaneCount,
         LaneCount<M>: SupportedLaneCount,
     {
         // SAFETY: all elements of this mask come from another mask
-        unsafe { Mask::from_int_unchecked(Self::swizzle(vector.to_int())) }
+        unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) }
     }
 
     /// Create a new mask from the elements of `first` and `second`.

From b962b612e02fb7a4585adf5f1753771687aa8e06 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 20:44:47 -0400
Subject: [PATCH 46/59] Change lane to element in documentation

---
 crates/core_simd/examples/matrix_inversion.rs | 38 ++++++------
 crates/core_simd/src/masks.rs                 | 60 +++++++++----------
 crates/core_simd/src/masks/to_bitmask.rs      |  4 +-
 crates/core_simd/src/select.rs                | 12 ++--
 crates/core_simd/src/simd/cmp/eq.rs           |  4 +-
 crates/core_simd/src/simd/cmp/ord.rs          | 26 ++++----
 crates/core_simd/src/simd/num/float.rs        | 60 +++++++++----------
 crates/core_simd/src/simd/num/int.rs          | 22 +++----
 crates/core_simd/src/simd/num/uint.rs         | 14 ++---
 crates/core_simd/src/simd/ptr/const_ptr.rs    | 24 ++++----
 crates/core_simd/src/simd/ptr/mut_ptr.rs      | 24 ++++----
 crates/core_simd/src/swizzle.rs               | 14 ++---
 crates/core_simd/tests/swizzle.rs             | 24 ++++----
 13 files changed, 163 insertions(+), 163 deletions(-)

diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs
index faf4a44467d..bad86414401 100644
--- a/crates/core_simd/examples/matrix_inversion.rs
+++ b/crates/core_simd/examples/matrix_inversion.rs
@@ -179,58 +179,58 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
     let row2 = simd_swizzle!(tmp, row3, SHUFFLE02);
     let row3 = simd_swizzle!(row3, tmp, SHUFFLE13);
 
-    let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>();
+    let tmp = (row2 * row3).reverse().rotate_elements_right::<2>();
     let minor0 = row1 * tmp;
     let minor1 = row0 * tmp;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
     let minor0 = (row1 * tmp) - minor0;
     let minor1 = (row0 * tmp) - minor1;
-    let minor1 = minor1.rotate_lanes_right::<2>();
+    let minor1 = minor1.rotate_elements_right::<2>();
 
-    let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>();
+    let tmp = (row1 * row2).reverse().rotate_elements_right::<2>();
     let minor0 = (row3 * tmp) + minor0;
     let minor3 = row0 * tmp;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
 
     let minor0 = minor0 - row3 * tmp;
     let minor3 = row0 * tmp - minor3;
-    let minor3 = minor3.rotate_lanes_right::<2>();
+    let minor3 = minor3.rotate_elements_right::<2>();
 
-    let tmp = (row3 * row1.rotate_lanes_right::<2>())
+    let tmp = (row3 * row1.rotate_elements_right::<2>())
         .reverse()
-        .rotate_lanes_right::<2>();
-    let row2 = row2.rotate_lanes_right::<2>();
+        .rotate_elements_right::<2>();
+    let row2 = row2.rotate_elements_right::<2>();
     let minor0 = row2 * tmp + minor0;
     let minor2 = row0 * tmp;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
     let minor0 = minor0 - row2 * tmp;
     let minor2 = row0 * tmp - minor2;
-    let minor2 = minor2.rotate_lanes_right::<2>();
+    let minor2 = minor2.rotate_elements_right::<2>();
 
-    let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>();
+    let tmp = (row0 * row1).reverse().rotate_elements_right::<2>();
     let minor2 = minor2 + row3 * tmp;
     let minor3 = row2 * tmp - minor3;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
     let minor2 = row3 * tmp - minor2;
     let minor3 = minor3 - row2 * tmp;
 
-    let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>();
+    let tmp = (row0 * row3).reverse().rotate_elements_right::<2>();
     let minor1 = minor1 - row2 * tmp;
     let minor2 = row1 * tmp + minor2;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
     let minor1 = row2 * tmp + minor1;
     let minor2 = minor2 - row1 * tmp;
 
-    let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>();
+    let tmp = (row0 * row2).reverse().rotate_elements_right::<2>();
     let minor1 = row3 * tmp + minor1;
     let minor3 = minor3 - row1 * tmp;
-    let tmp = tmp.rotate_lanes_right::<2>();
+    let tmp = tmp.rotate_elements_right::<2>();
     let minor1 = minor1 - row3 * tmp;
     let minor3 = row1 * tmp + minor3;
 
     let det = row0 * minor0;
-    let det = det.rotate_lanes_right::<2>() + det;
-    let det = det.reverse().rotate_lanes_right::<2>() + det;
+    let det = det.rotate_elements_right::<2>() + det;
+    let det = det.reverse().rotate_elements_right::<2>() + det;
 
     if det.reduce_sum() == 0. {
         return None;
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 0a04cf66757..c3da4468757 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -1,4 +1,4 @@
-//! Types and traits associated with masking lanes of vectors.
+//! Types and traits associated with masking elements of vectors.
 //! Types representing
 #![allow(non_camel_case_types)]
 
@@ -82,7 +82,7 @@ unsafe impl MaskElement for $ty {}
 
 /// A SIMD vector mask for `LANES` elements of width specified by `Element`.
 ///
-/// Masks represent boolean inclusion/exclusion on a per-lane basis.
+/// Masks represent boolean inclusion/exclusion on a per-element basis.
 ///
 /// The layout of this type is unspecified, and may change between platforms
 /// and/or Rust versions, and code should not assume that it is equivalent to
@@ -116,7 +116,7 @@ impl<T, const LANES: usize> Mask<T, LANES>
     T: MaskElement,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Construct a mask by setting all lanes to the given value.
+    /// Construct a mask by setting all elements to the given value.
     #[inline]
     pub fn splat(value: bool) -> Self {
         Self(mask_impl::Mask::splat(value))
@@ -163,7 +163,7 @@ pub fn splat(value: bool) -> Self {
     /// represents `true`.
     ///
     /// # Safety
-    /// All lanes must be either 0 or -1.
+    /// All elements must be either 0 or -1.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
@@ -175,7 +175,7 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
     /// represents `true`.
     ///
     /// # Panics
-    /// Panics if any lane is not 0 or -1.
+    /// Panics if any element is not 0 or -1.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     #[track_caller]
@@ -193,71 +193,71 @@ pub fn to_int(self) -> Simd<T, LANES> {
         self.0.to_int()
     }
 
-    /// Converts the mask to a mask of any other lane size.
+    /// Converts the mask to a mask of any other element size.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> {
         Mask(self.0.convert())
     }
 
-    /// Tests the value of the specified lane.
+    /// Tests the value of the specified element.
     ///
     /// # Safety
-    /// `lane` must be less than `LANES`.
+    /// `index` must be less than `self.len()`.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+    pub unsafe fn test_unchecked(&self, index: usize) -> bool {
         // Safety: the caller must confirm this invariant
-        unsafe { self.0.test_unchecked(lane) }
+        unsafe { self.0.test_unchecked(index) }
     }
 
-    /// Tests the value of the specified lane.
+    /// Tests the value of the specified element.
     ///
     /// # Panics
-    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+    /// Panics if `index` is greater than or equal to the number of elements in the vector.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     #[track_caller]
-    pub fn test(&self, lane: usize) -> bool {
-        assert!(lane < LANES, "lane index out of range");
-        // Safety: the lane index has been checked
-        unsafe { self.test_unchecked(lane) }
+    pub fn test(&self, index: usize) -> bool {
+        assert!(index < LANES, "element index out of range");
+        // Safety: the element index has been checked
+        unsafe { self.test_unchecked(index) }
     }
 
-    /// Sets the value of the specified lane.
+    /// Sets the value of the specified element.
     ///
     /// # Safety
-    /// `lane` must be less than `LANES`.
+    /// `index` must be less than `self.len()`.
     #[inline]
-    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
         // Safety: the caller must confirm this invariant
         unsafe {
-            self.0.set_unchecked(lane, value);
+            self.0.set_unchecked(index, value);
         }
     }
 
-    /// Sets the value of the specified lane.
+    /// Sets the value of the specified element.
     ///
     /// # Panics
-    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+    /// Panics if `index` is greater than or equal to the number of elements in the vector.
     #[inline]
     #[track_caller]
-    pub fn set(&mut self, lane: usize, value: bool) {
-        assert!(lane < LANES, "lane index out of range");
-        // Safety: the lane index has been checked
+    pub fn set(&mut self, index: usize, value: bool) {
+        assert!(index < LANES, "element index out of range");
+        // Safety: the element index has been checked
         unsafe {
-            self.set_unchecked(lane, value);
+            self.set_unchecked(index, value);
         }
     }
 
-    /// Returns true if any lane is set, or false otherwise.
+    /// Returns true if any element is set, or false otherwise.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn any(self) -> bool {
         self.0.any()
     }
 
-    /// Returns true if all lanes are set, or false otherwise.
+    /// Returns true if all elements are set, or false otherwise.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn all(self) -> bool {
@@ -294,7 +294,7 @@ impl<T, const LANES: usize> Default for Mask<T, LANES>
     LaneCount<LANES>: SupportedLaneCount,
 {
     #[inline]
-    #[must_use = "method returns a defaulted mask with all lanes set to false (0)"]
+    #[must_use = "method returns a defaulted mask with all elements set to false (0)"]
     fn default() -> Self {
         Self::splat(false)
     }
@@ -332,7 +332,7 @@ impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
     #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_list()
-            .entries((0..LANES).map(|lane| self.test(lane)))
+            .entries((0..LANES).map(|i| self.test(i)))
             .finish()
     }
 }
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index 12cb1771ce1..382928ac308 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -16,7 +16,7 @@ impl<T, const LANES: usize> Sealed for Mask<T, LANES>
 
 /// Converts masks to and from integer bitmasks.
 ///
-/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
+/// Each bit of the bitmask corresponds to a mask element, starting with the LSB.
 pub trait ToBitMask: Sealed {
     /// The integer bitmask type.
     type BitMask;
@@ -30,7 +30,7 @@ pub trait ToBitMask: Sealed {
 
 /// Converts masks to and from byte array bitmasks.
 ///
-/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
+/// Each bit of the bitmask corresponds to a mask element, starting with the LSB of the first byte.
 pub trait ToBitMaskArray: Sealed {
     /// The bitmask array.
     type BitMaskArray: Copy
diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
index 065c5987d3f..a1a26032e87 100644
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@@ -6,10 +6,10 @@ impl<T, const LANES: usize> Mask<T, LANES>
     T: MaskElement,
     LaneCount<LANES>: SupportedLaneCount,
 {
-    /// Choose lanes from two vectors.
+    /// Choose elements from two vectors.
     ///
-    /// For each lane in the mask, choose the corresponding lane from `true_values` if
-    /// that lane mask is true, and `false_values` if that lane mask is false.
+    /// For each element in the mask, choose the corresponding element from `true_values` if
+    /// that element mask is true, and `false_values` if that element mask is false.
     ///
     /// # Examples
     /// ```
@@ -36,10 +36,10 @@ pub fn select<U>(
         unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
     }
 
-    /// Choose lanes from two masks.
+    /// Choose elements from two masks.
     ///
-    /// For each lane in the mask, choose the corresponding lane from `true_values` if
-    /// that lane mask is true, and `false_values` if that lane mask is false.
+    /// For each element in the mask, choose the corresponding element from `true_values` if
+    /// that element mask is true, and `false_values` if that element mask is false.
     ///
     /// # Examples
     /// ```
diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs
index 627ceba3c6f..0ca0401b7ed 100644
--- a/crates/core_simd/src/simd/cmp/eq.rs
+++ b/crates/core_simd/src/simd/cmp/eq.rs
@@ -9,11 +9,11 @@ pub trait SimdPartialEq {
     /// The mask type returned by each comparison.
     type Mask;
 
-    /// Test if each lane is equal to the corresponding lane in `other`.
+    /// Test if each element is equal to the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_eq(self, other: Self) -> Self::Mask;
 
-    /// Test if each lane is equal to the corresponding lane in `other`.
+    /// Test if each element is not equal to the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_ne(self, other: Self) -> Self::Mask;
 }
diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs
index 509f907785c..667eb00e111 100644
--- a/crates/core_simd/src/simd/cmp/ord.rs
+++ b/crates/core_simd/src/simd/cmp/ord.rs
@@ -7,41 +7,41 @@
 
 /// Parallel `PartialOrd`.
 pub trait SimdPartialOrd: SimdPartialEq {
-    /// Test if each lane is less than the corresponding lane in `other`.
+    /// Test if each element is less than the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_lt(self, other: Self) -> Self::Mask;
 
-    /// Test if each lane is less than or equal to the corresponding lane in `other`.
+    /// Test if each element is less than or equal to the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_le(self, other: Self) -> Self::Mask;
 
-    /// Test if each lane is greater than the corresponding lane in `other`.
+    /// Test if each element is greater than the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_gt(self, other: Self) -> Self::Mask;
 
-    /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+    /// Test if each element is greater than or equal to the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_ge(self, other: Self) -> Self::Mask;
 }
 
 /// Parallel `Ord`.
 pub trait SimdOrd: SimdPartialOrd {
-    /// Returns the lane-wise maximum with `other`.
+    /// Returns the element-wise maximum with `other`.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_max(self, other: Self) -> Self;
 
-    /// Returns the lane-wise minimum with `other`.
+    /// Returns the element-wise minimum with `other`.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_min(self, other: Self) -> Self;
 
-    /// Restrict each lane to a certain interval.
+    /// Restrict each element to a certain interval.
     ///
-    /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
+    /// For each element, returns `max` if `self` is greater than `max`, and `min` if `self` is
     /// less than `min`. Otherwise returns `self`.
     ///
     /// # Panics
     ///
-    /// Panics if `min > max` on any lane.
+    /// Panics if `min > max` on any element.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_clamp(self, min: Self, max: Self) -> Self;
 }
@@ -101,7 +101,7 @@ fn simd_min(self, other: Self) -> Self {
             fn simd_clamp(self, min: Self, max: Self) -> Self {
                 assert!(
                     min.simd_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                    "each element in `min` must be less than or equal to the corresponding element in `max`",
                 );
                 self.simd_max(min).simd_min(max)
             }
@@ -208,7 +208,7 @@ fn simd_min(self, other: Self) -> Self {
             fn simd_clamp(self, min: Self, max: Self) -> Self {
                 assert!(
                     min.simd_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                    "each element in `min` must be less than or equal to the corresponding element in `max`",
                 );
                 self.simd_max(min).simd_min(max)
             }
@@ -263,7 +263,7 @@ fn simd_min(self, other: Self) -> Self {
     fn simd_clamp(self, min: Self, max: Self) -> Self {
         assert!(
             min.simd_le(max).all(),
-            "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+            "each element in `min` must be less than or equal to the corresponding element in `max`",
         );
         self.simd_max(min).simd_min(max)
     }
@@ -313,7 +313,7 @@ fn simd_min(self, other: Self) -> Self {
     fn simd_clamp(self, min: Self, max: Self) -> Self {
         assert!(
             min.simd_le(max).all(),
-            "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+            "each element in `min` must be less than or equal to the corresponding element in `max`",
         );
         self.simd_max(min).simd_min(max)
     }
diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs
index affc01d111f..e8378c3147a 100644
--- a/crates/core_simd/src/simd/num/float.rs
+++ b/crates/core_simd/src/simd/num/float.rs
@@ -63,64 +63,64 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
         Self::Scalar: core::convert::FloatToInt<I>;
 
     /// Raw transmutation to an unsigned integer vector type with the
-    /// same size and number of lanes.
+    /// same size and number of elements.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn to_bits(self) -> Self::Bits;
 
     /// Raw transmutation from an unsigned integer vector type with the
-    /// same size and number of lanes.
+    /// same size and number of elements.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn from_bits(bits: Self::Bits) -> Self;
 
-    /// Produces a vector where every lane has the absolute value of the
-    /// equivalently-indexed lane in `self`.
+    /// Produces a vector where every element has the absolute value of the
+    /// equivalently-indexed element in `self`.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn abs(self) -> Self;
 
-    /// Takes the reciprocal (inverse) of each lane, `1/x`.
+    /// Takes the reciprocal (inverse) of each element, `1/x`.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn recip(self) -> Self;
 
-    /// Converts each lane from radians to degrees.
+    /// Converts each element from radians to degrees.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn to_degrees(self) -> Self;
 
-    /// Converts each lane from degrees to radians.
+    /// Converts each element from degrees to radians.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn to_radians(self) -> Self;
 
-    /// Returns true for each lane if it has a positive sign, including
+    /// Returns true for each element if it has a positive sign, including
     /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_sign_positive(self) -> Self::Mask;
 
-    /// Returns true for each lane if it has a negative sign, including
+    /// Returns true for each element if it has a negative sign, including
     /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_sign_negative(self) -> Self::Mask;
 
-    /// Returns true for each lane if its value is `NaN`.
+    /// Returns true for each element if its value is `NaN`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_nan(self) -> Self::Mask;
 
-    /// Returns true for each lane if its value is positive infinity or negative infinity.
+    /// Returns true for each element if its value is positive infinity or negative infinity.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_infinite(self) -> Self::Mask;
 
-    /// Returns true for each lane if its value is neither infinite nor `NaN`.
+    /// Returns true for each element if its value is neither infinite nor `NaN`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_finite(self) -> Self::Mask;
 
-    /// Returns true for each lane if its value is subnormal.
+    /// Returns true for each element if its value is subnormal.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_subnormal(self) -> Self::Mask;
 
-    /// Returns true for each lane if its value is neither zero, infinite,
+    /// Returns true for each element if its value is neither zero, infinite,
     /// subnormal, nor `NaN`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn is_normal(self) -> Self::Mask;
 
-    /// Replaces each lane with a number that represents its sign.
+    /// Replaces each element with a number that represents its sign.
     ///
     /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
     /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
@@ -128,33 +128,33 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn signum(self) -> Self;
 
-    /// Returns each lane with the magnitude of `self` and the sign of `sign`.
+    /// Returns each element with the magnitude of `self` and the sign of `sign`.
     ///
-    /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned.
+    /// For any element containing a `NAN`, a `NAN` with the sign of `sign` is returned.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn copysign(self, sign: Self) -> Self;
 
-    /// Returns the minimum of each lane.
+    /// Returns the minimum of each element.
     ///
     /// If one of the values is `NAN`, then the other value is returned.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_min(self, other: Self) -> Self;
 
-    /// Returns the maximum of each lane.
+    /// Returns the maximum of each element.
     ///
     /// If one of the values is `NAN`, then the other value is returned.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_max(self, other: Self) -> Self;
 
-    /// Restrict each lane to a certain interval unless it is NaN.
+    /// Restrict each element to a certain interval unless it is NaN.
     ///
-    /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
-    /// greater than `max`, and the corresponding lane in `min` if the lane is less
-    /// than `min`.  Otherwise returns the lane in `self`.
+    /// For each element in `self`, returns the corresponding element in `max` if the element is
+    /// greater than `max`, and the corresponding element in `min` if the element is less
+    /// than `min`.  Otherwise returns the element in `self`.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn simd_clamp(self, min: Self, max: Self) -> Self;
 
-    /// Returns the sum of the lanes of the vector.
+    /// Returns the sum of the elements of the vector.
     ///
     /// # Examples
     ///
@@ -168,7 +168,7 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// ```
     fn reduce_sum(self) -> Self::Scalar;
 
-    /// Reducing multiply.  Returns the product of the lanes of the vector.
+    /// Reducing multiply.  Returns the product of the elements of the vector.
     ///
     /// # Examples
     ///
@@ -182,12 +182,12 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// ```
     fn reduce_product(self) -> Self::Scalar;
 
-    /// Returns the maximum lane in the vector.
+    /// Returns the maximum element in the vector.
     ///
     /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
     /// return either.
     ///
-    /// This function will not return `NaN` unless all lanes are `NaN`.
+    /// This function will not return `NaN` unless all elements are `NaN`.
     ///
     /// # Examples
     ///
@@ -209,12 +209,12 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
     /// ```
     fn reduce_max(self) -> Self::Scalar;
 
-    /// Returns the minimum lane in the vector.
+    /// Returns the minimum element in the vector.
     ///
     /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
     /// return either.
     ///
-    /// This function will not return `NaN` unless all lanes are `NaN`.
+    /// This function will not return `NaN` unless all elements are `NaN`.
     ///
     /// # Examples
     ///
@@ -376,7 +376,7 @@ fn simd_max(self, other: Self) -> Self {
             fn simd_clamp(self, min: Self, max: Self) -> Self {
                 assert!(
                     min.simd_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                    "each element in `min` must be less than or equal to the corresponding element in `max`",
                 );
                 let mut x = self;
                 x = x.simd_lt(min).select(min, x);
diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs
index d1f8e856a53..5b0df597b24 100644
--- a/crates/core_simd/src/simd/num/int.rs
+++ b/crates/core_simd/src/simd/num/int.rs
@@ -61,7 +61,7 @@ pub trait SimdInt: Copy + Sealed {
     fn saturating_sub(self, second: Self) -> Self;
 
     /// Lanewise absolute value, implemented in Rust.
-    /// Every lane becomes its absolute value.
+    /// Every element becomes its absolute value.
     ///
     /// # Examples
     /// ```
@@ -111,19 +111,19 @@ pub trait SimdInt: Copy + Sealed {
     /// ```
     fn saturating_neg(self) -> Self;
 
-    /// Returns true for each positive lane and false if it is zero or negative.
+    /// Returns true for each positive element and false if it is zero or negative.
     fn is_positive(self) -> Self::Mask;
 
-    /// Returns true for each negative lane and false if it is zero or positive.
+    /// Returns true for each negative element and false if it is zero or positive.
     fn is_negative(self) -> Self::Mask;
 
-    /// Returns numbers representing the sign of each lane.
+    /// Returns numbers representing the sign of each element.
     /// * `0` if the number is zero
     /// * `1` if the number is positive
     /// * `-1` if the number is negative
     fn signum(self) -> Self;
 
-    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    /// Returns the sum of the elements of the vector, with wrapping addition.
     ///
     /// # Examples
     ///
@@ -141,7 +141,7 @@ pub trait SimdInt: Copy + Sealed {
     /// ```
     fn reduce_sum(self) -> Self::Scalar;
 
-    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+    /// Returns the product of the elements of the vector, with wrapping multiplication.
     ///
     /// # Examples
     ///
@@ -159,7 +159,7 @@ pub trait SimdInt: Copy + Sealed {
     /// ```
     fn reduce_product(self) -> Self::Scalar;
 
-    /// Returns the maximum lane in the vector.
+    /// Returns the maximum element in the vector.
     ///
     /// # Examples
     ///
@@ -173,7 +173,7 @@ pub trait SimdInt: Copy + Sealed {
     /// ```
     fn reduce_max(self) -> Self::Scalar;
 
-    /// Returns the minimum lane in the vector.
+    /// Returns the minimum element in the vector.
     ///
     /// # Examples
     ///
@@ -187,13 +187,13 @@ pub trait SimdInt: Copy + Sealed {
     /// ```
     fn reduce_min(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "and" across the lanes of the vector.
+    /// Returns the cumulative bitwise "and" across the elements of the vector.
     fn reduce_and(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "or" across the lanes of the vector.
+    /// Returns the cumulative bitwise "or" across the elements of the vector.
     fn reduce_or(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+    /// Returns the cumulative bitwise "xor" across the elements of the vector.
     fn reduce_xor(self) -> Self::Scalar;
 
     /// Reverses the byte order of each element.
diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs
index 7eadd2050b9..9c91e147a7a 100644
--- a/crates/core_simd/src/simd/num/uint.rs
+++ b/crates/core_simd/src/simd/num/uint.rs
@@ -57,25 +57,25 @@ pub trait SimdUint: Copy + Sealed {
     /// assert_eq!(sat, Simd::splat(0));
     fn saturating_sub(self, second: Self) -> Self;
 
-    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    /// Returns the sum of the elements of the vector, with wrapping addition.
     fn reduce_sum(self) -> Self::Scalar;
 
-    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+    /// Returns the product of the elements of the vector, with wrapping multiplication.
     fn reduce_product(self) -> Self::Scalar;
 
-    /// Returns the maximum lane in the vector.
+    /// Returns the maximum element in the vector.
     fn reduce_max(self) -> Self::Scalar;
 
-    /// Returns the minimum lane in the vector.
+    /// Returns the minimum element in the vector.
     fn reduce_min(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "and" across the lanes of the vector.
+    /// Returns the cumulative bitwise "and" across the elements of the vector.
     fn reduce_and(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "or" across the lanes of the vector.
+    /// Returns the cumulative bitwise "or" across the elements of the vector.
     fn reduce_or(self) -> Self::Scalar;
 
-    /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+    /// Returns the cumulative bitwise "xor" across the elements of the vector.
     fn reduce_xor(self) -> Self::Scalar;
 
     /// Reverses the byte order of each element.
diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs
index f82def1d377..246fc7ee381 100644
--- a/crates/core_simd/src/simd/ptr/const_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -5,13 +5,13 @@
 
 /// Operations on SIMD vectors of constant pointers.
 pub trait SimdConstPtr: Copy + Sealed {
-    /// Vector of `usize` with the same number of lanes.
+    /// Vector of `usize` with the same number of elements.
     type Usize;
 
-    /// Vector of `isize` with the same number of lanes.
+    /// Vector of `isize` with the same number of elements.
     type Isize;
 
-    /// Vector of const pointers with the same number of lanes.
+    /// Vector of const pointers with the same number of elements.
     type CastPtr<T>;
 
     /// Vector of mutable pointers to the same type.
@@ -20,17 +20,17 @@ pub trait SimdConstPtr: Copy + Sealed {
     /// Mask type used for manipulating this SIMD vector type.
     type Mask;
 
-    /// Returns `true` for each lane that is null.
+    /// Returns `true` for each element that is null.
     fn is_null(self) -> Self::Mask;
 
     /// Casts to a pointer of another type.
     ///
-    /// Equivalent to calling [`pointer::cast`] on each lane.
+    /// Equivalent to calling [`pointer::cast`] on each element.
     fn cast<T>(self) -> Self::CastPtr<T>;
 
     /// Changes constness without changing the type.
     ///
-    /// Equivalent to calling [`pointer::cast_mut`] on each lane.
+    /// Equivalent to calling [`pointer::cast_mut`] on each element.
     fn cast_mut(self) -> Self::MutPtr;
 
     /// Gets the "address" portion of the pointer.
@@ -41,7 +41,7 @@ pub trait SimdConstPtr: Copy + Sealed {
     /// This method semantically discards *provenance* and
     /// *address-space* information. To properly restore that information, use [`Self::with_addr`].
     ///
-    /// Equivalent to calling [`pointer::addr`] on each lane.
+    /// Equivalent to calling [`pointer::addr`] on each element.
     fn addr(self) -> Self::Usize;
 
     /// Creates a new pointer with the given address.
@@ -49,7 +49,7 @@ pub trait SimdConstPtr: Copy + Sealed {
     /// This performs the same operation as a cast, but copies the *address-space* and
     /// *provenance* of `self` to the new pointer.
     ///
-    /// Equivalent to calling [`pointer::with_addr`] on each lane.
+    /// Equivalent to calling [`pointer::with_addr`] on each element.
     fn with_addr(self, addr: Self::Usize) -> Self;
 
     /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use
@@ -58,22 +58,22 @@ pub trait SimdConstPtr: Copy + Sealed {
 
     /// Convert an address back to a pointer, picking up a previously "exposed" provenance.
     ///
-    /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each lane.
+    /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each element.
     fn from_exposed_addr(addr: Self::Usize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_offset`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
     fn wrapping_offset(self, offset: Self::Isize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_add`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_add`] on each element.
     fn wrapping_add(self, count: Self::Usize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_sub`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs
index 283054dc8ce..69c927eb11a 100644
--- a/crates/core_simd/src/simd/ptr/mut_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -5,13 +5,13 @@
 
 /// Operations on SIMD vectors of mutable pointers.
 pub trait SimdMutPtr: Copy + Sealed {
-    /// Vector of `usize` with the same number of lanes.
+    /// Vector of `usize` with the same number of elements.
     type Usize;
 
-    /// Vector of `isize` with the same number of lanes.
+    /// Vector of `isize` with the same number of elements.
     type Isize;
 
-    /// Vector of const pointers with the same number of lanes.
+    /// Vector of const pointers with the same number of elements.
     type CastPtr<T>;
 
     /// Vector of constant pointers to the same type.
@@ -20,17 +20,17 @@ pub trait SimdMutPtr: Copy + Sealed {
     /// Mask type used for manipulating this SIMD vector type.
     type Mask;
 
-    /// Returns `true` for each lane that is null.
+    /// Returns `true` for each element that is null.
     fn is_null(self) -> Self::Mask;
 
     /// Casts to a pointer of another type.
     ///
-    /// Equivalent to calling [`pointer::cast`] on each lane.
+    /// Equivalent to calling [`pointer::cast`] on each element.
     fn cast<T>(self) -> Self::CastPtr<T>;
 
     /// Changes constness without changing the type.
     ///
-    /// Equivalent to calling [`pointer::cast_const`] on each lane.
+    /// Equivalent to calling [`pointer::cast_const`] on each element.
     fn cast_const(self) -> Self::ConstPtr;
 
     /// Gets the "address" portion of the pointer.
@@ -38,7 +38,7 @@ pub trait SimdMutPtr: Copy + Sealed {
     /// This method discards pointer semantic metadata, so the result cannot be
     /// directly cast into a valid pointer.
     ///
-    /// Equivalent to calling [`pointer::addr`] on each lane.
+    /// Equivalent to calling [`pointer::addr`] on each element.
     fn addr(self) -> Self::Usize;
 
     /// Creates a new pointer with the given address.
@@ -46,7 +46,7 @@ pub trait SimdMutPtr: Copy + Sealed {
     /// This performs the same operation as a cast, but copies the *address-space* and
     /// *provenance* of `self` to the new pointer.
     ///
-    /// Equivalent to calling [`pointer::with_addr`] on each lane.
+    /// Equivalent to calling [`pointer::with_addr`] on each element.
     fn with_addr(self, addr: Self::Usize) -> Self;
 
     /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use
@@ -55,22 +55,22 @@ pub trait SimdMutPtr: Copy + Sealed {
 
     /// Convert an address back to a pointer, picking up a previously "exposed" provenance.
     ///
-    /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each lane.
+    /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each element.
     fn from_exposed_addr(addr: Self::Usize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_offset`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
     fn wrapping_offset(self, offset: Self::Isize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_add`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_add`] on each element.
     fn wrapping_add(self, count: Self::Usize) -> Self;
 
     /// Calculates the offset from a pointer using wrapping arithmetic.
     ///
-    /// Equivalent to calling [`pointer::wrapping_sub`] on each lane.
+    /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index ed4bd72b9a5..cfd2cac576a 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -206,11 +206,11 @@ impl<const N: usize> Swizzle<N> for Reverse {
     }
 
     /// Rotates the vector such that the first `OFFSET` elements of the slice move to the end
-    /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
-    /// the element previously in lane `OFFSET` will become the first element in the slice.
+    /// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`,
+    /// the element previously at index `OFFSET` will become the first element in the slice.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
+    pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
         struct Rotate<const OFFSET: usize>;
 
         impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
@@ -229,12 +229,12 @@ impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
         Rotate::<OFFSET>::swizzle(self)
     }
 
-    /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
-    /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`,
-    /// the element previously at index `LANES - OFFSET` will become the first element in the slice.
+    /// Rotates the vector such that the first `self.len() - OFFSET` elements of the vector move to
+    /// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`,
+    /// the element previously at index `self.len() - OFFSET` will become the first element in the slice.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
+    pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
         struct Rotate<const OFFSET: usize>;
 
         impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs
index 46aaf748ad8..522d71439b7 100644
--- a/crates/core_simd/tests/swizzle.rs
+++ b/crates/core_simd/tests/swizzle.rs
@@ -34,18 +34,18 @@ fn reverse() {
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn rotate() {
     let a = Simd::from_array([1, 2, 3, 4]);
-    assert_eq!(a.rotate_lanes_left::<0>().to_array(), [1, 2, 3, 4]);
-    assert_eq!(a.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]);
-    assert_eq!(a.rotate_lanes_left::<2>().to_array(), [3, 4, 1, 2]);
-    assert_eq!(a.rotate_lanes_left::<3>().to_array(), [4, 1, 2, 3]);
-    assert_eq!(a.rotate_lanes_left::<4>().to_array(), [1, 2, 3, 4]);
-    assert_eq!(a.rotate_lanes_left::<5>().to_array(), [2, 3, 4, 1]);
-    assert_eq!(a.rotate_lanes_right::<0>().to_array(), [1, 2, 3, 4]);
-    assert_eq!(a.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]);
-    assert_eq!(a.rotate_lanes_right::<2>().to_array(), [3, 4, 1, 2]);
-    assert_eq!(a.rotate_lanes_right::<3>().to_array(), [2, 3, 4, 1]);
-    assert_eq!(a.rotate_lanes_right::<4>().to_array(), [1, 2, 3, 4]);
-    assert_eq!(a.rotate_lanes_right::<5>().to_array(), [4, 1, 2, 3]);
+    assert_eq!(a.rotate_elements_left::<0>().to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.rotate_elements_left::<1>().to_array(), [2, 3, 4, 1]);
+    assert_eq!(a.rotate_elements_left::<2>().to_array(), [3, 4, 1, 2]);
+    assert_eq!(a.rotate_elements_left::<3>().to_array(), [4, 1, 2, 3]);
+    assert_eq!(a.rotate_elements_left::<4>().to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.rotate_elements_left::<5>().to_array(), [2, 3, 4, 1]);
+    assert_eq!(a.rotate_elements_right::<0>().to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.rotate_elements_right::<1>().to_array(), [4, 1, 2, 3]);
+    assert_eq!(a.rotate_elements_right::<2>().to_array(), [3, 4, 1, 2]);
+    assert_eq!(a.rotate_elements_right::<3>().to_array(), [2, 3, 4, 1]);
+    assert_eq!(a.rotate_elements_right::<4>().to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]);
 }
 
 #[test]

From 0f594090645912a7d2bd3e238de1e2a8afd83741 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 20:49:58 -0400
Subject: [PATCH 47/59] Change LANES to LEN and self.lanes() to self.len()

---
 crates/core_simd/src/vector.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 18a0bb0a77e..befbd71359c 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -112,7 +112,7 @@ impl<T, const N: usize> Simd<T, N>
     T: SimdElement,
 {
     /// Number of elements in this vector.
-    pub const LANES: usize = N;
+    pub const LEN: usize = N;
 
     /// Returns the number of elements in this SIMD vector.
     ///
@@ -122,11 +122,11 @@ impl<T, const N: usize> Simd<T, N>
     /// # #![feature(portable_simd)]
     /// # use core::simd::u32x4;
     /// let v = u32x4::splat(0);
-    /// assert_eq!(v.lanes(), 4);
+    /// assert_eq!(v.len(), 4);
     /// ```
     #[inline]
-    pub const fn lanes(&self) -> usize {
-        Self::LANES
+    pub const fn len(&self) -> usize {
+        Self::LEN
     }
 
     /// Constructs a new SIMD vector with all elements set to the given value.
@@ -273,7 +273,7 @@ impl<const N: usize> Swizzle<N> for Splat {
     #[track_caller]
     pub const fn from_slice(slice: &[T]) -> Self {
         assert!(
-            slice.len() >= Self::LANES,
+            slice.len() >= Self::LEN,
             "slice length must be at least the number of elements"
         );
         // SAFETY: We just checked that the slice contains
@@ -303,7 +303,7 @@ pub const fn from_slice(slice: &[T]) -> Self {
     #[track_caller]
     pub fn copy_to_slice(self, slice: &mut [T]) {
         assert!(
-            slice.len() >= Self::LANES,
+            slice.len() >= Self::LEN,
             "slice length must be at least the number of elements"
         );
         // SAFETY: We just checked that the slice contains

From 32b195ab2bf85c368d2998327b0289e7f563dd4c Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 20:58:04 -0400
Subject: [PATCH 48/59] Change generic LANES to N

---
 crates/core_simd/src/fmt.rs                |   4 +-
 crates/core_simd/src/iter.rs               |  16 +-
 crates/core_simd/src/lane_count.rs         |   8 +-
 crates/core_simd/src/masks.rs              | 165 ++++++++++-----------
 crates/core_simd/src/masks/bitmask.rs      |  74 ++++-----
 crates/core_simd/src/masks/full_masks.rs   |  82 +++++-----
 crates/core_simd/src/masks/to_bitmask.rs   |   4 +-
 crates/core_simd/src/ops.rs                |   8 +-
 crates/core_simd/src/ops/assign.rs         |  26 ++--
 crates/core_simd/src/ops/deref.rs          |  46 +++---
 crates/core_simd/src/ops/unary.rs          |  46 +++---
 crates/core_simd/src/select.rs             |  10 +-
 crates/core_simd/src/simd/cmp/eq.rs        |  22 +--
 crates/core_simd/src/simd/cmp/ord.rs       |  36 ++---
 crates/core_simd/src/simd/num/float.rs     |  18 +--
 crates/core_simd/src/simd/num/int.rs       |  14 +-
 crates/core_simd/src/simd/num/uint.rs      |  10 +-
 crates/core_simd/src/simd/ptr/const_ptr.rs |  18 +--
 crates/core_simd/src/simd/ptr/mut_ptr.rs   |  16 +-
 crates/core_simd/src/swizzle.rs            |   4 +-
 20 files changed, 313 insertions(+), 314 deletions(-)

diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs
index b7317969cbb..3a540f5a049 100644
--- a/crates/core_simd/src/fmt.rs
+++ b/crates/core_simd/src/fmt.rs
@@ -1,9 +1,9 @@
 use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::fmt;
 
-impl<T, const LANES: usize> fmt::Debug for Simd<T, LANES>
+impl<T, const N: usize> fmt::Debug for Simd<T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
     T: SimdElement + fmt::Debug,
 {
     /// A `Simd<T, N>` has a debug format like the one for `[T]`:
diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs
index 328c995b81d..b3732fd74d5 100644
--- a/crates/core_simd/src/iter.rs
+++ b/crates/core_simd/src/iter.rs
@@ -6,9 +6,9 @@
 
 macro_rules! impl_traits {
     { $type:ty } => {
-        impl<const LANES: usize> Sum<Self> for Simd<$type, LANES>
+        impl<const N: usize> Sum<Self> for Simd<$type, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
@@ -16,9 +16,9 @@ fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
             }
         }
 
-        impl<const LANES: usize> Product<Self> for Simd<$type, LANES>
+        impl<const N: usize> Product<Self> for Simd<$type, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
@@ -26,9 +26,9 @@ fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
             }
         }
 
-        impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES>
+        impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
@@ -36,9 +36,9 @@ fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
             }
         }
 
-        impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES>
+        impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs
index 2b91eb9e800..4cd7265ed67 100644
--- a/crates/core_simd/src/lane_count.rs
+++ b/crates/core_simd/src/lane_count.rs
@@ -4,11 +4,11 @@ pub trait Sealed {}
 use sealed::Sealed;
 
 /// Specifies the number of lanes in a SIMD vector as a type.
-pub struct LaneCount<const LANES: usize>;
+pub struct LaneCount<const N: usize>;
 
-impl<const LANES: usize> LaneCount<LANES> {
+impl<const N: usize> LaneCount<N> {
     /// The number of bytes in a bitmask with this many lanes.
-    pub const BITMASK_LEN: usize = (LANES + 7) / 8;
+    pub const BITMASK_LEN: usize = (N + 7) / 8;
 }
 
 /// Statically guarantees that a lane count is marked as supported.
@@ -21,7 +21,7 @@ pub trait SupportedLaneCount: Sealed {
     type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
 }
 
-impl<const LANES: usize> Sealed for LaneCount<LANES> {}
+impl<const N: usize> Sealed for LaneCount<N> {}
 
 macro_rules! supported_lane_count {
     ($($lanes:literal),+) => {
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index c3da4468757..bbfd6567cbf 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -31,9 +31,9 @@ mod sealed {
     /// prevent us from ever removing that bound, or from implementing `MaskElement` on
     /// non-`PartialEq` types in the future.
     pub trait Sealed {
-        fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+        fn valid<const N: usize>(values: Simd<Self, N>) -> bool
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
             Self: SimdElement;
 
         fn eq(self, other: Self) -> bool;
@@ -55,9 +55,9 @@ macro_rules! impl_element {
     { $ty:ty } => {
         impl Sealed for $ty {
             #[inline]
-            fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+            fn valid<const N: usize>(value: Simd<Self, N>) -> bool
             where
-                LaneCount<LANES>: SupportedLaneCount,
+                LaneCount<N>: SupportedLaneCount,
             {
                 (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all()
             }
@@ -80,30 +80,30 @@ unsafe impl MaskElement for $ty {}
 impl_element! { i64 }
 impl_element! { isize }
 
-/// A SIMD vector mask for `LANES` elements of width specified by `Element`.
+/// A SIMD vector mask for `N` elements of width specified by `Element`.
 ///
 /// Masks represent boolean inclusion/exclusion on a per-element basis.
 ///
 /// The layout of this type is unspecified, and may change between platforms
 /// and/or Rust versions, and code should not assume that it is equivalent to
-/// `[T; LANES]`.
+/// `[T; N]`.
 #[repr(transparent)]
-pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+pub struct Mask<T, const N: usize>(mask_impl::Mask<T, N>)
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount;
+    LaneCount<N>: SupportedLaneCount;
 
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn clone(&self) -> Self {
@@ -111,10 +111,10 @@ fn clone(&self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     /// Construct a mask by setting all elements to the given value.
     #[inline]
@@ -124,7 +124,7 @@ pub fn splat(value: bool) -> Self {
 
     /// Converts an array of bools to a SIMD mask.
     #[inline]
-    pub fn from_array(array: [bool; LANES]) -> Self {
+    pub fn from_array(array: [bool; N]) -> Self {
         // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
         //     true:    0b_0000_0001
         //     false:   0b_0000_0000
@@ -132,16 +132,15 @@ pub fn splat(value: bool) -> Self {
         // This would be hypothetically valid as an "in-place" transmute,
         // but these are "dependently-sized" types, so copy elision it is!
         unsafe {
-            let bytes: [u8; LANES] = mem::transmute_copy(&array);
-            let bools: Simd<i8, LANES> =
-                intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+            let bytes: [u8; N] = mem::transmute_copy(&array);
+            let bools: Simd<i8, N> = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
             Mask::from_int_unchecked(intrinsics::simd_cast(bools))
         }
     }
 
     /// Converts a SIMD mask to an array of bools.
     #[inline]
-    pub fn to_array(self) -> [bool; LANES] {
+    pub fn to_array(self) -> [bool; N] {
         // This follows mostly the same logic as from_array.
         // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
         //     true:    0b_0000_0001
@@ -153,7 +152,7 @@ pub fn splat(value: bool) -> Self {
         // This would be hypothetically valid as an "in-place" transmute,
         // but these are "dependently-sized" types, so copy elision it is!
         unsafe {
-            let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+            let mut bytes: Simd<i8, N> = intrinsics::simd_cast(self.to_int());
             bytes &= Simd::splat(1i8);
             mem::transmute_copy(&bytes)
         }
@@ -166,7 +165,7 @@ pub fn splat(value: bool) -> Self {
     /// All elements must be either 0 or -1.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+    pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
         // Safety: the caller must confirm this invariant
         unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
     }
@@ -179,7 +178,7 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     #[track_caller]
-    pub fn from_int(value: Simd<T, LANES>) -> Self {
+    pub fn from_int(value: Simd<T, N>) -> Self {
         assert!(T::valid(value), "all values must be either 0 or -1",);
         // Safety: the validity has been checked
         unsafe { Self::from_int_unchecked(value) }
@@ -189,14 +188,14 @@ pub fn from_int(value: Simd<T, LANES>) -> Self {
     /// represents `true`.
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_int(self) -> Simd<T, LANES> {
+    pub fn to_int(self) -> Simd<T, N> {
         self.0.to_int()
     }
 
     /// Converts the mask to a mask of any other element size.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> {
+    pub fn cast<U: MaskElement>(self) -> Mask<U, N> {
         Mask(self.0.convert())
     }
 
@@ -219,7 +218,7 @@ pub unsafe fn test_unchecked(&self, index: usize) -> bool {
     #[must_use = "method returns a new bool and does not mutate the original value"]
     #[track_caller]
     pub fn test(&self, index: usize) -> bool {
-        assert!(index < LANES, "element index out of range");
+        assert!(index < N, "element index out of range");
         // Safety: the element index has been checked
         unsafe { self.test_unchecked(index) }
     }
@@ -243,7 +242,7 @@ pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
     #[inline]
     #[track_caller]
     pub fn set(&mut self, index: usize, value: bool) {
-        assert!(index < LANES, "element index out of range");
+        assert!(index < N, "element index out of range");
         // Safety: the element index has been checked
         unsafe {
             self.set_unchecked(index, value);
@@ -266,32 +265,32 @@ pub fn all(self) -> bool {
 }
 
 // vector/array conversion
-impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+impl<T, const N: usize> From<[bool; N]> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
-    fn from(array: [bool; LANES]) -> Self {
+    fn from(array: [bool; N]) -> Self {
         Self::from_array(array)
     }
 }
 
-impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+impl<T, const N: usize> From<Mask<T, N>> for [bool; N]
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
-    fn from(vector: Mask<T, LANES>) -> Self {
+    fn from(vector: Mask<T, N>) -> Self {
         vector.to_array()
     }
 }
 
-impl<T, const LANES: usize> Default for Mask<T, LANES>
+impl<T, const N: usize> Default for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a defaulted mask with all elements set to false (0)"]
@@ -300,10 +299,10 @@ fn default() -> Self {
     }
 }
 
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
 where
     T: MaskElement + PartialEq,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
@@ -312,10 +311,10 @@ fn eq(&self, other: &Self) -> bool {
     }
 }
 
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
 where
     T: MaskElement + PartialOrd,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a new Ordering and does not mutate the original value"]
@@ -324,23 +323,23 @@ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
     }
 }
 
-impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
+impl<T, const N: usize> fmt::Debug for Mask<T, N>
 where
     T: MaskElement + fmt::Debug,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_list()
-            .entries((0..LANES).map(|i| self.test(i)))
+            .entries((0..N).map(|i| self.test(i)))
             .finish()
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -350,10 +349,10 @@ fn bitand(self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -363,23 +362,23 @@ fn bitand(self, rhs: bool) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitAnd<Mask<T, N>> for bool
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Output = Mask<T, LANES>;
+    type Output = Mask<T, N>;
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+    fn bitand(self, rhs: Mask<T, N>) -> Mask<T, N> {
         Mask::splat(self) & rhs
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -389,10 +388,10 @@ fn bitor(self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -402,23 +401,23 @@ fn bitor(self, rhs: bool) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitOr<Mask<T, N>> for bool
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Output = Mask<T, LANES>;
+    type Output = Mask<T, N>;
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+    fn bitor(self, rhs: Mask<T, N>) -> Mask<T, N> {
         Mask::splat(self) | rhs
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -428,10 +427,10 @@ fn bitxor(self, rhs: Self) -> Self::Output {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -441,25 +440,25 @@ fn bitxor(self, rhs: bool) -> Self::Output {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitXor<Mask<T, N>> for bool
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Output = Mask<T, LANES>;
+    type Output = Mask<T, N>;
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+    fn bitxor(self, rhs: Mask<T, N>) -> Self::Output {
         Mask::splat(self) ^ rhs
     }
 }
 
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Output = Mask<T, LANES>;
+    type Output = Mask<T, N>;
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn not(self) -> Self::Output {
@@ -467,10 +466,10 @@ fn not(self) -> Self::Output {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAndAssign for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitand_assign(&mut self, rhs: Self) {
@@ -478,10 +477,10 @@ fn bitand_assign(&mut self, rhs: Self) {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAndAssign<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitand_assign(&mut self, rhs: bool) {
@@ -489,10 +488,10 @@ fn bitand_assign(&mut self, rhs: bool) {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOrAssign for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitor_assign(&mut self, rhs: Self) {
@@ -500,10 +499,10 @@ fn bitor_assign(&mut self, rhs: Self) {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOrAssign<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitor_assign(&mut self, rhs: bool) {
@@ -511,10 +510,10 @@ fn bitor_assign(&mut self, rhs: bool) {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXorAssign for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitxor_assign(&mut self, rhs: Self) {
@@ -522,10 +521,10 @@ fn bitxor_assign(&mut self, rhs: Self) {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXorAssign<bool> for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn bitxor_assign(&mut self, rhs: bool) {
@@ -536,12 +535,12 @@ fn bitxor_assign(&mut self, rhs: bool) {
 macro_rules! impl_from {
     { $from:ty  => $($to:ty),* } => {
         $(
-        impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
+        impl<const N: usize> From<Mask<$from, N>> for Mask<$to, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
-            fn from(value: Mask<$from, LANES>) -> Self {
+            fn from(value: Mask<$from, N>) -> Self {
                 value.cast()
             }
         }
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index a7df6304bc7..b54912afda5 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -6,25 +6,25 @@
 
 /// A mask where each lane is represented by a single bit.
 #[repr(transparent)]
-pub struct Mask<T, const LANES: usize>(
-    <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+pub struct Mask<T, const N: usize>(
+    <LaneCount<N> as SupportedLaneCount>::BitMask,
     PhantomData<T>,
 )
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount;
+    LaneCount<N>: SupportedLaneCount;
 
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn clone(&self) -> Self {
@@ -32,10 +32,10 @@ fn clone(&self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn eq(&self, other: &Self) -> bool {
@@ -43,10 +43,10 @@ fn eq(&self, other: &Self) -> bool {
     }
 }
 
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -54,17 +54,17 @@ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
     }
 }
 
-impl<T, const LANES: usize> Eq for Mask<T, LANES>
+impl<T, const N: usize> Eq for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
-impl<T, const LANES: usize> Ord for Mask<T, LANES>
+impl<T, const N: usize> Ord for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
@@ -72,22 +72,22 @@ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
     }
 }
 
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn splat(value: bool) -> Self {
-        let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
+        let mut mask = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
         if value {
             mask.as_mut().fill(u8::MAX)
         } else {
             mask.as_mut().fill(u8::MIN)
         }
-        if LANES % 8 > 0 {
-            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+        if N % 8 > 0 {
+            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
         }
         Self(mask, PhantomData)
     }
@@ -107,7 +107,7 @@ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
 
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_int(self) -> Simd<T, LANES> {
+    pub fn to_int(self) -> Simd<T, N> {
         unsafe {
             intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
         }
@@ -115,7 +115,7 @@ pub fn to_int(self) -> Simd<T, LANES> {
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+    pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }
 
@@ -140,7 +140,7 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
     #[inline]
     pub fn to_bitmask_integer<U>(self) -> U
     where
-        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+        super::Mask<T, N>: ToBitMask<BitMask = U>,
     {
         // Safety: these are the same types
         unsafe { core::mem::transmute_copy(&self.0) }
@@ -149,7 +149,7 @@ pub fn to_bitmask_integer<U>(self) -> U
     #[inline]
     pub fn from_bitmask_integer<U>(bitmask: U) -> Self
     where
-        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+        super::Mask<T, N>: ToBitMask<BitMask = U>,
     {
         // Safety: these are the same types
         unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
@@ -157,7 +157,7 @@ pub fn from_bitmask_integer<U>(bitmask: U) -> Self
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn convert<U>(self) -> Mask<U, LANES>
+    pub fn convert<U>(self) -> Mask<U, N>
     where
         U: MaskElement,
     {
@@ -178,11 +178,11 @@ pub fn all(self) -> bool {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
-    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+    LaneCount<N>: SupportedLaneCount,
+    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
 {
     type Output = Self;
     #[inline]
@@ -195,11 +195,11 @@ fn bitand(mut self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
-    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+    LaneCount<N>: SupportedLaneCount,
+    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
 {
     type Output = Self;
     #[inline]
@@ -212,10 +212,10 @@ fn bitor(mut self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -228,10 +228,10 @@ fn bitxor(mut self, rhs: Self) -> Self::Output {
     }
 }
 
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -240,8 +240,8 @@ fn not(mut self) -> Self::Output {
         for x in self.0.as_mut() {
             *x = !*x;
         }
-        if LANES % 8 > 0 {
-            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+        if N % 8 > 0 {
+            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
         }
         self
     }
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 4b36adece71..2aa9272ab46 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -5,22 +5,22 @@
 use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 
 #[repr(transparent)]
-pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+pub struct Mask<T, const N: usize>(Simd<T, N>)
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount;
+    LaneCount<N>: SupportedLaneCount;
 
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
@@ -29,10 +29,10 @@ fn clone(&self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
 where
     T: MaskElement + PartialEq,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn eq(&self, other: &Self) -> bool {
@@ -40,10 +40,10 @@ fn eq(&self, other: &Self) -> bool {
     }
 }
 
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
 where
     T: MaskElement + PartialOrd,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -51,17 +51,17 @@ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
     }
 }
 
-impl<T, const LANES: usize> Eq for Mask<T, LANES>
+impl<T, const N: usize> Eq for Mask<T, N>
 where
     T: MaskElement + Eq,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
-impl<T, const LANES: usize> Ord for Mask<T, LANES>
+impl<T, const N: usize> Ord for Mask<T, N>
 where
     T: MaskElement + Ord,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
@@ -98,10 +98,10 @@ fn reverse_bits(self, n: usize) -> Self {
 
 impl_reverse_bits! { u8, u16, u32, u64 }
 
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
@@ -122,19 +122,19 @@ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
 
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_int(self) -> Simd<T, LANES> {
+    pub fn to_int(self) -> Simd<T, N> {
         self.0
     }
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+    pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
         Self(value)
     }
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn convert<U>(self) -> Mask<U, LANES>
+    pub fn convert<U>(self) -> Mask<U, N>
     where
         U: MaskElement,
     {
@@ -144,18 +144,18 @@ pub fn convert<U>(self) -> Mask<U, LANES>
 
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
+    pub fn to_bitmask_array<const M: usize>(self) -> [u8; M]
     where
-        super::Mask<T, LANES>: ToBitMaskArray,
+        super::Mask<T, N>: ToBitMaskArray,
     {
         // Safety: Bytes is the right size array
         unsafe {
             // Compute the bitmask
-            let bitmask: <super::Mask<T, LANES> as ToBitMaskArray>::BitMaskArray =
+            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
                 intrinsics::simd_bitmask(self.0);
 
             // Transmute to the return type
-            let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
+            let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask);
 
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {
@@ -170,9 +170,9 @@ pub fn convert<U>(self) -> Mask<U, LANES>
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
+    pub fn from_bitmask_array<const M: usize>(mut bitmask: [u8; M]) -> Self
     where
-        super::Mask<T, LANES>: ToBitMaskArray,
+        super::Mask<T, N>: ToBitMaskArray,
     {
         // Safety: Bytes is the right size array
         unsafe {
@@ -184,7 +184,7 @@ pub fn convert<U>(self) -> Mask<U, LANES>
             }
 
             // Transmute to the bitmask
-            let bitmask: <super::Mask<T, LANES> as ToBitMaskArray>::BitMaskArray =
+            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
                 core::mem::transmute_copy(&bitmask);
 
             // Compute the regular mask
@@ -199,14 +199,14 @@ pub fn convert<U>(self) -> Mask<U, LANES>
     #[inline]
     pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
     where
-        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+        super::Mask<T, N>: ToBitMask<BitMask = U>,
     {
         // Safety: U is required to be the appropriate bitmask type
         let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
 
         // LLVM assumes bit order should match endianness
         if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(LANES)
+            bitmask.reverse_bits(N)
         } else {
             bitmask
         }
@@ -215,11 +215,11 @@ pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
     #[inline]
     pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
     where
-        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+        super::Mask<T, N>: ToBitMask<BitMask = U>,
     {
         // LLVM assumes bit order should match endianness
         let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(LANES)
+            bitmask.reverse_bits(N)
         } else {
             bitmask
         };
@@ -249,21 +249,21 @@ pub fn all(self) -> bool {
     }
 }
 
-impl<T, const LANES: usize> From<Mask<T, LANES>> for Simd<T, LANES>
+impl<T, const N: usize> From<Mask<T, N>> for Simd<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
-    fn from(value: Mask<T, LANES>) -> Self {
+    fn from(value: Mask<T, N>) -> Self {
         value.0
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -274,10 +274,10 @@ fn bitand(self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -288,10 +288,10 @@ fn bitor(self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
@@ -302,10 +302,10 @@ fn bitxor(self, rhs: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     type Output = Self;
     #[inline]
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index 382928ac308..06f09c65aca 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -7,10 +7,10 @@ pub trait Sealed {}
 }
 pub use sealed::Sealed;
 
-impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+impl<T, const N: usize> Sealed for Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
 }
 
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index d1b4a504884..8a1b083f039 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -9,10 +9,10 @@
 mod shift_scalar;
 mod unary;
 
-impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
 where
     T: SimdElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
     I: core::slice::SliceIndex<[T]>,
 {
     type Output = I::Output;
@@ -22,10 +22,10 @@ fn index(&self, index: I) -> &Self::Output {
     }
 }
 
-impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
 where
     T: SimdElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
     I: core::slice::SliceIndex<[T]>,
 {
     #[inline]
diff --git a/crates/core_simd/src/ops/assign.rs b/crates/core_simd/src/ops/assign.rs
index d2b48614fc9..0e87785025a 100644
--- a/crates/core_simd/src/ops/assign.rs
+++ b/crates/core_simd/src/ops/assign.rs
@@ -8,7 +8,7 @@
 // Arithmetic
 
 macro_rules! assign_ops {
-    ($(impl<T, U, const LANES: usize> $assignTrait:ident<U> for Simd<T, LANES>
+    ($(impl<T, U, const N: usize> $assignTrait:ident<U> for Simd<T, N>
         where
             Self: $trait:ident,
         {
@@ -16,11 +16,11 @@ fn $assign_call:ident(rhs: U) {
                 $call:ident
             }
         })*) => {
-        $(impl<T, U, const LANES: usize> $assignTrait<U> for Simd<T, LANES>
+        $(impl<T, U, const N: usize> $assignTrait<U> for Simd<T, N>
         where
             Self: $trait<U, Output = Self>,
             T: SimdElement,
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn $assign_call(&mut self, rhs: U) {
@@ -32,7 +32,7 @@ fn $assign_call(&mut self, rhs: U) {
 
 assign_ops! {
     // Arithmetic
-    impl<T, U, const LANES: usize> AddAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> AddAssign<U> for Simd<T, N>
     where
         Self: Add,
     {
@@ -41,7 +41,7 @@ fn add_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> MulAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> MulAssign<U> for Simd<T, N>
     where
         Self: Mul,
     {
@@ -50,7 +50,7 @@ fn mul_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> SubAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> SubAssign<U> for Simd<T, N>
     where
         Self: Sub,
     {
@@ -59,7 +59,7 @@ fn sub_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> DivAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> DivAssign<U> for Simd<T, N>
     where
         Self: Div,
     {
@@ -67,7 +67,7 @@ fn div_assign(rhs: U) {
             div
         }
     }
-    impl<T, U, const LANES: usize> RemAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> RemAssign<U> for Simd<T, N>
     where
         Self: Rem,
     {
@@ -77,7 +77,7 @@ fn rem_assign(rhs: U) {
     }
 
     // Bitops
-    impl<T, U, const LANES: usize> BitAndAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> BitAndAssign<U> for Simd<T, N>
     where
         Self: BitAnd,
     {
@@ -86,7 +86,7 @@ fn bitand_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> BitOrAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> BitOrAssign<U> for Simd<T, N>
     where
         Self: BitOr,
     {
@@ -95,7 +95,7 @@ fn bitor_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> BitXorAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> BitXorAssign<U> for Simd<T, N>
     where
         Self: BitXor,
     {
@@ -104,7 +104,7 @@ fn bitxor_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> ShlAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> ShlAssign<U> for Simd<T, N>
     where
         Self: Shl,
     {
@@ -113,7 +113,7 @@ fn shl_assign(rhs: U) {
         }
     }
 
-    impl<T, U, const LANES: usize> ShrAssign<U> for Simd<T, LANES>
+    impl<T, U, const N: usize> ShrAssign<U> for Simd<T, N>
     where
         Self: Shr,
     {
diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs
index 302bf148bd3..89a60ba1141 100644
--- a/crates/core_simd/src/ops/deref.rs
+++ b/crates/core_simd/src/ops/deref.rs
@@ -5,16 +5,16 @@
 use super::*;
 
 macro_rules! deref_lhs {
-    (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+    (impl<T, const N: usize> $trait:ident for $simd:ty {
             fn $call:ident
         }) => {
-        impl<T, const LANES: usize> $trait<$simd> for &$simd
+        impl<T, const N: usize> $trait<$simd> for &$simd
         where
             T: SimdElement,
             $simd: $trait<$simd, Output = $simd>,
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
-            type Output = Simd<T, LANES>;
+            type Output = Simd<T, N>;
 
             #[inline]
             #[must_use = "operator returns a new vector without mutating the inputs"]
@@ -26,16 +26,16 @@ fn $call(self, rhs: $simd) -> Self::Output {
 }
 
 macro_rules! deref_rhs {
-    (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+    (impl<T, const N: usize> $trait:ident for $simd:ty {
             fn $call:ident
         }) => {
-        impl<T, const LANES: usize> $trait<&$simd> for $simd
+        impl<T, const N: usize> $trait<&$simd> for $simd
         where
             T: SimdElement,
             $simd: $trait<$simd, Output = $simd>,
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
-            type Output = Simd<T, LANES>;
+            type Output = Simd<T, N>;
 
             #[inline]
             #[must_use = "operator returns a new vector without mutating the inputs"]
@@ -47,25 +47,25 @@ fn $call(self, rhs: &$simd) -> Self::Output {
 }
 
 macro_rules! deref_ops {
-    ($(impl<T, const LANES: usize> $trait:ident for $simd:ty {
+    ($(impl<T, const N: usize> $trait:ident for $simd:ty {
             fn $call:ident
         })*) => {
         $(
             deref_rhs! {
-                impl<T, const LANES: usize> $trait for $simd {
+                impl<T, const N: usize> $trait for $simd {
                     fn $call
                 }
             }
             deref_lhs! {
-                impl<T, const LANES: usize> $trait for $simd {
+                impl<T, const N: usize> $trait for $simd {
                     fn $call
                 }
             }
-            impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd
+            impl<'lhs, 'rhs, T, const N: usize> $trait<&'rhs $simd> for &'lhs $simd
             where
                 T: SimdElement,
                 $simd: $trait<$simd, Output = $simd>,
-                LaneCount<LANES>: SupportedLaneCount,
+                LaneCount<N>: SupportedLaneCount,
             {
                 type Output = $simd;
 
@@ -81,44 +81,44 @@ fn $call(self, rhs: &'rhs $simd) -> Self::Output {
 
 deref_ops! {
     // Arithmetic
-    impl<T, const LANES: usize> Add for Simd<T, LANES> {
+    impl<T, const N: usize> Add for Simd<T, N> {
         fn add
     }
 
-    impl<T, const LANES: usize> Mul for Simd<T, LANES> {
+    impl<T, const N: usize> Mul for Simd<T, N> {
         fn mul
     }
 
-    impl<T, const LANES: usize> Sub for Simd<T, LANES> {
+    impl<T, const N: usize> Sub for Simd<T, N> {
         fn sub
     }
 
-    impl<T, const LANES: usize> Div for Simd<T, LANES> {
+    impl<T, const N: usize> Div for Simd<T, N> {
         fn div
     }
 
-    impl<T, const LANES: usize> Rem for Simd<T, LANES> {
+    impl<T, const N: usize> Rem for Simd<T, N> {
         fn rem
     }
 
     // Bitops
-    impl<T, const LANES: usize> BitAnd for Simd<T, LANES> {
+    impl<T, const N: usize> BitAnd for Simd<T, N> {
         fn bitand
     }
 
-    impl<T, const LANES: usize> BitOr for Simd<T, LANES> {
+    impl<T, const N: usize> BitOr for Simd<T, N> {
         fn bitor
     }
 
-    impl<T, const LANES: usize> BitXor for Simd<T, LANES> {
+    impl<T, const N: usize> BitXor for Simd<T, N> {
         fn bitxor
     }
 
-    impl<T, const LANES: usize> Shl for Simd<T, LANES> {
+    impl<T, const N: usize> Shl for Simd<T, N> {
         fn shl
     }
 
-    impl<T, const LANES: usize> Shr for Simd<T, LANES> {
+    impl<T, const N: usize> Shr for Simd<T, N> {
         fn shr
     }
 }
diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs
index 4ad02215034..a651aa73e95 100644
--- a/crates/core_simd/src/ops/unary.rs
+++ b/crates/core_simd/src/ops/unary.rs
@@ -3,11 +3,11 @@
 use core::ops::{Neg, Not}; // unary ops
 
 macro_rules! neg {
-    ($(impl<const LANES: usize> Neg for Simd<$scalar:ty, LANES>)*) => {
-        $(impl<const LANES: usize> Neg for Simd<$scalar, LANES>
+    ($(impl<const N: usize> Neg for Simd<$scalar:ty, N>)*) => {
+        $(impl<const N: usize> Neg for Simd<$scalar, N>
         where
             $scalar: SimdElement,
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             type Output = Self;
 
@@ -22,27 +22,27 @@ fn neg(self) -> Self::Output {
 }
 
 neg! {
-    impl<const LANES: usize> Neg for Simd<f32, LANES>
+    impl<const N: usize> Neg for Simd<f32, N>
 
-    impl<const LANES: usize> Neg for Simd<f64, LANES>
+    impl<const N: usize> Neg for Simd<f64, N>
 
-    impl<const LANES: usize> Neg for Simd<i8, LANES>
+    impl<const N: usize> Neg for Simd<i8, N>
 
-    impl<const LANES: usize> Neg for Simd<i16, LANES>
+    impl<const N: usize> Neg for Simd<i16, N>
 
-    impl<const LANES: usize> Neg for Simd<i32, LANES>
+    impl<const N: usize> Neg for Simd<i32, N>
 
-    impl<const LANES: usize> Neg for Simd<i64, LANES>
+    impl<const N: usize> Neg for Simd<i64, N>
 
-    impl<const LANES: usize> Neg for Simd<isize, LANES>
+    impl<const N: usize> Neg for Simd<isize, N>
 }
 
 macro_rules! not {
-    ($(impl<const LANES: usize> Not for Simd<$scalar:ty, LANES>)*) => {
-        $(impl<const LANES: usize> Not for Simd<$scalar, LANES>
+    ($(impl<const N: usize> Not for Simd<$scalar:ty, N>)*) => {
+        $(impl<const N: usize> Not for Simd<$scalar, N>
         where
             $scalar: SimdElement,
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             type Output = Self;
 
@@ -56,23 +56,23 @@ fn not(self) -> Self::Output {
 }
 
 not! {
-    impl<const LANES: usize> Not for Simd<i8, LANES>
+    impl<const N: usize> Not for Simd<i8, N>
 
-    impl<const LANES: usize> Not for Simd<i16, LANES>
+    impl<const N: usize> Not for Simd<i16, N>
 
-    impl<const LANES: usize> Not for Simd<i32, LANES>
+    impl<const N: usize> Not for Simd<i32, N>
 
-    impl<const LANES: usize> Not for Simd<i64, LANES>
+    impl<const N: usize> Not for Simd<i64, N>
 
-    impl<const LANES: usize> Not for Simd<isize, LANES>
+    impl<const N: usize> Not for Simd<isize, N>
 
-    impl<const LANES: usize> Not for Simd<u8, LANES>
+    impl<const N: usize> Not for Simd<u8, N>
 
-    impl<const LANES: usize> Not for Simd<u16, LANES>
+    impl<const N: usize> Not for Simd<u16, N>
 
-    impl<const LANES: usize> Not for Simd<u32, LANES>
+    impl<const N: usize> Not for Simd<u32, N>
 
-    impl<const LANES: usize> Not for Simd<u64, LANES>
+    impl<const N: usize> Not for Simd<u64, N>
 
-    impl<const LANES: usize> Not for Simd<usize, LANES>
+    impl<const N: usize> Not for Simd<usize, N>
 }
diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
index a1a26032e87..2345f53a0de 100644
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@@ -1,10 +1,10 @@
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
 
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
 where
     T: MaskElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     /// Choose elements from two vectors.
     ///
@@ -25,9 +25,9 @@ impl<T, const LANES: usize> Mask<T, LANES>
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
     pub fn select<U>(
         self,
-        true_values: Simd<U, LANES>,
-        false_values: Simd<U, LANES>,
-    ) -> Simd<U, LANES>
+        true_values: Simd<U, N>,
+        false_values: Simd<U, N>,
+    ) -> Simd<U, N>
     where
         U: SimdElement<Mask = T>,
     {
diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs
index 0ca0401b7ed..f132fa2cc0c 100644
--- a/crates/core_simd/src/simd/cmp/eq.rs
+++ b/crates/core_simd/src/simd/cmp/eq.rs
@@ -21,11 +21,11 @@ pub trait SimdPartialEq {
 macro_rules! impl_number {
     { $($number:ty),* } => {
         $(
-        impl<const LANES: usize> SimdPartialEq for Simd<$number, LANES>
+        impl<const N: usize> SimdPartialEq for Simd<$number, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
-            type Mask = Mask<<$number as SimdElement>::Mask, LANES>;
+            type Mask = Mask<<$number as SimdElement>::Mask, N>;
 
             #[inline]
             fn simd_eq(self, other: Self) -> Self::Mask {
@@ -50,9 +50,9 @@ fn simd_ne(self, other: Self) -> Self::Mask {
 macro_rules! impl_mask {
     { $($integer:ty),* } => {
         $(
-        impl<const LANES: usize> SimdPartialEq for Mask<$integer, LANES>
+        impl<const N: usize> SimdPartialEq for Mask<$integer, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             type Mask = Self;
 
@@ -76,11 +76,11 @@ fn simd_ne(self, other: Self) -> Self::Mask {
 
 impl_mask! { i8, i16, i32, i64, isize }
 
-impl<T, const LANES: usize> SimdPartialEq for Simd<*const T, LANES>
+impl<T, const N: usize> SimdPartialEq for Simd<*const T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Mask = Mask<isize, LANES>;
+    type Mask = Mask<isize, N>;
 
     #[inline]
     fn simd_eq(self, other: Self) -> Self::Mask {
@@ -93,11 +93,11 @@ fn simd_ne(self, other: Self) -> Self::Mask {
     }
 }
 
-impl<T, const LANES: usize> SimdPartialEq for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Mask = Mask<isize, LANES>;
+    type Mask = Mask<isize, N>;
 
     #[inline]
     fn simd_eq(self, other: Self) -> Self::Mask {
diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs
index 667eb00e111..4e9d49ea221 100644
--- a/crates/core_simd/src/simd/cmp/ord.rs
+++ b/crates/core_simd/src/simd/cmp/ord.rs
@@ -49,9 +49,9 @@ pub trait SimdOrd: SimdPartialOrd {
 macro_rules! impl_integer {
     { $($integer:ty),* } => {
         $(
-        impl<const LANES: usize> SimdPartialOrd for Simd<$integer, LANES>
+        impl<const N: usize> SimdPartialOrd for Simd<$integer, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
@@ -82,9 +82,9 @@ fn simd_ge(self, other: Self) -> Self::Mask {
             }
         }
 
-        impl<const LANES: usize> SimdOrd for Simd<$integer, LANES>
+        impl<const N: usize> SimdOrd for Simd<$integer, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn simd_max(self, other: Self) -> Self {
@@ -115,9 +115,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self {
 macro_rules! impl_float {
     { $($float:ty),* } => {
         $(
-        impl<const LANES: usize> SimdPartialOrd for Simd<$float, LANES>
+        impl<const N: usize> SimdPartialOrd for Simd<$float, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
@@ -156,9 +156,9 @@ fn simd_ge(self, other: Self) -> Self::Mask {
 macro_rules! impl_mask {
     { $($integer:ty),* } => {
         $(
-        impl<const LANES: usize> SimdPartialOrd for Mask<$integer, LANES>
+        impl<const N: usize> SimdPartialOrd for Mask<$integer, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
@@ -189,9 +189,9 @@ fn simd_ge(self, other: Self) -> Self::Mask {
             }
         }
 
-        impl<const LANES: usize> SimdOrd for Mask<$integer, LANES>
+        impl<const N: usize> SimdOrd for Mask<$integer, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             #[inline]
             fn simd_max(self, other: Self) -> Self {
@@ -219,9 +219,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self {
 
 impl_mask! { i8, i16, i32, i64, isize }
 
-impl<T, const LANES: usize> SimdPartialOrd for Simd<*const T, LANES>
+impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn simd_lt(self, other: Self) -> Self::Mask {
@@ -244,9 +244,9 @@ fn simd_ge(self, other: Self) -> Self::Mask {
     }
 }
 
-impl<T, const LANES: usize> SimdOrd for Simd<*const T, LANES>
+impl<T, const N: usize> SimdOrd for Simd<*const T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn simd_max(self, other: Self) -> Self {
@@ -269,9 +269,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self {
     }
 }
 
-impl<T, const LANES: usize> SimdPartialOrd for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn simd_lt(self, other: Self) -> Self::Mask {
@@ -294,9 +294,9 @@ fn simd_ge(self, other: Self) -> Self::Mask {
     }
 }
 
-impl<T, const LANES: usize> SimdOrd for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdOrd for Simd<*mut T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     #[inline]
     fn simd_max(self, other: Self) -> Self {
diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs
index e8378c3147a..fc0b99e87a6 100644
--- a/crates/core_simd/src/simd/num/float.rs
+++ b/crates/core_simd/src/simd/num/float.rs
@@ -240,20 +240,20 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
 macro_rules! impl_trait {
     { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
         $(
-        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        impl<const N: usize> Sealed for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
         }
 
-        impl<const LANES: usize> SimdFloat for Simd<$ty, LANES>
+        impl<const N: usize> SimdFloat for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
-            type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>;
+            type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>;
             type Scalar = $ty;
-            type Bits = Simd<$bits_ty, LANES>;
-            type Cast<T: SimdElement> = Simd<T, LANES>;
+            type Bits = Simd<$bits_ty, N>;
+            type Cast<T: SimdElement> = Simd<T, N>;
 
             #[inline]
             fn cast<T: SimdCast>(self) -> Self::Cast<T>
@@ -273,14 +273,14 @@ unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
             }
 
             #[inline]
-            fn to_bits(self) -> Simd<$bits_ty, LANES> {
+            fn to_bits(self) -> Simd<$bits_ty, N> {
                 assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
                 // Safety: transmuting between vector types is safe
                 unsafe { core::mem::transmute_copy(&self) }
             }
 
             #[inline]
-            fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
+            fn from_bits(bits: Simd<$bits_ty, N>) -> Self {
                 assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
                 // Safety: transmuting between vector types is safe
                 unsafe { core::mem::transmute_copy(&bits) }
diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs
index 5b0df597b24..1f1aa272782 100644
--- a/crates/core_simd/src/simd/num/int.rs
+++ b/crates/core_simd/src/simd/num/int.rs
@@ -219,20 +219,20 @@ pub trait SimdInt: Copy + Sealed {
 macro_rules! impl_trait {
     { $($ty:ident ($unsigned:ident)),* } => {
         $(
-        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        impl<const N: usize> Sealed for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
         }
 
-        impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
+        impl<const N: usize> SimdInt for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
-            type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
+            type Mask = Mask<<$ty as SimdElement>::Mask, N>;
             type Scalar = $ty;
-            type Unsigned = Simd<$unsigned, LANES>;
-            type Cast<T: SimdElement> = Simd<T, LANES>;
+            type Unsigned = Simd<$unsigned, N>;
+            type Cast<T: SimdElement> = Simd<T, N>;
 
             #[inline]
             fn cast<T: SimdCast>(self) -> Self::Cast<T> {
diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs
index 9c91e147a7a..c955ee8fe8b 100644
--- a/crates/core_simd/src/simd/num/uint.rs
+++ b/crates/core_simd/src/simd/num/uint.rs
@@ -101,18 +101,18 @@ pub trait SimdUint: Copy + Sealed {
 macro_rules! impl_trait {
     { $($ty:ident ($signed:ident)),* } => {
         $(
-        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        impl<const N: usize> Sealed for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
         }
 
-        impl<const LANES: usize> SimdUint for Simd<$ty, LANES>
+        impl<const N: usize> SimdUint for Simd<$ty, N>
         where
-            LaneCount<LANES>: SupportedLaneCount,
+            LaneCount<N>: SupportedLaneCount,
         {
             type Scalar = $ty;
-            type Cast<T: SimdElement> = Simd<T, LANES>;
+            type Cast<T: SimdElement> = Simd<T, N>;
 
             #[inline]
             fn cast<T: SimdCast>(self) -> Self::Cast<T> {
diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs
index 246fc7ee381..dd4525e991b 100644
--- a/crates/core_simd/src/simd/ptr/const_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -77,20 +77,20 @@ pub trait SimdConstPtr: Copy + Sealed {
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
-impl<T, const LANES: usize> Sealed for Simd<*const T, LANES> where
-    LaneCount<LANES>: SupportedLaneCount
+impl<T, const N: usize> Sealed for Simd<*const T, N> where
+    LaneCount<N>: SupportedLaneCount
 {
 }
 
-impl<T, const LANES: usize> SimdConstPtr for Simd<*const T, LANES>
+impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Usize = Simd<usize, LANES>;
-    type Isize = Simd<isize, LANES>;
-    type CastPtr<U> = Simd<*const U, LANES>;
-    type MutPtr = Simd<*mut T, LANES>;
-    type Mask = Mask<isize, LANES>;
+    type Usize = Simd<usize, N>;
+    type Isize = Simd<isize, N>;
+    type CastPtr<U> = Simd<*const U, N>;
+    type MutPtr = Simd<*mut T, N>;
+    type Mask = Mask<isize, N>;
 
     #[inline]
     fn is_null(self) -> Self::Mask {
diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs
index 69c927eb11a..8cdec74dda3 100644
--- a/crates/core_simd/src/simd/ptr/mut_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -74,18 +74,18 @@ pub trait SimdMutPtr: Copy + Sealed {
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
-impl<T, const LANES: usize> Sealed for Simd<*mut T, LANES> where LaneCount<LANES>: SupportedLaneCount
+impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount
 {}
 
-impl<T, const LANES: usize> SimdMutPtr for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
 where
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
-    type Usize = Simd<usize, LANES>;
-    type Isize = Simd<isize, LANES>;
-    type CastPtr<U> = Simd<*mut U, LANES>;
-    type ConstPtr = Simd<*const T, LANES>;
-    type Mask = Mask<isize, LANES>;
+    type Usize = Simd<usize, N>;
+    type Isize = Simd<isize, N>;
+    type CastPtr<U> = Simd<*mut U, N>;
+    type ConstPtr = Simd<*const T, N>;
+    type Mask = Mask<isize, N>;
 
     #[inline]
     fn is_null(self) -> Self::Mask {
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index cfd2cac576a..6af882c0a0e 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -179,10 +179,10 @@ fn concat_swizzle_mask<T, const M: usize>(first: Mask<T, M>, second: Mask<T, M>)
     }
 }
 
-impl<T, const LANES: usize> Simd<T, LANES>
+impl<T, const N: usize> Simd<T, N>
 where
     T: SimdElement,
-    LaneCount<LANES>: SupportedLaneCount,
+    LaneCount<N>: SupportedLaneCount,
 {
     /// Reverse the order of the elements in the vector.
     #[inline]

From 4f7b0252e0aa503326f85cb4b59cae0bb5f3e166 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 21:07:18 -0400
Subject: [PATCH 49/59] Fix formatting

---
 crates/core_simd/src/select.rs             | 6 +-----
 crates/core_simd/src/simd/ptr/const_ptr.rs | 5 +----
 crates/core_simd/src/simd/ptr/mut_ptr.rs   | 3 +--
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
index 2345f53a0de..cdcf8eeec81 100644
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@@ -23,11 +23,7 @@ impl<T, const N: usize> Mask<T, N>
     /// ```
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn select<U>(
-        self,
-        true_values: Simd<U, N>,
-        false_values: Simd<U, N>,
-    ) -> Simd<U, N>
+    pub fn select<U>(self, true_values: Simd<U, N>, false_values: Simd<U, N>) -> Simd<U, N>
     where
         U: SimdElement<Mask = T>,
     {
diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs
index dd4525e991b..97fe3fb600d 100644
--- a/crates/core_simd/src/simd/ptr/const_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -77,10 +77,7 @@ pub trait SimdConstPtr: Copy + Sealed {
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
-impl<T, const N: usize> Sealed for Simd<*const T, N> where
-    LaneCount<N>: SupportedLaneCount
-{
-}
+impl<T, const N: usize> Sealed for Simd<*const T, N> where LaneCount<N>: SupportedLaneCount {}
 
 impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
 where
diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs
index 8cdec74dda3..e35633d0433 100644
--- a/crates/core_simd/src/simd/ptr/mut_ptr.rs
+++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -74,8 +74,7 @@ pub trait SimdMutPtr: Copy + Sealed {
     fn wrapping_sub(self, count: Self::Usize) -> Self;
 }
 
-impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount
-{}
+impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount {}
 
 impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
 where

From 56b6ee01f89fdd8ed8c608ae29e0ae665761f811 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 21:15:28 -0400
Subject: [PATCH 50/59] Fix test import

---
 crates/core_simd/src/vector.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index befbd71359c..6b7c7f1436a 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -120,7 +120,9 @@ impl<T, const N: usize> Simd<T, N>
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::u32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::u32x4;
     /// let v = u32x4::splat(0);
     /// assert_eq!(v.len(), 4);
     /// ```
@@ -135,7 +137,9 @@ pub const fn len(&self) -> usize {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::u32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::u32x4;
     /// let v = u32x4::splat(8);
     /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
     /// ```

From d06dc5c269413371482f34d5444ed857d71c9e1c Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 20 Oct 2023 21:28:19 -0400
Subject: [PATCH 51/59] Fix bitmasks

---
 crates/core_simd/src/masks/bitmask.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index b54912afda5..aaae28a07be 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -121,8 +121,8 @@ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
 
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
-        assert!(core::mem::size_of::<Self>() == N);
+    pub fn to_bitmask_array<const M: usize>(self) -> [u8; M] {
+        assert!(core::mem::size_of::<Self>() == M);
 
         // Safety: converting an integer to an array of bytes of the same size is safe
         unsafe { core::mem::transmute_copy(&self.0) }
@@ -130,8 +130,8 @@ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
-        assert!(core::mem::size_of::<Self>() == N);
+    pub fn from_bitmask_array<const M: usize>(bitmask: [u8; M]) -> Self {
+        assert!(core::mem::size_of::<Self>() == M);
 
         // Safety: converting an array of bytes to an integer of the same size is safe
         Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)

From 4cc260e497114b4bae3c095eddaf58dfc07dea68 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 23 Oct 2023 11:03:05 -0400
Subject: [PATCH 52/59] Update crates/core_simd/src/masks.rs

Co-authored-by: Jacob Lifshay <programmerjake@gmail.com>
---
 crates/core_simd/src/masks.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index bbfd6567cbf..1199153a5bd 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -202,7 +202,7 @@ pub fn cast<U: MaskElement>(self) -> Mask<U, N> {
     /// Tests the value of the specified element.
     ///
     /// # Safety
-    /// `element` must be less than `self.len()`.
+    /// `index` must be less than `self.len()`.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub unsafe fn test_unchecked(&self, index: usize) -> bool {

From 8d9bcda64cfe5f4dd172620d5d0eacadbdb13751 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 17 Nov 2023 00:48:35 -0500
Subject: [PATCH 53/59] Fix or silence lints

---
 crates/core_simd/examples/nbody.rs    |  1 +
 crates/core_simd/src/mod.rs           |  1 -
 crates/core_simd/src/to_bytes.rs      | 10 ++++++++--
 crates/core_simd/src/vector.rs        |  1 +
 crates/core_simd/tests/ops_macros.rs  |  2 ++
 crates/core_simd/tests/swizzle_dyn.rs |  1 -
 6 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs
index 154e24c460e..65820d1340b 100644
--- a/crates/core_simd/examples/nbody.rs
+++ b/crates/core_simd/examples/nbody.rs
@@ -1,4 +1,5 @@
 #![feature(portable_simd)]
+#![allow(clippy::excessive_precision)]
 extern crate std_float;
 
 /// Benchmarks game nbody code
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index 6fd458d24e7..fd016f1c6f7 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -34,7 +34,6 @@ pub mod simd {
     pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
     pub use crate::core_simd::masks::*;
     pub use crate::core_simd::swizzle::*;
-    pub use crate::core_simd::swizzle_dyn::*;
     pub use crate::core_simd::to_bytes::ToBytes;
     pub use crate::core_simd::vector::*;
 }
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index dd01929551c..222526c4ab3 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -68,7 +68,10 @@ impl ToBytes for Simd<$ty, $elems> {
             #[inline]
             fn to_ne_bytes(self) -> Self::Bytes {
                 // Safety: transmuting between vectors is safe
-                unsafe { core::mem::transmute(self) }
+                unsafe {
+                    #![allow(clippy::useless_transmute)]
+                    core::mem::transmute(self)
+                }
             }
 
             #[inline]
@@ -90,7 +93,10 @@ fn to_le_bytes(mut self) -> Self::Bytes {
             #[inline]
             fn from_ne_bytes(bytes: Self::Bytes) -> Self {
                 // Safety: transmuting between vectors is safe
-                unsafe { core::mem::transmute(bytes) }
+                unsafe {
+                    #![allow(clippy::useless_transmute)]
+                    core::mem::transmute(bytes)
+                }
             }
 
             #[inline]
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 6b7c7f1436a..105c06741c5 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -127,6 +127,7 @@ impl<T, const N: usize> Simd<T, N>
     /// assert_eq!(v.len(), 4);
     /// ```
     #[inline]
+    #[allow(clippy::len_without_is_empty)]
     pub const fn len(&self) -> usize {
         Self::LEN
     }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 50faba04991..aa565a13752 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -68,6 +68,7 @@ mod $fn {
 
             test_helpers::test_lanes! {
                 fn normal<const LANES: usize>() {
+                    #![allow(clippy::redundant_closure_call)]
                     test_helpers::test_binary_elementwise(
                         &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                         &$scalar_fn,
@@ -76,6 +77,7 @@ fn normal<const LANES: usize>() {
                 }
 
                 fn assign<const LANES: usize>() {
+                    #![allow(clippy::redundant_closure_call)]
                     test_helpers::test_binary_elementwise(
                         &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                         &$scalar_fn,
diff --git a/crates/core_simd/tests/swizzle_dyn.rs b/crates/core_simd/tests/swizzle_dyn.rs
index 646cd5f3383..f21a937f01c 100644
--- a/crates/core_simd/tests/swizzle_dyn.rs
+++ b/crates/core_simd/tests/swizzle_dyn.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
 use core::{fmt, ops::RangeInclusive};
-use proptest;
 use test_helpers::{self, biteq, make_runner, prop_assert_biteq};
 
 fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {

From 4ca9f04db5e9d551d69ffa27357a44435fd3af98 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 17 Nov 2023 00:25:16 -0500
Subject: [PATCH 54/59] Simplify bitmasks

---
 crates/core_simd/src/masks.rs            |  42 ++++++++-
 crates/core_simd/src/masks/bitmask.rs    |  69 +++++++++-----
 crates/core_simd/src/masks/full_masks.rs |  97 +++++++++++---------
 crates/core_simd/src/masks/to_bitmask.rs | 111 -----------------------
 crates/core_simd/src/swizzle.rs          |  35 +++++++
 crates/core_simd/tests/masks.rs          |  18 ++--
 6 files changed, 183 insertions(+), 189 deletions(-)
 delete mode 100644 crates/core_simd/src/masks/to_bitmask.rs

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 1199153a5bd..5c0ae303162 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -12,9 +12,6 @@
 )]
 mod mask_impl;
 
-mod to_bitmask;
-pub use to_bitmask::{ToBitMask, ToBitMaskArray};
-
 use crate::simd::{
     cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount,
 };
@@ -262,6 +259,45 @@ pub fn any(self) -> bool {
     pub fn all(self) -> bool {
         self.0.all()
     }
+
+    /// Create a bitmask from a mask.
+    ///
+    /// Each bit is set if the corresponding element in the mask is `true`.
+    /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64.
+    #[inline]
+    #[must_use = "method returns a new integer and does not mutate the original value"]
+    pub fn to_bitmask(self) -> u64 {
+        self.0.to_bitmask_integer()
+    }
+
+    /// Create a mask from a bitmask.
+    ///
+    /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
+    /// If the mask contains more than 64 elements, the remainder are set to `false`.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn from_bitmask(bitmask: u64) -> Self {
+        Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+    }
+
+    /// Create a bitmask vector from a mask.
+    ///
+    /// Each bit is set if the corresponding element in the mask is `true`.
+    /// The remaining bits are unset.
+    #[inline]
+    #[must_use = "method returns a new integer and does not mutate the original value"]
+    pub fn to_bitmask_vector(self) -> Simd<T, N> {
+        self.0.to_bitmask_vector()
+    }
+
+    /// Create a mask from a bitmask vector.
+    ///
+    /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+        Self(mask_impl::Mask::from_bitmask_vector(bitmask))
+    }
 }
 
 // vector/array conversion
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index aaae28a07be..21d9e49a1b5 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
 use core::marker::PhantomData;
 
 /// A mask where each lane is represented by a single bit.
@@ -120,39 +120,64 @@ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
     }
 
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask_array<const M: usize>(self) -> [u8; M] {
-        assert!(core::mem::size_of::<Self>() == M);
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    pub fn to_bitmask_vector(self) -> Simd<T, N> {
+        let mut bitmask = Self::splat(false).to_int();
 
-        // Safety: converting an integer to an array of bytes of the same size is safe
-        unsafe { core::mem::transmute_copy(&self.0) }
+        assert!(
+            core::mem::size_of::<Simd<T, N>>()
+                >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+        );
+
+        // Safety: the bitmask vector is big enough to hold the bitmask
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                self.0.as_ref().as_ptr(),
+                bitmask.as_mut_array().as_mut_ptr() as _,
+                self.0.as_ref().len(),
+            );
+        }
+
+        bitmask
     }
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_array<const M: usize>(bitmask: [u8; M]) -> Self {
-        assert!(core::mem::size_of::<Self>() == M);
+    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
 
-        // Safety: converting an array of bytes to an integer of the same size is safe
-        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+        assert!(
+            core::mem::size_of::<Simd<T, N>>()
+                >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+        );
+
+        // Safety: the bitmask vector is big enough to hold the bitmask
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                bitmask.as_array().as_ptr() as _,
+                bytes.as_mut().as_mut_ptr(),
+                bytes.as_ref().len(),
+            );
+        }
+
+        Self(bytes, PhantomData)
     }
 
     #[inline]
-    pub fn to_bitmask_integer<U>(self) -> U
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
-        // Safety: these are the same types
-        unsafe { core::mem::transmute_copy(&self.0) }
+    pub fn to_bitmask_integer(self) -> u64 {
+        let mut bitmask = [0u8; 8];
+        bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
+        u64::from_ne_bytes(bitmask)
     }
 
     #[inline]
-    pub fn from_bitmask_integer<U>(bitmask: U) -> Self
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
-        // Safety: these are the same types
-        unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
+    pub fn from_bitmask_integer(bitmask: u64) -> Self {
+        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
+        let len = bytes.as_mut().len();
+        bytes
+            .as_mut()
+            .copy_from_slice(&bitmask.to_ne_bytes()[..len]);
+        Self(bytes, PhantomData)
     }
 
     #[inline]
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 2aa9272ab46..73a0d898700 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -1,8 +1,7 @@
 //! Masks that take up full SIMD vector registers.
 
-use super::{to_bitmask::ToBitMaskArray, MaskElement};
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
 
 #[repr(transparent)]
 pub struct Mask<T, const N: usize>(Simd<T, N>)
@@ -143,53 +142,64 @@ pub fn convert<U>(self) -> Mask<U, N>
     }
 
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask_array<const M: usize>(self) -> [u8; M]
-    where
-        super::Mask<T, N>: ToBitMaskArray,
-    {
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    pub fn to_bitmask_vector(self) -> Simd<T, N> {
+        let mut bitmask = Self::splat(false).to_int();
+
         // Safety: Bytes is the right size array
         unsafe {
             // Compute the bitmask
-            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
+            let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
                 intrinsics::simd_bitmask(self.0);
 
-            // Transmute to the return type
-            let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask);
-
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
+                for x in bytes.as_mut() {
+                    *x = x.reverse_bits()
                 }
-            };
+            }
 
-            bitmask
+            assert!(
+                core::mem::size_of::<Simd<T, N>>()
+                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+            );
+            core::ptr::copy_nonoverlapping(
+                bytes.as_ref().as_ptr(),
+                bitmask.as_mut_array().as_mut_ptr() as _,
+                bytes.as_ref().len(),
+            );
         }
+
+        bitmask
     }
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_array<const M: usize>(mut bitmask: [u8; M]) -> Self
-    where
-        super::Mask<T, N>: ToBitMaskArray,
-    {
+    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
+
         // Safety: Bytes is the right size array
         unsafe {
+            assert!(
+                core::mem::size_of::<Simd<T, N>>()
+                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
+            );
+            core::ptr::copy_nonoverlapping(
+                bitmask.as_array().as_ptr() as _,
+                bytes.as_mut().as_mut_ptr(),
+                bytes.as_mut().len(),
+            );
+
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
+                for x in bytes.as_mut() {
                     *x = x.reverse_bits();
                 }
             }
 
-            // Transmute to the bitmask
-            let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray =
-                core::mem::transmute_copy(&bitmask);
-
             // Compute the regular mask
             Self::from_int_unchecked(intrinsics::simd_select_bitmask(
-                bitmask,
+                bytes,
                 Self::splat(true).to_int(),
                 Self::splat(false).to_int(),
             ))
@@ -197,41 +207,40 @@ pub fn convert<U>(self) -> Mask<U, N>
     }
 
     #[inline]
-    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
-        // Safety: U is required to be the appropriate bitmask type
-        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+    pub(crate) fn to_bitmask_integer(self) -> u64 {
+        let resized = self.to_int().extend::<64>(T::FALSE);
+
+        // SAFETY: `resized` is an integer vector with length 64
+        let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) };
 
         // LLVM assumes bit order should match endianness
         if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(N)
+            bitmask.reverse_bits()
         } else {
             bitmask
         }
     }
 
     #[inline]
-    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
-    where
-        super::Mask<T, N>: ToBitMask<BitMask = U>,
-    {
+    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
         // LLVM assumes bit order should match endianness
         let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(N)
+            bitmask.reverse_bits()
         } else {
             bitmask
         };
 
-        // Safety: U is required to be the appropriate bitmask type
-        unsafe {
-            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+        // SAFETY: `mask` is the correct bitmask type for a u64 bitmask
+        let mask: Simd<T, 64> = unsafe {
+            intrinsics::simd_select_bitmask(
                 bitmask,
-                Self::splat(true).to_int(),
-                Self::splat(false).to_int(),
-            ))
-        }
+                Simd::<T, 64>::splat(T::TRUE),
+                Simd::<T, 64>::splat(T::FALSE),
+            )
+        };
+
+        // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
+        unsafe { Self::from_int_unchecked(mask.extend::<N>(T::FALSE)) }
     }
 
     #[inline]
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
deleted file mode 100644
index 06f09c65aca..00000000000
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-use super::{mask_impl, Mask, MaskElement};
-use crate::simd::{LaneCount, SupportedLaneCount};
-use core::borrow::{Borrow, BorrowMut};
-
-mod sealed {
-    pub trait Sealed {}
-}
-pub use sealed::Sealed;
-
-impl<T, const N: usize> Sealed for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
-
-/// Converts masks to and from integer bitmasks.
-///
-/// Each bit of the bitmask corresponds to a mask element, starting with the LSB.
-pub trait ToBitMask: Sealed {
-    /// The integer bitmask type.
-    type BitMask;
-
-    /// Converts a mask to a bitmask.
-    fn to_bitmask(self) -> Self::BitMask;
-
-    /// Converts a bitmask to a mask.
-    fn from_bitmask(bitmask: Self::BitMask) -> Self;
-}
-
-/// Converts masks to and from byte array bitmasks.
-///
-/// Each bit of the bitmask corresponds to a mask element, starting with the LSB of the first byte.
-pub trait ToBitMaskArray: Sealed {
-    /// The bitmask array.
-    type BitMaskArray: Copy
-        + Unpin
-        + Send
-        + Sync
-        + AsRef<[u8]>
-        + AsMut<[u8]>
-        + Borrow<[u8]>
-        + BorrowMut<[u8]>
-        + 'static;
-
-    /// Converts a mask to a bitmask.
-    fn to_bitmask_array(self) -> Self::BitMaskArray;
-
-    /// Converts a bitmask to a mask.
-    fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self;
-}
-
-macro_rules! impl_integer {
-    { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
-        $(
-        impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
-            type BitMask = $int;
-
-            #[inline]
-            fn to_bitmask(self) -> $int {
-                self.0.to_bitmask_integer()
-            }
-
-            #[inline]
-            fn from_bitmask(bitmask: $int) -> Self {
-                Self(mask_impl::Mask::from_bitmask_integer(bitmask))
-            }
-        }
-        )*
-    }
-}
-
-macro_rules! impl_array {
-    { $(impl ToBitMaskArray<Bytes=$int:literal> for Mask<_, $lanes:literal>)* } => {
-        $(
-        impl<T: MaskElement> ToBitMaskArray for Mask<T, $lanes> {
-            type BitMaskArray = [u8; $int];
-
-            #[inline]
-            fn to_bitmask_array(self) -> Self::BitMaskArray {
-                self.0.to_bitmask_array()
-            }
-
-            #[inline]
-            fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self {
-                Self(mask_impl::Mask::from_bitmask_array(bitmask))
-            }
-        }
-        )*
-    }
-}
-
-impl_integer! {
-    impl ToBitMask<BitMask=u8> for Mask<_, 1>
-    impl ToBitMask<BitMask=u8> for Mask<_, 2>
-    impl ToBitMask<BitMask=u8> for Mask<_, 4>
-    impl ToBitMask<BitMask=u8> for Mask<_, 8>
-    impl ToBitMask<BitMask=u16> for Mask<_, 16>
-    impl ToBitMask<BitMask=u32> for Mask<_, 32>
-    impl ToBitMask<BitMask=u64> for Mask<_, 64>
-}
-
-impl_array! {
-    impl ToBitMaskArray<Bytes=1> for Mask<_, 1>
-    impl ToBitMaskArray<Bytes=1> for Mask<_, 2>
-    impl ToBitMaskArray<Bytes=1> for Mask<_, 4>
-    impl ToBitMaskArray<Bytes=1> for Mask<_, 8>
-    impl ToBitMaskArray<Bytes=2> for Mask<_, 16>
-    impl ToBitMaskArray<Bytes=4> for Mask<_, 32>
-    impl ToBitMaskArray<Bytes=8> for Mask<_, 64>
-}
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index 6af882c0a0e..e5b3d4444d8 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -349,4 +349,39 @@ impl<const N: usize> Swizzle<N> for Odd {
             Odd::concat_swizzle(self, other),
         )
     }
+
+    /// Extend a vector.
+    ///
+    /// Extends the length of a vector, setting the new elements to `value`.
+    /// If `M` < `N`, truncates the vector to the first `M` elements.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::u32x4;
+    /// let x = u32x4::from_array([0, 1, 2, 3]);
+    /// assert_eq!(x.extend::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]);
+    /// assert_eq!(x.extend::<2>(9).to_array(), [0, 1]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn extend<const M: usize>(self, value: T) -> Simd<T, M>
+    where
+        LaneCount<M>: SupportedLaneCount,
+    {
+        struct Extend<const N: usize>;
+        impl<const N: usize, const M: usize> Swizzle<M> for Extend<N> {
+            const INDEX: [usize; M] = const {
+                let mut index = [0; M];
+                let mut i = 0;
+                while i < M {
+                    index[i] = if i < N { i } else { N };
+                    i += 1;
+                }
+                index
+            };
+        }
+        Extend::<N>::concat_swizzle(self, Simd::splat(value))
+    }
 }
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 7c1d4c7dd3f..92ee53b3e55 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -13,7 +13,7 @@ mod $type {
             #[cfg(target_arch = "wasm32")]
             use wasm_bindgen_test::*;
 
-            use core_simd::simd::Mask;
+            use core_simd::simd::{Mask, Simd};
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
@@ -72,7 +72,6 @@ fn roundtrip_int_conversion() {
 
             #[test]
             fn roundtrip_bitmask_conversion() {
-                use core_simd::simd::ToBitMask;
                 let values = [
                     true, false, false, true, false, false, true, false,
                     true, true, false, false, false, false, false, true,
@@ -85,8 +84,6 @@ fn roundtrip_bitmask_conversion() {
 
             #[test]
             fn roundtrip_bitmask_conversion_short() {
-                use core_simd::simd::ToBitMask;
-
                 let values = [
                     false, false, false, true,
                 ];
@@ -126,16 +123,19 @@ fn cast_impl<T: core_simd::simd::MaskElement>()
             }
 
             #[test]
-            fn roundtrip_bitmask_array_conversion() {
-                use core_simd::simd::ToBitMaskArray;
+            fn roundtrip_bitmask_vector_conversion() {
                 let values = [
                     true, false, false, true, false, false, true, false,
                     true, true, false, false, false, false, false, true,
                 ];
                 let mask = Mask::<$type, 16>::from_array(values);
-                let bitmask = mask.to_bitmask_array();
-                assert_eq!(bitmask, [0b01001001, 0b10000011]);
-                assert_eq!(Mask::<$type, 16>::from_bitmask_array(bitmask), mask);
+                let bitmask = mask.to_bitmask_vector();
+                if core::mem::size_of::<$type>() == 1 {
+                    assert_eq!(bitmask, Simd::from_array([0b01001001 as _, 0b10000011 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]));
+                } else {
+                    assert_eq!(bitmask, Simd::from_array([0b1000001101001001 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]));
+                }
+                assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
             }
         }
     }

From 082e3c8a5da8146b4e3d382d4f84a8a6847dd783 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 17 Nov 2023 10:15:12 -0500
Subject: [PATCH 55/59] Workaround simd_bitmask limitations

---
 crates/core_simd/src/masks/full_masks.rs | 90 +++++++++++++++++++++---
 crates/core_simd/src/swizzle.rs          | 16 ++---
 crates/core_simd/tests/masks.rs          |  9 +--
 3 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 73a0d898700..a529490f3a2 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -207,40 +207,108 @@ pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
     }
 
     #[inline]
-    pub(crate) fn to_bitmask_integer(self) -> u64 {
-        let resized = self.to_int().extend::<64>(T::FALSE);
+    unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
+    where
+        LaneCount<M>: SupportedLaneCount,
+    {
+        let resized = self.to_int().resize::<M>(T::FALSE);
 
-        // SAFETY: `resized` is an integer vector with length 64
-        let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) };
+        // Safety: `resized` is an integer vector with length M, which must match the bitmask type U
+        let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) };
 
         // LLVM assumes bit order should match endianness
         if cfg!(target_endian = "big") {
-            bitmask.reverse_bits()
+            bitmask.reverse_bits(M)
         } else {
             bitmask
         }
     }
 
     #[inline]
-    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
+    unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
+    where
+        LaneCount<M>: SupportedLaneCount,
+    {
         // LLVM assumes bit order should match endianness
         let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits()
+            bitmask.reverse_bits(M)
         } else {
             bitmask
         };
 
         // SAFETY: `mask` is the correct bitmask type for a u64 bitmask
-        let mask: Simd<T, 64> = unsafe {
+        let mask: Simd<T, M> = unsafe {
             intrinsics::simd_select_bitmask(
                 bitmask,
-                Simd::<T, 64>::splat(T::TRUE),
-                Simd::<T, 64>::splat(T::FALSE),
+                Simd::<T, M>::splat(T::TRUE),
+                Simd::<T, M>::splat(T::FALSE),
             )
         };
 
         // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
-        unsafe { Self::from_int_unchecked(mask.extend::<N>(T::FALSE)) }
+        unsafe { Self::from_int_unchecked(mask.resize::<N>(T::FALSE)) }
+    }
+
+    #[inline]
+    pub(crate) fn to_bitmask_integer(self) -> u64 {
+        // TODO modify simd_bitmask to zero-extend output, making this unnecessary
+        macro_rules! bitmask {
+            { $($ty:ty: $($len:literal),*;)* } => {
+                match N {
+                    $($(
+                    // Safety: bitmask matches length
+                    $len => unsafe { self.to_bitmask_impl::<$ty, $len>() as u64 },
+                    )*)*
+                    // Safety: bitmask matches length
+                    _ => unsafe { self.to_bitmask_impl::<u64, 64>() },
+                }
+            }
+        }
+        #[cfg(all_lane_counts)]
+        bitmask! {
+            u8: 1, 2, 3, 4, 5, 6, 7, 8;
+            u16: 9, 10, 11, 12, 13, 14, 15, 16;
+            u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32;
+            u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64;
+        }
+        #[cfg(not(all_lane_counts))]
+        bitmask! {
+            u8: 1, 2, 4, 8;
+            u16: 16;
+            u32: 32;
+            u64: 64;
+        }
+    }
+
+    #[inline]
+    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
+        // TODO modify simd_bitmask_select to truncate input, making this unnecessary
+        macro_rules! bitmask {
+            { $($ty:ty: $($len:literal),*;)* } => {
+                match N {
+                    $($(
+                    // Safety: bitmask matches length
+                    $len => unsafe { Self::from_bitmask_impl::<$ty, $len>(bitmask as $ty) },
+                    )*)*
+                    // Safety: bitmask matches length
+                    _ => unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) },
+                }
+            }
+        }
+        #[cfg(all_lane_counts)]
+        bitmask! {
+            u8: 1, 2, 3, 4, 5, 6, 7, 8;
+            u16: 9, 10, 11, 12, 13, 14, 15, 16;
+            u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32;
+            u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64;
+        }
+        #[cfg(not(all_lane_counts))]
+        bitmask! {
+            u8: 1, 2, 4, 8;
+            u16: 16;
+            u32: 32;
+            u64: 64;
+        }
     }
 
     #[inline]
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index e5b3d4444d8..ec8548d5574 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -350,9 +350,9 @@ impl<const N: usize> Swizzle<N> for Odd {
         )
     }
 
-    /// Extend a vector.
+    /// Resize a vector.
     ///
-    /// Extends the length of a vector, setting the new elements to `value`.
+    /// If `M` > `N`, extends the length of a vector, setting the new elements to `value`.
     /// If `M` < `N`, truncates the vector to the first `M` elements.
     ///
     /// ```
@@ -361,17 +361,17 @@ impl<const N: usize> Swizzle<N> for Odd {
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
     /// # use simd::u32x4;
     /// let x = u32x4::from_array([0, 1, 2, 3]);
-    /// assert_eq!(x.extend::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]);
-    /// assert_eq!(x.extend::<2>(9).to_array(), [0, 1]);
+    /// assert_eq!(x.resize::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]);
+    /// assert_eq!(x.resize::<2>(9).to_array(), [0, 1]);
     /// ```
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn extend<const M: usize>(self, value: T) -> Simd<T, M>
+    pub fn resize<const M: usize>(self, value: T) -> Simd<T, M>
     where
         LaneCount<M>: SupportedLaneCount,
     {
-        struct Extend<const N: usize>;
-        impl<const N: usize, const M: usize> Swizzle<M> for Extend<N> {
+        struct Resize<const N: usize>;
+        impl<const N: usize, const M: usize> Swizzle<M> for Resize<N> {
             const INDEX: [usize; M] = const {
                 let mut index = [0; M];
                 let mut i = 0;
@@ -382,6 +382,6 @@ impl<const N: usize, const M: usize> Swizzle<M> for Extend<N> {
                 index
             };
         }
-        Extend::<N>::concat_swizzle(self, Simd::splat(value))
+        Resize::<N>::concat_swizzle(self, Simd::splat(value))
     }
 }
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 92ee53b3e55..00fc2a24e27 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -13,7 +13,7 @@ mod $type {
             #[cfg(target_arch = "wasm32")]
             use wasm_bindgen_test::*;
 
-            use core_simd::simd::{Mask, Simd};
+            use core_simd::simd::Mask;
 
             #[test]
             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
@@ -124,17 +124,14 @@ fn cast_impl<T: core_simd::simd::MaskElement>()
 
             #[test]
             fn roundtrip_bitmask_vector_conversion() {
+                use core_simd::simd::ToBytes;
                 let values = [
                     true, false, false, true, false, false, true, false,
                     true, true, false, false, false, false, false, true,
                 ];
                 let mask = Mask::<$type, 16>::from_array(values);
                 let bitmask = mask.to_bitmask_vector();
-                if core::mem::size_of::<$type>() == 1 {
-                    assert_eq!(bitmask, Simd::from_array([0b01001001 as _, 0b10000011 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]));
-                } else {
-                    assert_eq!(bitmask, Simd::from_array([0b1000001101001001 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]));
-                }
+                assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
                 assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
             }
         }

From 0ad68db91a3149885bc62ae11d2d83e7d401fc25 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 17 Nov 2023 19:17:03 -0500
Subject: [PATCH 56/59] Use u8xN for bitmasks

---
 crates/core_simd/src/masks.rs            |  4 +--
 crates/core_simd/src/masks/bitmask.rs    | 39 ++++--------------------
 crates/core_simd/src/masks/full_masks.rs | 27 ++++------------
 3 files changed, 14 insertions(+), 56 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 5c0ae303162..63731342423 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -286,7 +286,7 @@ pub fn from_bitmask(bitmask: u64) -> Self {
     /// The remaining bits are unset.
     #[inline]
     #[must_use = "method returns a new integer and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<T, N> {
+    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
         self.0.to_bitmask_vector()
     }
 
@@ -295,7 +295,7 @@ pub fn to_bitmask_vector(self) -> Simd<T, N> {
     /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
         Self(mask_impl::Mask::from_bitmask_vector(bitmask))
     }
 }
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index 21d9e49a1b5..6ddff07fea2 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -121,45 +121,18 @@ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
 
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<T, N> {
-        let mut bitmask = Self::splat(false).to_int();
-
-        assert!(
-            core::mem::size_of::<Simd<T, N>>()
-                >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
-        );
-
-        // Safety: the bitmask vector is big enough to hold the bitmask
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0.as_ref().as_ptr(),
-                bitmask.as_mut_array().as_mut_ptr() as _,
-                self.0.as_ref().len(),
-            );
-        }
-
+    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
+        let mut bitmask = Simd::splat(0);
+        bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
         bitmask
     }
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
         let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
-
-        assert!(
-            core::mem::size_of::<Simd<T, N>>()
-                >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
-        );
-
-        // Safety: the bitmask vector is big enough to hold the bitmask
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                bitmask.as_array().as_ptr() as _,
-                bytes.as_mut().as_mut_ptr(),
-                bytes.as_ref().len(),
-            );
-        }
-
+        let len = bytes.as_ref().len();
+        bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
         Self(bytes, PhantomData)
     }
 
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index a529490f3a2..0d17e90c128 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -143,8 +143,8 @@ pub fn convert<U>(self) -> Mask<U, N>
 
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<T, N> {
-        let mut bitmask = Self::splat(false).to_int();
+    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
+        let mut bitmask = Simd::splat(0);
 
         // Safety: Bytes is the right size array
         unsafe {
@@ -159,15 +159,7 @@ pub fn to_bitmask_vector(self) -> Simd<T, N> {
                 }
             }
 
-            assert!(
-                core::mem::size_of::<Simd<T, N>>()
-                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
-            );
-            core::ptr::copy_nonoverlapping(
-                bytes.as_ref().as_ptr(),
-                bitmask.as_mut_array().as_mut_ptr() as _,
-                bytes.as_ref().len(),
-            );
+            bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
         }
 
         bitmask
@@ -175,20 +167,13 @@ pub fn to_bitmask_vector(self) -> Simd<T, N> {
 
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self {
+    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
         let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
 
         // Safety: Bytes is the right size array
         unsafe {
-            assert!(
-                core::mem::size_of::<Simd<T, N>>()
-                    >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>()
-            );
-            core::ptr::copy_nonoverlapping(
-                bitmask.as_array().as_ptr() as _,
-                bytes.as_mut().as_mut_ptr(),
-                bytes.as_mut().len(),
-            );
+            let len = bytes.as_ref().len();
+            bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
 
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {

From 62bbb360add75e3ae99b8b6745469671b049269b Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 17 Nov 2023 18:16:24 -0500
Subject: [PATCH 57/59] Add first_set

---
 crates/core_simd/src/masks.rs | 88 +++++++++++++++++++++++++++++++----
 1 file changed, 80 insertions(+), 8 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 63731342423..7af4517226a 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -13,7 +13,7 @@
 mod mask_impl;
 
 use crate::simd::{
-    cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount,
+    cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount,
 };
 use core::cmp::Ordering;
 use core::{fmt, mem};
@@ -35,6 +35,10 @@ fn valid<const N: usize>(values: Simd<Self, N>) -> bool
 
         fn eq(self, other: Self) -> bool;
 
+        fn as_usize(self) -> usize;
+
+        type Unsigned: SimdElement;
+
         const TRUE: Self;
 
         const FALSE: Self;
@@ -46,10 +50,10 @@ fn valid<const N: usize>(values: Simd<Self, N>) -> bool
 ///
 /// # Safety
 /// Type must be a signed integer.
-pub unsafe trait MaskElement: SimdElement + Sealed {}
+pub unsafe trait MaskElement: SimdElement<Mask = Self> + SimdCast + Sealed {}
 
 macro_rules! impl_element {
-    { $ty:ty } => {
+    { $ty:ty, $unsigned:ty } => {
         impl Sealed for $ty {
             #[inline]
             fn valid<const N: usize>(value: Simd<Self, N>) -> bool
@@ -62,6 +66,13 @@ fn valid<const N: usize>(value: Simd<Self, N>) -> bool
             #[inline]
             fn eq(self, other: Self) -> bool { self == other }
 
+            #[inline]
+            fn as_usize(self) -> usize {
+                self as usize
+            }
+
+            type Unsigned = $unsigned;
+
             const TRUE: Self = -1;
             const FALSE: Self = 0;
         }
@@ -71,11 +82,11 @@ unsafe impl MaskElement for $ty {}
     }
 }
 
-impl_element! { i8 }
-impl_element! { i16 }
-impl_element! { i32 }
-impl_element! { i64 }
-impl_element! { isize }
+impl_element! { i8, u8 }
+impl_element! { i16, u16 }
+impl_element! { i32, u32 }
+impl_element! { i64, u64 }
+impl_element! { isize, usize }
 
 /// A SIMD vector mask for `N` elements of width specified by `Element`.
 ///
@@ -298,6 +309,67 @@ pub fn to_bitmask_vector(self) -> Simd<u8, N> {
     pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
         Self(mask_impl::Mask::from_bitmask_vector(bitmask))
     }
+
+    /// Find the index of the first set element.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x8;
+    /// assert_eq!(mask32x8::splat(false).first_set(), None);
+    /// assert_eq!(mask32x8::splat(true).first_set(), Some(0));
+    ///
+    /// let mask = mask32x8::from_array([false, true, false, false, true, false, false, true]);
+    /// assert_eq!(mask.first_set(), Some(1));
+    /// ```
+    #[inline]
+    #[must_use = "method returns the index and does not mutate the original value"]
+    pub fn first_set(self) -> Option<usize> {
+        // If bitmasks are efficient, using them is better
+        if cfg!(target_feature = "sse") && N <= 64 {
+            let tz = self.to_bitmask().trailing_zeros();
+            return if tz == 64 { None } else { Some(tz as usize) };
+        }
+
+        // To find the first set index:
+        // * create a vector 0..N
+        // * replace unset mask elements in that vector with -1
+        // * perform _unsigned_ reduce-min
+        // * check if the result is -1 or an index
+
+        let index = Simd::from_array(
+            const {
+                let mut index = [0; N];
+                let mut i = 0;
+                while i < N {
+                    index[i] = i;
+                    i += 1;
+                }
+                index
+            },
+        );
+
+        // Safety: the input and output are integer vectors
+        let index: Simd<T, N> = unsafe { intrinsics::simd_cast(index) };
+
+        let masked_index = self.select(index, Self::splat(true).to_int());
+
+        // Safety: the input and output are integer vectors
+        let masked_index: Simd<T::Unsigned, N> = unsafe { intrinsics::simd_cast(masked_index) };
+
+        // Safety: the input is an integer vector
+        let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) };
+
+        // Safety: the return value is the unsigned version of T
+        let min_index: T = unsafe { core::mem::transmute_copy(&min_index) };
+
+        if min_index.eq(T::TRUE) {
+            None
+        } else {
+            Some(min_index.as_usize())
+        }
+    }
 }
 
 // vector/array conversion

From 64ea0884efbae271c9b0d1e4364bab9222f54d67 Mon Sep 17 00:00:00 2001
From: cui fliter <imcusg@gmail.com>
Date: Sun, 16 Jul 2023 00:37:30 +0800
Subject: [PATCH 58/59] remove repetitive words

Signed-off-by: cui fliter <imcusg@gmail.com>
---
 crates/core_simd/examples/dot_product.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs
index e5815888bb7..f047010a65c 100644
--- a/crates/core_simd/examples/dot_product.rs
+++ b/crates/core_simd/examples/dot_product.rs
@@ -130,7 +130,7 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
 }
 
 // This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that.
-// Notice the the use of `mul_add`, which can do a multiply and an add operation ber iteration.
+// Notice the use of `mul_add`, which can do a multiply and an add operation per iteration.
 pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 {
     a.array_chunks::<4>()
         .map(|&a| f32x4::from_array(a))

From 5739caae279262440c28321845bfbf286e6dd1c1 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 19 Nov 2023 00:56:00 -0500
Subject: [PATCH 59/59] Follow-up fixes for to_bitmask

---
 crates/core_simd/src/masks.rs            | 23 ++++++++
 crates/core_simd/src/masks/full_masks.rs | 74 ++++++++----------------
 2 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 7af4517226a..0623d2bf3d1 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -295,6 +295,16 @@ pub fn from_bitmask(bitmask: u64) -> Self {
     ///
     /// Each bit is set if the corresponding element in the mask is `true`.
     /// The remaining bits are unset.
+    ///
+    /// The bits are packed into the first N bits of the vector:
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x8;
+    /// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]);
+    /// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101);
+    /// ```
     #[inline]
     #[must_use = "method returns a new integer and does not mutate the original value"]
     pub fn to_bitmask_vector(self) -> Simd<u8, N> {
@@ -304,6 +314,19 @@ pub fn to_bitmask_vector(self) -> Simd<u8, N> {
     /// Create a mask from a bitmask vector.
     ///
     /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
+    ///
+    /// The bits are packed into the first N bits of the vector:
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{mask32x8, u8x8};
+    /// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]);
+    /// assert_eq!(
+    ///     mask32x8::from_bitmask_vector(bitmask),
+    ///     mask32x8::from_array([true, false, true, false, false, false, true, false]),
+    /// );
+    /// ```
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 0d17e90c128..63964f455e0 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -237,62 +237,36 @@ unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
     #[inline]
     pub(crate) fn to_bitmask_integer(self) -> u64 {
         // TODO modify simd_bitmask to zero-extend output, making this unnecessary
-        macro_rules! bitmask {
-            { $($ty:ty: $($len:literal),*;)* } => {
-                match N {
-                    $($(
-                    // Safety: bitmask matches length
-                    $len => unsafe { self.to_bitmask_impl::<$ty, $len>() as u64 },
-                    )*)*
-                    // Safety: bitmask matches length
-                    _ => unsafe { self.to_bitmask_impl::<u64, 64>() },
-                }
-            }
-        }
-        #[cfg(all_lane_counts)]
-        bitmask! {
-            u8: 1, 2, 3, 4, 5, 6, 7, 8;
-            u16: 9, 10, 11, 12, 13, 14, 15, 16;
-            u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32;
-            u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64;
-        }
-        #[cfg(not(all_lane_counts))]
-        bitmask! {
-            u8: 1, 2, 4, 8;
-            u16: 16;
-            u32: 32;
-            u64: 64;
+        if N <= 8 {
+            // Safety: bitmask matches length
+            unsafe { self.to_bitmask_impl::<u8, 8>() as u64 }
+        } else if N <= 16 {
+            // Safety: bitmask matches length
+            unsafe { self.to_bitmask_impl::<u16, 16>() as u64 }
+        } else if N <= 32 {
+            // Safety: bitmask matches length
+            unsafe { self.to_bitmask_impl::<u32, 32>() as u64 }
+        } else {
+            // Safety: bitmask matches length
+            unsafe { self.to_bitmask_impl::<u64, 64>() }
         }
     }
 
     #[inline]
     pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
         // TODO modify simd_bitmask_select to truncate input, making this unnecessary
-        macro_rules! bitmask {
-            { $($ty:ty: $($len:literal),*;)* } => {
-                match N {
-                    $($(
-                    // Safety: bitmask matches length
-                    $len => unsafe { Self::from_bitmask_impl::<$ty, $len>(bitmask as $ty) },
-                    )*)*
-                    // Safety: bitmask matches length
-                    _ => unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) },
-                }
-            }
-        }
-        #[cfg(all_lane_counts)]
-        bitmask! {
-            u8: 1, 2, 3, 4, 5, 6, 7, 8;
-            u16: 9, 10, 11, 12, 13, 14, 15, 16;
-            u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32;
-            u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64;
-        }
-        #[cfg(not(all_lane_counts))]
-        bitmask! {
-            u8: 1, 2, 4, 8;
-            u16: 16;
-            u32: 32;
-            u64: 64;
+        if N <= 8 {
+            // Safety: bitmask matches length
+            unsafe { Self::from_bitmask_impl::<u8, 8>(bitmask as u8) }
+        } else if N <= 16 {
+            // Safety: bitmask matches length
+            unsafe { Self::from_bitmask_impl::<u16, 16>(bitmask as u16) }
+        } else if N <= 32 {
+            // Safety: bitmask matches length
+            unsafe { Self::from_bitmask_impl::<u32, 32>(bitmask as u32) }
+        } else {
+            // Safety: bitmask matches length
+            unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) }
         }
     }