From 11c43c0c160539b6d040539b668e0142769537a5 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 21 Jul 2023 15:33:09 -0400 Subject: [PATCH 01/59] Fix is_subnormal on architectures that flush subnormals to zero --- crates/core_simd/src/elements/float.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index 501c1c5ddd3..d700011ff9c 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -336,7 +336,10 @@ fn is_finite(self) -> Self::Mask { #[inline] fn is_subnormal(self) -> Self::Mask { - self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) + // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero, + // so this comparison must be done with integers. + let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits()); + not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) } #[inline] From dc0ba7836528b7d8720b868e331a378ceaf8fa95 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 21 Jul 2023 15:44:15 -0400 Subject: [PATCH 02/59] Don't require strict equality when subnormals are flushed --- crates/test_helpers/Cargo.toml | 7 +++---- crates/test_helpers/src/biteq.rs | 2 ++ crates/test_helpers/src/lib.rs | 13 +++++++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index 1d2bc8b519a..bd481db6bac 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -4,10 +4,9 @@ version = "0.1.0" edition = "2021" publish = false -[dependencies.proptest] -version = "0.10" -default-features = false -features = ["alloc"] +[dependencies] +float_eq = "1.0" +proptest = { version = "0.10", default-features = false, features = ["alloc"] } [features] all_lane_counts = [] diff 
--git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index 7d91260d838..515eaf1c048 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -40,6 +40,8 @@ impl BitEq for $type { fn biteq(&self, other: &Self) -> bool { if self.is_nan() && other.is_nan() { true // exact nan bits don't matter + } else if crate::flush_subnormals::() { + self.to_bits() == other.to_bits() || float_eq::float_eq!(self, other, abs <= 2. * <$type>::EPSILON) } else { self.to_bits() == other.to_bits() } diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index b26cdc311a2..1b98bccf706 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -6,6 +6,19 @@ #[macro_use] pub mod biteq; +/// Indicates if subnormal floats are flushed to zero. +pub fn flush_subnormals() -> bool { + let is_f32 = core::mem::size_of::() == 4; + let ppc_flush = is_f32 + && cfg!(all( + target_arch = "powerpc64", + target_endian = "big", + not(target_feature = "vsx") + )); + let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon")); + ppc_flush || arm_flush +} + /// Specifies the default strategy for testing a type. /// /// This strategy should be what "makes sense" to test. 
From 38c7ba09dd6c81ff76477a7e54b561c07f1d1db0 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 00:41:21 -0400 Subject: [PATCH 03/59] Handle subnormal numbers exactly --- crates/core_simd/tests/ops_macros.rs | 23 ++++-- crates/test_helpers/Cargo.toml | 1 - crates/test_helpers/src/biteq.rs | 34 +++++++- crates/test_helpers/src/lib.rs | 108 ++++++++++++++++++++++---- crates/test_helpers/src/subnormals.rs | 39 ++++++++++ 5 files changed, 181 insertions(+), 24 deletions(-) create mode 100644 crates/test_helpers/src/subnormals.rs diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 3a02f3f01e1..1f5d9488ad0 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test { { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => { test_helpers::test_lanes! { fn $fn() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( & as core::ops::$trait>::$fn, &$scalar_fn, &|_| true, @@ -31,7 +31,7 @@ mod $fn { test_helpers::test_lanes! 
{ fn normal() { - test_helpers::test_binary_elementwise( + test_helpers::test_binary_elementwise_flush_subnormals( & as core::ops::$trait>::$fn, &$scalar_fn, &|_, _| true, @@ -39,7 +39,7 @@ fn normal() { } fn assign() { - test_helpers::test_binary_elementwise( + test_helpers::test_binary_elementwise_flush_subnormals( &|mut a, b| { as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, &$scalar_fn, &|_, _| true, @@ -433,7 +433,7 @@ fn recip() { } fn to_degrees() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::to_degrees, &Scalar::to_degrees, &|_| true, @@ -441,7 +441,7 @@ fn to_degrees() { } fn to_radians() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::to_radians, &Scalar::to_radians, &|_| true, @@ -512,6 +512,7 @@ fn simd_max() { fn simd_clamp() { test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { + use test_helpers::subnormals::FlushSubnormals; for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { core::mem::swap(min, max); @@ -528,8 +529,18 @@ fn simd_clamp() { for i in 0..LANES { result_scalar[i] = value[i].clamp(min[i], max[i]); } + let mut result_scalar_flush = [Scalar::default(); LANES]; + for i in 0..LANES { + result_scalar_flush[i] = value[i]; + if FlushSubnormals::flush(value[i]) < FlushSubnormals::flush(min[i]) { + result_scalar_flush[i] = min[i]; + } + if FlushSubnormals::flush(value[i]) > FlushSubnormals::flush(max[i]) { + result_scalar_flush[i] = max[i]; + } + } let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array(); - test_helpers::prop_assert_biteq!(result_scalar, result_vector); + test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush); Ok(()) }) } diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index bd481db6bac..23dae7c9338 100644 --- 
a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -5,7 +5,6 @@ edition = "2021" publish = false [dependencies] -float_eq = "1.0" proptest = { version = "0.10", default-features = false, features = ["alloc"] } [features] diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index 515eaf1c048..cbc20cda0d6 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -40,8 +40,6 @@ impl BitEq for $type { fn biteq(&self, other: &Self) -> bool { if self.is_nan() && other.is_nan() { true // exact nan bits don't matter - } else if crate::flush_subnormals::() { - self.to_bits() == other.to_bits() || float_eq::float_eq!(self, other, abs <= 2. * <$type>::EPSILON) } else { self.to_bits() == other.to_bits() } @@ -115,6 +113,27 @@ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { } } +#[doc(hidden)] +pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T); + +impl PartialEq> for BitEqWrapper<'_, T> { + fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool { + self.0.biteq(other.0) || self.0.biteq(other.1) + } +} + +impl core::fmt::Debug for BitEqEitherWrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.0.biteq(self.1) { + self.0.fmt(f) + } else { + self.0.fmt(f)?; + write!(f, " or ")?; + self.1.fmt(f) + } + } +} + #[macro_export] macro_rules! prop_assert_biteq { { $a:expr, $b:expr $(,)? } => { @@ -124,5 +143,14 @@ macro_rules! prop_assert_biteq { let b = $b; proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b)); } - } + }; + { $a:expr, $b:expr, $c:expr $(,)? 
} => { + { + use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper}; + let a = $a; + let b = $b; + let c = $c; + proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c)); + } + }; } diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 1b98bccf706..63308a2ca33 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -6,18 +6,8 @@ #[macro_use] pub mod biteq; -/// Indicates if subnormal floats are flushed to zero. -pub fn flush_subnormals() -> bool { - let is_f32 = core::mem::size_of::() == 4; - let ppc_flush = is_f32 - && cfg!(all( - target_arch = "powerpc64", - target_endian = "big", - not(target_feature = "vsx") - )); - let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon")); - ppc_flush || arm_flush -} +pub mod subnormals; +use subnormals::FlushSubnormals; /// Specifies the default strategy for testing a type. /// @@ -164,7 +154,6 @@ pub fn test_3< } /// Test a unary vector function against a unary scalar function, applied elementwise. 
-#[inline(never)] pub fn test_unary_elementwise( fv: &dyn Fn(Vector) -> VectorResult, fs: &dyn Fn(Scalar) -> ScalarResult, @@ -190,6 +179,48 @@ pub fn test_unary_elementwise( + fv: &dyn Fn(Vector) -> VectorResult, + fs: &dyn Fn(Scalar) -> ScalarResult, + check: &dyn Fn([Scalar; LANES]) -> bool, +) where + Scalar: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + let flush = |x: Scalar| FlushSubnormals::flush(fs(FlushSubnormals::flush(x))); + test_1(&|x: [Scalar; LANES]| { + proptest::prop_assume!(check(x)); + let result_v: [ScalarResult; LANES] = fv(x.into()).into(); + let result_s: [ScalarResult; LANES] = x + .iter() + .copied() + .map(fs) + .collect::>() + .try_into() + .unwrap(); + let result_sf: [ScalarResult; LANES] = x + .iter() + .copied() + .map(flush) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s, result_sf); + Ok(()) + }); +} + /// Test a unary vector function against a unary scalar function, applied elementwise. #[inline(never)] pub fn test_unary_mask_elementwise( @@ -217,7 +248,6 @@ pub fn test_unary_mask_elementwise( } /// Test a binary vector function against a binary scalar function, applied elementwise. -#[inline(never)] pub fn test_binary_elementwise< Scalar1, Scalar2, @@ -254,6 +284,56 @@ pub fn test_binary_elementwise< }); } +/// Test a binary vector function against a binary scalar function, applied elementwise. +/// +/// Where subnormals are flushed, use approximate equality. 
+pub fn test_binary_elementwise_flush_subnormals< + Scalar1, + Scalar2, + ScalarResult, + Vector1, + Vector2, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + let flush = |x: Scalar1, y: Scalar2| { + FlushSubnormals::flush(fs(FlushSubnormals::flush(x), FlushSubnormals::flush(y))) + }; + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_v: [ScalarResult; LANES] = fv(x.into(), y.into()).into(); + let result_s: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + let result_sf: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| flush(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s, result_sf); + Ok(()) + }); +} + /// Test a binary vector-scalar function against a binary scalar function, applied elementwise. 
#[inline(never)] pub fn test_binary_scalar_rhs_elementwise< diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs new file mode 100644 index 00000000000..122304f96db --- /dev/null +++ b/crates/test_helpers/src/subnormals.rs @@ -0,0 +1,39 @@ +pub trait FlushSubnormals: Sized { + fn flush(self) -> Self { + self + } +} + +impl FlushSubnormals for *const T {} +impl FlushSubnormals for *mut T {} + +macro_rules! impl_float { + { $($ty:ty),* } => { + $( + impl FlushSubnormals for $ty { + fn flush(self) -> Self { + let is_f32 = core::mem::size_of::() == 4; + let ppc_flush = is_f32 && cfg!(all(target_arch = "powerpc64", target_endian = "big", not(target_feature = "vsx"))); + let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon")); + let flush = ppc_flush || arm_flush; + if flush && self.is_subnormal() { + <$ty>::copysign(0., self) + } else { + self + } + } + } + )* + } +} + +macro_rules! impl_else { + { $($ty:ty),* } => { + $( + impl FlushSubnormals for $ty {} + )* + } +} + +impl_float! { f32, f64 } +impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } From 8c89a7240c668b92cbd66d8afc1d939ae45141c4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 00:44:48 -0400 Subject: [PATCH 04/59] Enable more cross tests --- .github/workflows/ci.yml | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1ff377fce34..5ae654bef3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,36 +171,19 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false - # TODO: Sadly, we cant configure target-feature in a meaningful way - # because `cross` doesn't tell qemu to enable any non-default cpu - # features, nor does it give us a way to do so. - # - # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch]. - # This is a lot more complex... 
but in practice it's likely that we can just - # snarf the docker config from around [here][1000-dockerfiles]. - # - # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67 - # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker matrix: target: - - i586-unknown-linux-gnu - # 32-bit arm has a few idiosyncracies like having subnormal flushing - # to zero on by default. Ideally we'd set - armv7-unknown-linux-gnueabihf - aarch64-unknown-linux-gnu - # Note: The issue above means neither of these mips targets will use - # MSA (mips simd) but MIPS uses a nonstandard binary representation - # for NaNs which makes it worth testing on despite that. + - powerpc-unknown-linux-gnu + - powerpc64-unknown-linux-gnu + - powerpc64le-unknown-linux-gnu + - riscv64gc-unknown-linux-gnu + # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 - - riscv64gc-unknown-linux-gnu - # TODO this test works, but it appears to time out - # - powerpc-unknown-linux-gnu - # TODO this test is broken, but it appears to be a problem with QEMU, not us. 
- # - powerpc64le-unknown-linux-gnu - # TODO enable this once a new version of cross is released - # - powerpc64-unknown-linux-gnu + target_feature: ["", "+native"] steps: - uses: actions/checkout@v2 @@ -217,11 +200,14 @@ jobs: # being part of the tarball means we can't just use the download/latest # URL :( run: | - CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz + CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz mkdir -p "$HOME/.bin" curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin" echo "$HOME/.bin" >> $GITHUB_PATH + - name: Configure RUSTFLAGS + run: echo "-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV + - name: Test (debug) run: cross test --verbose --target=${{ matrix.target }} From 36c8bf363fa8f5afe6e56641ae7fd4ae65bb1deb Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 08:55:55 -0400 Subject: [PATCH 05/59] Improve cross tests --- .github/workflows/ci.yml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ae654bef3b..7758409a92d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,7 +167,7 @@ jobs: RUSTFLAGS: ${{ matrix.rustflags }} cross-tests: - name: "${{ matrix.target }} (via cross)" + name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)" runs-on: ubuntu-latest strategy: fail-fast: false @@ -175,6 +175,7 @@ jobs: matrix: target: - armv7-unknown-linux-gnueabihf + - thumbv7neon-unknown-linux-gnueabihf - aarch64-unknown-linux-gnu - powerpc-unknown-linux-gnu - powerpc64-unknown-linux-gnu @@ -183,7 +184,11 @@ jobs: # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 - target_feature: ["", "+native"] + target_feature: "default" + include: 
+ - { target: powerpc64-unknown-linux-gnu, target_feature: "native" } + - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" } + - { target: riscv64gc-unknown-linux-gnu, target_feature: "native" } steps: - uses: actions/checkout@v2 @@ -206,7 +211,18 @@ jobs: echo "$HOME/.bin" >> $GITHUB_PATH - name: Configure RUSTFLAGS - run: echo "-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV + shell: bash + run: | + case "${{ matrix.target_feature }}" in + default) + echo "RUSTFLAGS=" >> $GITHUB_ENV;; + native) + echo "RUSTFLAGS=-Ctarget-cpu=native" >> $GITHUB_ENV + ;; + *) + echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV + ;; + esac - name: Test (debug) run: cross test --verbose --target=${{ matrix.target }} From 40f04353d8fa40484a1df8ca15091b07413b5655 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 08:58:37 -0400 Subject: [PATCH 06/59] Fix workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7758409a92d..73d38b41a38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -184,7 +184,7 @@ jobs: # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 - target_feature: "default" + target_feature: [default] include: - { target: powerpc64-unknown-linux-gnu, target_feature: "native" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" } From 415b50f6715c7967fc83c5a05dd6917a344f0b5f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 09:16:42 -0400 Subject: [PATCH 07/59] Flush subnormals in f32::fract test --- crates/core_simd/tests/round.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index aacf7bd3bcc..191c39e2370 100644 --- a/crates/core_simd/tests/round.rs +++ 
b/crates/core_simd/tests/round.rs @@ -43,7 +43,7 @@ fn trunc() { } fn fract() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::fract, &Scalar::fract, &|_| true, From 1948b02e40293c56e9498a5e553d29d48fcc3836 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 09:35:18 -0400 Subject: [PATCH 08/59] Don't use native cpu in cross --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73d38b41a38..d36cad638af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,9 +186,9 @@ jobs: # - mips64-unknown-linux-gnuabi64 target_feature: [default] include: - - { target: powerpc64-unknown-linux-gnu, target_feature: "native" } - - { target: powerpc64le-unknown-linux-gnu, target_feature: "native" } - - { target: riscv64gc-unknown-linux-gnu, target_feature: "native" } + - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } + - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } + - { target: riscv64gc-unknown-linux-gnu, target_feature: "+zvl128b" } steps: - uses: actions/checkout@v2 From 94f20143a9d6f69f5472565c6694d05277112ad3 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 09:48:26 -0400 Subject: [PATCH 09/59] Enable v extension on riscv --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d36cad638af..89c355e1190 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,7 +188,7 @@ jobs: include: - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } - - { target: riscv64gc-unknown-linux-gnu, target_feature: "+zvl128b" } + - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } steps: - uses: actions/checkout@v2 From 
5c6405ba8956d5a0252789b7152fb0c5b684b67f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 10:24:32 -0400 Subject: [PATCH 10/59] Disable riscv v extension --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89c355e1190..c0429a1332c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,7 +188,8 @@ jobs: include: - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } - - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } + # We should test this, but cross currently can't run it + # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } steps: - uses: actions/checkout@v2 From 49e92a2918f9f3d95ad8a0060a2d63d6f7b52950 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 14:18:16 -0400 Subject: [PATCH 11/59] Improve powerpc subnormal flushing check --- .github/workflows/ci.yml | 4 +--- crates/test_helpers/src/subnormals.rs | 6 +++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0429a1332c..42172968341 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,6 +186,7 @@ jobs: # - mips64-unknown-linux-gnuabi64 target_feature: [default] include: + - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } # We should test this, but cross currently can't run it @@ -217,9 +218,6 @@ jobs: case "${{ matrix.target_feature }}" in default) echo "RUSTFLAGS=" >> $GITHUB_ENV;; - native) - echo "RUSTFLAGS=-Ctarget-cpu=native" >> $GITHUB_ENV - ;; *) echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV ;; diff --git a/crates/test_helpers/src/subnormals.rs 
b/crates/test_helpers/src/subnormals.rs index 122304f96db..d46e8524116 100644 --- a/crates/test_helpers/src/subnormals.rs +++ b/crates/test_helpers/src/subnormals.rs @@ -13,7 +13,11 @@ macro_rules! impl_float { impl FlushSubnormals for $ty { fn flush(self) -> Self { let is_f32 = core::mem::size_of::() == 4; - let ppc_flush = is_f32 && cfg!(all(target_arch = "powerpc64", target_endian = "big", not(target_feature = "vsx"))); + let ppc_flush = is_f32 && cfg!(all( + any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")), + target_feature = "altivec", + not(target_feature = "vsx"), + )); let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon")); let flush = ppc_flush || arm_flush; if flush && self.is_subnormal() { From e73d02929abe6c24b1223a007333d7799e50bb57 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 16:02:00 -0400 Subject: [PATCH 12/59] Specify emulated CPUs --- .github/workflows/ci.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42172968341..42e2ba55c28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -187,8 +187,8 @@ jobs: target_feature: [default] include: - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } - - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } + - { target: powerpc64-unknown-linux-gnu, target_feature: "+power10-vector" } + - { target: powerpc64le-unknown-linux-gnu, target_feature: "+power10-vector" } # We should test this, but cross currently can't run it # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } @@ -212,6 +212,12 @@ jobs: curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin" echo "$HOME/.bin" >> $GITHUB_PATH + - name: Configure Emulated CPUs + run: | + echo 
"CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV + echo "CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -cpu power10" >> $GITHUB_ENV + echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV + - name: Configure RUSTFLAGS shell: bash run: | From d07ce3cef9223c918ae2381fd46ff53ce0cf38b4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 17:56:21 -0400 Subject: [PATCH 13/59] Account for possible qemu bug --- crates/test_helpers/src/lib.rs | 6 +++-- crates/test_helpers/src/subnormals.rs | 35 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 63308a2ca33..d032ded576d 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(stdsimd, powerpc_target_feature)] + pub mod array; #[cfg(target_arch = "wasm32")] @@ -198,7 +200,7 @@ pub fn test_unary_elementwise_flush_subnormals< Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, { - let flush = |x: Scalar| FlushSubnormals::flush(fs(FlushSubnormals::flush(x))); + let flush = |x: Scalar| subnormals::flush(fs(subnormals::flush_in(x))); test_1(&|x: [Scalar; LANES]| { proptest::prop_assume!(check(x)); let result_v: [ScalarResult; LANES] = fv(x.into()).into(); @@ -308,7 +310,7 @@ pub fn test_binary_elementwise_flush_subnormals< VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, { let flush = |x: Scalar1, y: Scalar2| { - FlushSubnormals::flush(fs(FlushSubnormals::flush(x), FlushSubnormals::flush(y))) + subnormals::flush(fs(subnormals::flush_in(x), subnormals::flush_in(y))) }; test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { proptest::prop_assume!(check(x, y)); diff --git 
a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs index d46e8524116..585b80bb6c7 100644 --- a/crates/test_helpers/src/subnormals.rs +++ b/crates/test_helpers/src/subnormals.rs @@ -41,3 +41,38 @@ impl FlushSubnormals for $ty {} impl_float! { f32, f64 } impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } + +/// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs. +/// https://gitlab.com/qemu-project/qemu/-/issues/1779 +#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))] +fn in_buggy_qemu() -> bool { + use std::sync::OnceLock; + static BUGGY: OnceLock = OnceLock::new(); + + fn add(x: f32, y: f32) -> f32 { + use core::arch::powerpc::*; + let array: [f32; 4] = + unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) }; + array[0] + } + + *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative()) +} + +#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))] +pub fn flush_in(x: T) -> T { + if in_buggy_qemu() { + x + } else { + x.flush() + } +} + +#[cfg(not(all(target_arch = "powerpc", target_feature = "altivec")))] +pub fn flush_in(x: T) -> T { + x.flush() +} + +pub fn flush(x: T) -> T { + x.flush() +} From ad747af0bb3f2249ec2a87b9f25f22b238dc700a Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 18:33:33 -0400 Subject: [PATCH 14/59] Apply workaround to ppc64 --- crates/test_helpers/src/subnormals.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs index 585b80bb6c7..ec0f1fb24b9 100644 --- a/crates/test_helpers/src/subnormals.rs +++ b/crates/test_helpers/src/subnormals.rs @@ -44,13 +44,20 @@ impl FlushSubnormals for $ty {} /// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs. 
/// https://gitlab.com/qemu-project/qemu/-/issues/1779 -#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))] +#[cfg(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +))] fn in_buggy_qemu() -> bool { use std::sync::OnceLock; static BUGGY: OnceLock = OnceLock::new(); fn add(x: f32, y: f32) -> f32 { + #[cfg(target_arch = "powerpc")] use core::arch::powerpc::*; + #[cfg(target_arch = "powerpc64")] + use core::arch::powerpc64::*; + let array: [f32; 4] = unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) }; array[0] @@ -59,7 +66,10 @@ fn add(x: f32, y: f32) -> f32 { *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative()) } -#[cfg(all(target_arch = "powerpc", target_feature = "altivec"))] +#[cfg(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +))] pub fn flush_in(x: T) -> T { if in_buggy_qemu() { x @@ -68,7 +78,10 @@ pub fn flush_in(x: T) -> T { } } -#[cfg(not(all(target_arch = "powerpc", target_feature = "altivec")))] +#[cfg(not(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +)))] pub fn flush_in(x: T) -> T { x.flush() } From ca12492584e77fbb2d982942c9cf2843b8f99487 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 18:35:04 -0400 Subject: [PATCH 15/59] Revert some CI changes --- .github/workflows/ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42e2ba55c28..4dd334a1344 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -187,8 +187,8 @@ jobs: target_feature: [default] include: - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } - - { target: powerpc64-unknown-linux-gnu, target_feature: "+power10-vector" } - - { target: powerpc64le-unknown-linux-gnu, target_feature: "+power10-vector" } + - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } + - { 
target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } # We should test this, but cross currently can't run it # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } @@ -215,8 +215,7 @@ jobs: - name: Configure Emulated CPUs run: | echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV - echo "CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64 -cpu power10" >> $GITHUB_ENV - echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV + # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV - name: Configure RUSTFLAGS shell: bash From 52d6397da7d75deb32efa6636839ed4bcc6b2fdc Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 19:22:30 -0400 Subject: [PATCH 16/59] Flush subnormals in reduce tests --- crates/core_simd/tests/ops_macros.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 1f5d9488ad0..22265b8cf86 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -96,9 +96,11 @@ macro_rules! impl_common_integer_tests { test_helpers::test_lanes! { fn reduce_sum() { test_helpers::test_1(&|x| { + use test_helpers::subnormals::{flush, flush_in}; test_helpers::prop_assert_biteq! ( $vector::::from_array(x).reduce_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), + flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)), ); Ok(()) }); @@ -106,9 +108,11 @@ fn reduce_sum() { fn reduce_product() { test_helpers::test_1(&|x| { + use test_helpers::subnormals::{flush, flush_in}; test_helpers::prop_assert_biteq! 
( $vector::::from_array(x).reduce_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), + flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)), ); Ok(()) }); From bd4e6616f36a47f74059803fbfd0b8ddeff1b46f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 20:50:49 -0400 Subject: [PATCH 17/59] Adjust clamp test --- crates/core_simd/tests/ops_macros.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 22265b8cf86..7e705892b53 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -516,7 +516,7 @@ fn simd_max() { fn simd_clamp() { test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { - use test_helpers::subnormals::FlushSubnormals; + use test_helpers::subnormals::flush_in; for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { core::mem::swap(min, max); @@ -535,13 +535,14 @@ fn simd_clamp() { } let mut result_scalar_flush = [Scalar::default(); LANES]; for i in 0..LANES { - result_scalar_flush[i] = value[i]; - if FlushSubnormals::flush(value[i]) < FlushSubnormals::flush(min[i]) { - result_scalar_flush[i] = min[i]; + let mut value = flush_in(value[i]); + if value < flush_in(min[i]) { + value = min[i]; } - if FlushSubnormals::flush(value[i]) > FlushSubnormals::flush(max[i]) { - result_scalar_flush[i] = max[i]; + if value > flush_in(max[i]) { + value = max[i]; } + result_scalar_flush[i] = value } let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array(); test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush); From 616cb6e9e6de58dd9409d815078451ee550fb946 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 23:00:51 -0400 Subject: [PATCH 18/59] Disable simd_clamp test on ppc64 --- crates/core_simd/tests/ops_macros.rs | 2 
++ 1 file changed, 2 insertions(+) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 7e705892b53..a6d67986728 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -514,6 +514,8 @@ fn simd_max() { assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.)); } + #[cfg(not(all(target_arch = "powerpc64", target_feature = "vsx")))] + // https://gitlab.com/qemu-project/qemu/-/issues/1780 fn simd_clamp() { test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { use test_helpers::subnormals::flush_in; From fd712fe3f39470160baf95ff8eb6fb00ebf31453 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 22 Jul 2023 23:38:20 -0400 Subject: [PATCH 19/59] Fix test skip --- crates/core_simd/tests/ops_macros.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index a6d67986728..7b309df22cb 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -514,9 +514,11 @@ fn simd_max() { assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.)); } - #[cfg(not(all(target_arch = "powerpc64", target_feature = "vsx")))] - // https://gitlab.com/qemu-project/qemu/-/issues/1780 fn simd_clamp() { + if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) { + // https://gitlab.com/qemu-project/qemu/-/issues/1780 + return; + } test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { use test_helpers::subnormals::flush_in; for (min, max) in min.iter_mut().zip(max.iter_mut()) { From dbcbc3e4c50e4127e7034aef962f4241143e6c79 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 00:07:28 -0400 Subject: [PATCH 20/59] Disable misbehaving targets --- .github/workflows/ci.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4dd334a1344..ca1ab996a7b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -175,20 +175,24 @@ jobs: matrix: target: - armv7-unknown-linux-gnueabihf - - thumbv7neon-unknown-linux-gnueabihf - - aarch64-unknown-linux-gnu + - thumbv7neon-unknown-linux-gnueabihf # includes neon by default + - aarch64-unknown-linux-gnu # includes neon by default - powerpc-unknown-linux-gnu - - powerpc64-unknown-linux-gnu - - powerpc64le-unknown-linux-gnu + - powerpc64le-unknown-linux-gnu # includes altivec by default - riscv64gc-unknown-linux-gnu # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing + # non-nightly since https://github.com/rust-lang/rust/pull/113274 # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 + # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen. + # - powerpc64-unknown-linux-gnu target_feature: [default] include: - - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } + # Fails due to QEMU floating point errors, probably handling subnormals incorrectly. + # This target is somewhat redundant, since ppc64le has altivec as well. 
+ # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } # We should test this, but cross currently can't run it # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } From cb461aceb3da91c116f13cdc5a3574bdf028923a Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 01:02:26 -0400 Subject: [PATCH 21/59] Only flush comparison in test --- crates/core_simd/tests/ops_macros.rs | 6 +++--- crates/test_helpers/src/lib.rs | 29 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 7b309df22cb..8386850cb90 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -539,11 +539,11 @@ fn simd_clamp() { } let mut result_scalar_flush = [Scalar::default(); LANES]; for i in 0..LANES { - let mut value = flush_in(value[i]); - if value < flush_in(min[i]) { + let mut value = value[i]; + if flush_in(value) < flush_in(min[i]) { value = min[i]; } - if value > flush_in(max[i]) { + if flush_in(value) > flush_in(max[i]) { value = max[i]; } result_scalar_flush[i] = value diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index d032ded576d..b80c745aaf2 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -336,6 +336,35 @@ pub fn test_binary_elementwise_flush_subnormals< }); } +/// Test a binary vector function returning a mask against a binary scalar function returning `bool`, applied elementwise.
+#[inline(never)] +pub fn test_binary_mask_elementwise( + fv: &dyn Fn(Vector1, Vector2) -> Mask, + fs: &dyn Fn(Scalar1, Scalar2) -> bool, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy, +{ + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_v: [bool; LANES] = fv(x.into(), y.into()).into(); + let result_s: [bool; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s); + Ok(()) + }); +} + /// Test a binary vector-scalar function against a binary scalar function, applied elementwise. #[inline(never)] pub fn test_binary_scalar_rhs_elementwise< From baa5791a453dccf5d09404e601cc6403c6a9cd3b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 01:03:43 -0400 Subject: [PATCH 22/59] Document odd test behavior --- crates/core_simd/tests/ops_macros.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 8386850cb90..f6ded66e9fc 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -539,6 +539,7 @@ fn simd_clamp() { } let mut result_scalar_flush = [Scalar::default(); LANES]; for i in 0..LANES { + // Comparisons flush-to-zero, but return value selection is _not_ flushed. 
let mut value = value[i]; if flush_in(value) < flush_in(min[i]) { value = min[i]; From 5e5745318a7efdfd3f927102550ba4697c4f5863 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 10:15:05 -0400 Subject: [PATCH 23/59] Disable big endian ppc64 --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca1ab996a7b..ed1589be4f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,7 +188,6 @@ jobs: # - powerpc64-unknown-linux-gnu target_feature: [default] include: - - { target: powerpc64-unknown-linux-gnu, target_feature: "+vsx" } - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } # Fails due to QEMU floating point errors, probably handling subnormals incorrectly. # This target is somewhat redundant, since ppc64le has altivec as well. From 6e8d21ee760d4672ed6c374c9be1687c531499fb Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 13:21:56 -0400 Subject: [PATCH 24/59] Define portability --- crates/core_simd/src/core_simd_docs.md | 33 ++++++++++++++++++++++++++ crates/core_simd/src/mod.rs | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md index 15e8ed0253e..3d2e737e2af 100644 --- a/crates/core_simd/src/core_simd_docs.md +++ b/crates/core_simd/src/core_simd_docs.md @@ -2,3 +2,36 @@ Portable SIMD module. This module offers a portable abstraction for SIMD operations that is not bound to any particular hardware architecture. + +# What is "portable"? + +This module provides a SIMD implementation that is fast and predictable on any target. + +### Portable SIMD works on every target + +Unlike target-specific SIMD in `std::arch`, portable SIMD compiles for every target. +In this regard, it is just like "regular" Rust. 
+ +### Portable SIMD is consistent between targets + +A program using portable SIMD can expect identical behavior on any target. +In most regards, [`Simd`] can be thought of as a parallelized `[T; N]` and operates like a sequence of `T`. + +This has one notable exception: a handful of older architectures (e.g. `armv7` and `powerpc`) flush [subnormal](`f32::is_subnormal`) `f32` values to zero. +On these architectures, subnormal `f32` input values are replaced with zeros, and any operation producing subnormal `f32` values produces zeros instead. +This doesn't affect most architectures or programs. + +### Operations use the best instructions available + +Operations provided by this module compile to the best available SIMD instructions. + +Portable SIMD is not a low-level vendor library, and operations in portable SIMD _do not_ necessarily map to a single instruction. +Instead, they map to a reasonable implementation of the operation for the target. + +Consistency between targets is not compromised to use faster or fewer instructions. +In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent. +For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`) can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`]. +When necessary, [`Simd`] can be converted to the types provided by `std::arch` to make use of target-specific functions. + +Many targets simply don't have SIMD, or don't support SIMD for a particular element type. +In those cases, regular scalar operations are generated instead. 
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index f9891a3b7c1..dd954b7cc48 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -21,8 +21,9 @@ mod vector; mod vendor; -#[doc = include_str!("core_simd_docs.md")] pub mod simd { + #![doc = include_str!("core_simd_docs.md")] + pub mod prelude; pub(crate) use crate::core_simd::intrinsics; From 927139d1e711a9fd276632616feea393693258f4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 14:15:20 -0400 Subject: [PATCH 25/59] Add scalar shifts --- crates/core_simd/src/ops.rs | 1 + crates/core_simd/src/ops/shift_scalar.rs | 58 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 crates/core_simd/src/ops/shift_scalar.rs diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index b007456cf2c..63a96106283 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -6,6 +6,7 @@ mod assign; mod deref; +mod shift_scalar; mod unary; impl core::ops::Index for Simd diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs new file mode 100644 index 00000000000..77aac656395 --- /dev/null +++ b/crates/core_simd/src/ops/shift_scalar.rs @@ -0,0 +1,58 @@ +// Unlike most operators, shifts typically have a scalar on the right-hand side. +// Here, we implement shifts for scalar RHS arguments. + +use crate::simd::{LaneCount, Simd, SupportedLaneCount}; + +macro_rules!
impl_splatted_shifts { + { impl $trait:ident :: $trait_fn:ident for $ty:ty } => { + impl core::ops::$trait<$ty> for Simd<$ty, N> + where + LaneCount: SupportedLaneCount, + { + type Output = Self; + fn $trait_fn(self, rhs: $ty) -> Self::Output { + self.$trait_fn(Simd::splat(rhs)) + } + } + + impl core::ops::$trait<&$ty> for Simd<$ty, N> + where + LaneCount: SupportedLaneCount, + { + type Output = Self; + fn $trait_fn(self, rhs: &$ty) -> Self::Output { + self.$trait_fn(Simd::splat(*rhs)) + } + } + + impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N> + where + LaneCount: SupportedLaneCount, + { + type Output = Simd<$ty, N>; + fn $trait_fn(self, rhs: $ty) -> Self::Output { + self.$trait_fn(Simd::splat(rhs)) + } + } + + impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N> + where + LaneCount: SupportedLaneCount, + { + type Output = Simd<$ty, N>; + fn $trait_fn(self, rhs: &$ty) -> Self::Output { + self.$trait_fn(Simd::splat(*rhs)) + } + } + }; + { $($ty:ty),* } => { + $( + impl_splatted_shifts! { impl Shl::shl for $ty } + impl_splatted_shifts! { impl Shr::shr for $ty } + )* + } +} + +// In the past there were inference issues when generically splatting arguments. +// Enumerate them instead. +impl_splatted_shifts! 
{ i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } From 5c97c0db2457872ef83a2b30c9d30f24963a1752 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 27 Jul 2023 16:26:22 -0400 Subject: [PATCH 26/59] Add wrapping negation --- crates/core_simd/src/elements/uint.rs | 16 ++++++++++++++-- crates/core_simd/tests/ops_macros.rs | 10 ++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index 3926c395ec9..c8bf24998dd 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -16,6 +16,12 @@ pub trait SimdUint: Copy + Sealed { #[must_use] fn cast(self) -> Self::Cast; + /// Wrapping negation. + /// + /// Like [`u32::wrapping_neg`], all applications of this function will wrap, with the exception + /// of `-0`. + fn wrapping_neg(self) -> Self; + /// Lanewise saturating add. /// /// # Examples @@ -74,7 +80,7 @@ pub trait SimdUint: Copy + Sealed { } macro_rules! impl_trait { - { $($ty:ty),* } => { + { $($ty:ident ($signed:ident)),* } => { $( impl Sealed for Simd<$ty, LANES> where @@ -95,6 +101,12 @@ fn cast(self) -> Self::Cast { unsafe { intrinsics::simd_as(self) } } + #[inline] + fn wrapping_neg(self) -> Self { + use crate::simd::SimdInt; + (-self.cast::<$signed>()).cast() + } + #[inline] fn saturating_add(self, second: Self) -> Self { // Safety: `self` is a vector @@ -153,4 +165,4 @@ fn reduce_xor(self) -> Self::Scalar { } } -impl_trait! { u8, u16, u32, u64, usize } +impl_trait! { u8 (i8), u16 (i16), u32 (i32), u64 (i64), usize (isize) } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 3a02f3f01e1..ee0d3ce2f5a 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -327,6 +327,16 @@ fn rem_zero_panic() { } } + test_helpers::test_lanes! 
{ + fn wrapping_neg() { + test_helpers::test_unary_elementwise( + &Vector::::wrapping_neg, + &Scalar::wrapping_neg, + &|_| true, + ); + } + } + impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add); impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub); impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul); From e51ee248c33d8c1662c61fcfcb8cf1843979007f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 28 Jul 2023 21:26:14 -0400 Subject: [PATCH 27/59] Add tests --- crates/core_simd/tests/ops_macros.rs | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 3a02f3f01e1..dfc0e1a3708 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -94,6 +94,36 @@ fn assign() { macro_rules! impl_common_integer_tests { { $vector:ident, $scalar:ident } => { test_helpers::test_lanes! { + fn shr() { + use core::ops::Shr; + let shr = |x: $scalar, y: $scalar| x.wrapping_shr(y as _); + test_helpers::test_binary_elementwise( + &<$vector:: as Shr<$vector::>>::shr, + &shr, + &|_, _| true, + ); + test_helpers::test_binary_scalar_rhs_elementwise( + &<$vector:: as Shr<$scalar>>::shr, + &shr, + &|_, _| true, + ); + } + + fn shl() { + use core::ops::Shl; + let shl = |x: $scalar, y: $scalar| x.wrapping_shl(y as _); + test_helpers::test_binary_elementwise( + &<$vector:: as Shl<$vector::>>::shl, + &shl, + &|_, _| true, + ); + test_helpers::test_binary_scalar_rhs_elementwise( + &<$vector:: as Shl<$scalar>>::shl, + &shl, + &|_, _| true, + ); + } + fn reduce_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! 
( From 3da60554e59f0e9ffe85c717253dd0c3c681296b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 28 Jul 2023 21:38:58 -0400 Subject: [PATCH 28/59] Add footnote --- crates/core_simd/src/core_simd_docs.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md index 3d2e737e2af..8acdeb04427 100644 --- a/crates/core_simd/src/core_simd_docs.md +++ b/crates/core_simd/src/core_simd_docs.md @@ -30,8 +30,10 @@ Instead, they map to a reasonable implementation of the operation for the target Consistency between targets is not compromised to use faster or fewer instructions. In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent. -For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`) can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`]. +For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`]. When necessary, [`Simd`] can be converted to the types provided by `std::arch` to make use of target-specific functions. Many targets simply don't have SIMD, or don't support SIMD for a particular element type. In those cases, regular scalar operations are generated instead. 
+ +[^1]: `_mm_min_ps(x, y)` is equivalent to `x.simd_lt(y).select(x, y)` From 8101074e2e1b8fb3a7469446746625b7febb7f33 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 30 Jul 2023 15:53:32 -0400 Subject: [PATCH 29/59] Add various integer ops --- crates/core_simd/src/elements/int.rs | 59 ++++++++++++++++++++++++++- crates/core_simd/src/elements/uint.rs | 53 ++++++++++++++++++++++++ crates/core_simd/src/intrinsics.rs | 6 +++ crates/core_simd/src/to_bytes.rs | 44 ++++++++++++++++++++ crates/core_simd/tests/ops_macros.rs | 48 ++++++++++++++++++++++ crates/core_simd/tests/to_bytes.rs | 16 ++++++-- 6 files changed, 220 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index 6db89ff9a65..6992b679515 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -191,10 +191,29 @@ pub trait SimdInt: Copy + Sealed { /// Returns the cumulative bitwise "xor" across the lanes of the vector. fn reduce_xor(self) -> Self::Scalar; + + /// Reverses the byte order of each element. + fn swap_bytes(self) -> Self; + + /// Reverses the order of bits in each element. + /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc. + fn reverse_bits(self) -> Self; + + /// Returns the number of leading zeros in the binary representation of each element. + fn leading_zeros(self) -> Self; + + /// Returns the number of trailing zeros in the binary representation of each element. + fn trailing_zeros(self) -> Self; + + /// Returns the number of leading ones in the binary representation of each element. + fn leading_ones(self) -> Self; + + /// Returns the number of trailing ones in the binary representation of each element. + fn trailing_ones(self) -> Self; } macro_rules!
impl_trait { - { $($ty:ty),* } => { + { $($ty:ident ($unsigned:ident)),* } => { $( impl Sealed for Simd<$ty, LANES> where @@ -307,9 +326,45 @@ fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector unsafe { intrinsics::simd_reduce_xor(self) } } + + #[inline] + fn swap_bytes(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_bswap(self) } + } + + #[inline] + fn reverse_bits(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_bitreverse(self) } + } + + #[inline] + fn leading_zeros(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_ctlz(self) } + } + + #[inline] + fn trailing_zeros(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_cttz(self) } + } + + #[inline] + fn leading_ones(self) -> Self { + use crate::simd::SimdUint; + self.cast::<$unsigned>().leading_ones().cast() + } + + #[inline] + fn trailing_ones(self) -> Self { + use crate::simd::SimdUint; + self.cast::<$unsigned>().trailing_ones().cast() + } } )* } } -impl_trait! { i8, i16, i32, i64, isize } +impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) } diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index 3926c395ec9..7490340e4f4 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -71,6 +71,25 @@ pub trait SimdUint: Copy + Sealed { /// Returns the cumulative bitwise "xor" across the lanes of the vector. fn reduce_xor(self) -> Self::Scalar; + + /// Reverses the byte order of each element. + fn swap_bytes(self) -> Self; + + /// Reverses the order of bits in each element. + /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc. + fn reverse_bits(self) -> Self; + + /// Returns the number of leading zeros in the binary representation of each element.
+ fn leading_zeros(self) -> Self; + + /// Returns the number of trailing zeros in the binary representation of each element. + fn trailing_zeros(self) -> Self; + + /// Returns the number of leading ones in the binary representation of each element. + fn leading_ones(self) -> Self; + + /// Returns the number of trailing ones in the binary representation of each element. + fn trailing_ones(self) -> Self; } macro_rules! impl_trait { @@ -148,6 +167,40 @@ fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector unsafe { intrinsics::simd_reduce_xor(self) } } + + #[inline] + fn swap_bytes(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_bswap(self) } + } + + #[inline] + fn reverse_bits(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_bitreverse(self) } + } + + #[inline] + fn leading_zeros(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_ctlz(self) } + } + + #[inline] + fn trailing_zeros(self) -> Self { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_cttz(self) } + } + + #[inline] + fn leading_ones(self) -> Self { + (!self).leading_zeros() + } + + #[inline] + fn trailing_ones(self) -> Self { + (!self).trailing_zeros() + } } )* } diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index dd6698e2ba5..b27893bc729 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -160,4 +160,10 @@ /// convert an exposed address back to a pointer pub(crate) fn simd_from_exposed_addr(addr: T) -> U; + + // Integer operations + pub(crate) fn simd_bswap(x: T) -> T; + pub(crate) fn simd_bitreverse(x: T) -> T; + pub(crate) fn simd_ctlz(x: T) -> T; + pub(crate) fn simd_cttz(x: T) -> T; } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index b36b1a347b2..563b0c95a8a 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs 
@@ -1,3 +1,5 @@ +use crate::simd::SimdUint; + macro_rules! impl_to_bytes { { $ty:ty, $size:literal } => { impl crate::simd::Simd<$ty, LANES> @@ -12,12 +14,54 @@ pub fn to_ne_bytes(self) -> crate::simd::Simd { unsafe { core::mem::transmute_copy(&self) } } + /// Return the memory representation of this integer as a byte array in big-endian + /// (network) byte order. + pub fn to_be_bytes(self) -> crate::simd::Simd { + let bytes = self.to_ne_bytes(); + if cfg!(target_endian = "big") { + bytes + } else { + bytes.swap_bytes() + } + } + + /// Return the memory representation of this integer as a byte array in little-endian + /// byte order. + pub fn to_le_bytes(self) -> crate::simd::Simd { + let bytes = self.to_ne_bytes(); + if cfg!(target_endian = "little") { + bytes + } else { + bytes.swap_bytes() + } + } + /// Create a native endian integer value from its memory representation as a byte array /// in native endianness. pub fn from_ne_bytes(bytes: crate::simd::Simd) -> Self { // Safety: transmuting between vectors is safe unsafe { core::mem::transmute_copy(&bytes) } } + + /// Create an integer value from its representation as a byte array in big endian. + pub fn from_be_bytes(bytes: crate::simd::Simd) -> Self { + let bytes = if cfg!(target_endian = "big") { + bytes + } else { + bytes.swap_bytes() + }; + Self::from_ne_bytes(bytes) + } + + /// Create an integer value from its representation as a byte array in little endian. 
+ pub fn from_le_bytes(bytes: crate::simd::Simd) -> Self { + let bytes = if cfg!(target_endian = "little") { + bytes + } else { + bytes.swap_bytes() + }; + Self::from_ne_bytes(bytes) + } } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index dfc0e1a3708..bd1856e1bcc 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -193,6 +193,54 @@ fn reduce_min() { Ok(()) }); } + + fn swap_bytes() { + test_helpers::test_unary_elementwise( + &$vector::::swap_bytes, + &$scalar::swap_bytes, + &|_| true, + ) + } + + fn reverse_bits() { + test_helpers::test_unary_elementwise( + &$vector::::reverse_bits, + &$scalar::reverse_bits, + &|_| true, + ) + } + + fn leading_zeros() { + test_helpers::test_unary_elementwise( + &$vector::::leading_zeros, + &|x| x.leading_zeros() as $scalar, + &|_| true, + ) + } + + fn trailing_zeros() { + test_helpers::test_unary_elementwise( + &$vector::::leading_zeros, + &|x| x.trailing_zeros() as $scalar, + &|_| true, + ) + } + + fn leading_ones() { + test_helpers::test_unary_elementwise( + &$vector::::leading_ones, + &|x| x.leading_ones() as $scalar, + &|_| true, + ) + } + + fn trailing_ones() { + test_helpers::test_unary_elementwise( + &$vector::::leading_ones, + &|x| x.trailing_ones() as $scalar, + &|_| true, + ) + } } } } diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs index be0ee4349c5..7dd740d65dd 100644 --- a/crates/core_simd/tests/to_bytes.rs +++ b/crates/core_simd/tests/to_bytes.rs @@ -7,8 +7,16 @@ #[test] fn byte_convert() { let int = Simd::::from_array([0xdeadbeef, 0x8badf00d]); - let bytes = int.to_ne_bytes(); - assert_eq!(int[0].to_ne_bytes(), bytes[..4]); - assert_eq!(int[1].to_ne_bytes(), bytes[4..]); - assert_eq!(Simd::::from_ne_bytes(bytes), int); + let ne_bytes = int.to_ne_bytes(); + let be_bytes = int.to_be_bytes(); + let le_bytes = int.to_le_bytes(); + assert_eq!(int[0].to_ne_bytes(), ne_bytes[..4]); + 
assert_eq!(int[1].to_ne_bytes(), ne_bytes[4..]); + assert_eq!(int[0].to_be_bytes(), be_bytes[..4]); + assert_eq!(int[1].to_be_bytes(), be_bytes[4..]); + assert_eq!(int[0].to_le_bytes(), le_bytes[..4]); + assert_eq!(int[1].to_le_bytes(), le_bytes[4..]); + assert_eq!(Simd::::from_ne_bytes(ne_bytes), int); + assert_eq!(Simd::::from_be_bytes(be_bytes), int); + assert_eq!(Simd::::from_le_bytes(le_bytes), int); } From b1245ffb1277ad4274f600607f9058281baf3bc6 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 30 Jul 2023 16:20:20 -0400 Subject: [PATCH 30/59] Fix bad copy-paste --- crates/core_simd/tests/ops_macros.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index bd1856e1bcc..23e914e64b5 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -220,7 +220,7 @@ fn leading_zeros() { fn trailing_zeros() { test_helpers::test_unary_elementwise( - &$vector::::leading_zeros, + &$vector::::trailing_zeros, &|x| x.trailing_zeros() as $scalar, &|_| true, ) @@ -236,7 +236,7 @@ fn leading_ones() { fn trailing_ones() { test_helpers::test_unary_elementwise( - &$vector::::leading_ones, + &$vector::::trailing_ones, &|x| x.trailing_ones() as $scalar, &|_| true, ) From c948b703ff57f25a1a41be5e03553065454080b9 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 1 Aug 2023 07:58:44 -0400 Subject: [PATCH 31/59] Simplify signed leading_ones/trailing_ones --- crates/core_simd/src/elements/int.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index 6992b679515..e81ed7bf601 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -213,7 +213,7 @@ pub trait SimdInt: Copy + Sealed { } macro_rules! 
impl_trait { - { $($ty:ident ($unsigned:ident)),* } => { + { $($ty:ty),* } => { $( impl Sealed for Simd<$ty, LANES> where @@ -353,18 +353,16 @@ fn trailing_zeros(self) -> Self { #[inline] fn leading_ones(self) -> Self { - use crate::simd::SimdUint; - self.cast::<$unsigned>().leading_ones().cast() + (!self).leading_zeros() } #[inline] fn trailing_ones(self) -> Self { - use crate::simd::SimdUint; - self.cast::<$unsigned>().trailing_ones().cast() + (!self).trailing_zeros() } } )* } } -impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) } +impl_trait! { i8, i16, i32, i64, isize } From 29392c091b8c22a42ff5e8074d164c28e1122113 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 26 Aug 2023 12:00:05 +0000 Subject: [PATCH 32/59] Commit Cargo.lock As of rust-lang/cargo#8728 it is now recommended to always check in Cargo.lock. This will help with reproducibility and will avoid the need for cg_clif's build system to keep its own copy of Cargo.lock for vendoring. It will also allow tidy to run on the portable-simd workspace. --- .gitignore | 1 - Cargo.lock | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 Cargo.lock diff --git a/.gitignore b/.gitignore index 96ef6c0b944..ea8c4bf7f35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ /target -Cargo.lock diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000000..46312c09657 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,304 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "core_simd" +version = "0.1.0" +dependencies = [ + "proptest", + "std_float", + "test_helpers", + "wasm-bindgen", + "wasm-bindgen-test", +] + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f" +dependencies = [ + "bitflags", + "byteorder", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_xorshift" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8" +dependencies = [ + "rand_core", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "std_float" +version = "0.1.0" +dependencies = [ + "core_simd", +] + +[[package]] +name = "syn" +version = "2.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "test_helpers" +version = "0.1.0" +dependencies = [ + "proptest", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name 
= "wasm-bindgen-futures" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "wasm-bindgen-test" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671" +dependencies = [ + "console_error_panic_hook", + "js-sys", + "scoped-tls", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] From d08b2a50a3a06edd6c2f412a4ffbeaec53507d2f Mon 
Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 26 Aug 2023 12:32:10 +0000 Subject: [PATCH 33/59] Allow internal_features lint --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index fde406bda70..2d68e4cce85 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -18,6 +18,7 @@ #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] +#![allow(internal_features)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. From 0a1e7453204383a109f4854cc588fd3b35c990c6 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 10 Sep 2023 18:20:47 -0400 Subject: [PATCH 34/59] Return unsigned integers from some signed integer functions --- crates/core_simd/src/elements/int.rs | 37 +++++++++++++++------------- crates/core_simd/tests/ops_macros.rs | 8 +++--- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index e81ed7bf601..c341c59545c 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -1,6 +1,7 @@ use super::sealed::Sealed; use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SupportedLaneCount, + intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SimdUint, + SupportedLaneCount, }; /// Operations on SIMD vectors of signed integers. @@ -11,6 +12,9 @@ pub trait SimdInt: Copy + Sealed { /// Scalar type contained by this SIMD vector type. type Scalar; + /// A SIMD vector of unsigned integers with the same element size. + type Unsigned; + /// A SIMD vector with a different element type. 
type Cast; @@ -200,20 +204,20 @@ pub trait SimdInt: Copy + Sealed { fn reverse_bits(self) -> Self; /// Returns the number of leading zeros in the binary representation of each element. - fn leading_zeros(self) -> Self; + fn leading_zeros(self) -> Self::Unsigned; /// Returns the number of trailing zeros in the binary representation of each element. - fn trailing_zeros(self) -> Self; + fn trailing_zeros(self) -> Self::Unsigned; /// Returns the number of leading ones in the binary representation of each element. - fn leading_ones(self) -> Self; + fn leading_ones(self) -> Self::Unsigned; /// Returns the number of trailing ones in the binary representation of each element. - fn trailing_ones(self) -> Self; + fn trailing_ones(self) -> Self::Unsigned; } macro_rules! impl_trait { - { $($ty:ty),* } => { + { $($ty:ident ($unsigned:ident)),* } => { $( impl Sealed for Simd<$ty, LANES> where @@ -227,6 +231,7 @@ impl SimdInt for Simd<$ty, LANES> { type Mask = Mask<<$ty as SimdElement>::Mask, LANES>; type Scalar = $ty; + type Unsigned = Simd<$unsigned, LANES>; type Cast = Simd; #[inline] @@ -340,29 +345,27 @@ fn reverse_bits(self) -> Self { } #[inline] - fn leading_zeros(self) -> Self { - // Safety: `self` is an integer vector - unsafe { intrinsics::simd_ctlz(self) } + fn leading_zeros(self) -> Self::Unsigned { + self.cast::<$unsigned>().leading_zeros() } #[inline] - fn trailing_zeros(self) -> Self { - // Safety: `self` is an integer vector - unsafe { intrinsics::simd_cttz(self) } + fn trailing_zeros(self) -> Self::Unsigned { + self.cast::<$unsigned>().trailing_zeros() } #[inline] - fn leading_ones(self) -> Self { - (!self).leading_zeros() + fn leading_ones(self) -> Self::Unsigned { + self.cast::<$unsigned>().leading_ones() } #[inline] - fn trailing_ones(self) -> Self { - (!self).trailing_zeros() + fn trailing_ones(self) -> Self::Unsigned { + self.cast::<$unsigned>().trailing_ones() } } )* } } -impl_trait! { i8, i16, i32, i64, isize } +impl_trait! 
{ i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 23e914e64b5..135f3ecf7b2 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -213,7 +213,7 @@ fn reverse_bits() { fn leading_zeros() { test_helpers::test_unary_elementwise( &$vector::::leading_zeros, - &|x| x.leading_zeros() as $scalar, + &|x| x.leading_zeros() as _, &|_| true, ) } @@ -221,7 +221,7 @@ fn leading_zeros() { fn trailing_zeros() { test_helpers::test_unary_elementwise( &$vector::::trailing_zeros, - &|x| x.trailing_zeros() as $scalar, + &|x| x.trailing_zeros() as _, &|_| true, ) } @@ -229,7 +229,7 @@ fn trailing_zeros() { fn leading_ones() { test_helpers::test_unary_elementwise( &$vector::::leading_ones, - &|x| x.leading_ones() as $scalar, + &|x| x.leading_ones() as _, &|_| true, ) } @@ -237,7 +237,7 @@ fn leading_ones() { fn trailing_ones() { test_helpers::test_unary_elementwise( &$vector::::trailing_ones, - &|x| x.trailing_ones() as $scalar, + &|x| x.trailing_ones() as _, &|_| true, ) } From eb3c050405cf9fe8342225b328e245dc02dd8b48 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 23 Sep 2023 13:30:21 -0400 Subject: [PATCH 35/59] Fix lints --- crates/core_simd/src/masks/to_bitmask.rs | 1 + crates/core_simd/src/ops/shift_scalar.rs | 4 ++++ crates/core_simd/src/to_bytes.rs | 6 ++++++ crates/core_simd/src/vendor.rs | 2 +- crates/core_simd/src/vendor/x86.rs | 2 +- 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index fc7d6b781f2..8e724c9de8c 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -74,6 +74,7 @@ impl ToBitMask for Mask<_, 64> /// Returns the minimum number of bytes in a bitmask with `lanes` lanes. 
#[cfg(feature = "generic_const_exprs")] +#[allow(clippy::missing_inline_in_public_items)] pub const fn bitmask_len(lanes: usize) -> usize { (lanes + 7) / 8 } diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs index 77aac656395..f5115a5a5e9 100644 --- a/crates/core_simd/src/ops/shift_scalar.rs +++ b/crates/core_simd/src/ops/shift_scalar.rs @@ -10,6 +10,7 @@ impl core::ops::$trait<$ty> for Simd<$ty, N> LaneCount: SupportedLaneCount, { type Output = Self; + #[inline] fn $trait_fn(self, rhs: $ty) -> Self::Output { self.$trait_fn(Simd::splat(rhs)) } @@ -20,6 +21,7 @@ impl core::ops::$trait<&$ty> for Simd<$ty, N> LaneCount: SupportedLaneCount, { type Output = Self; + #[inline] fn $trait_fn(self, rhs: &$ty) -> Self::Output { self.$trait_fn(Simd::splat(*rhs)) } @@ -30,6 +32,7 @@ impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N> LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; + #[inline] fn $trait_fn(self, rhs: $ty) -> Self::Output { self.$trait_fn(Simd::splat(rhs)) } @@ -40,6 +43,7 @@ impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N> LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; + #[inline] fn $trait_fn(self, rhs: &$ty) -> Self::Output { self.$trait_fn(Simd::splat(*rhs)) } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 563b0c95a8a..5f1374fd5a5 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -9,6 +9,7 @@ impl crate::simd::Simd<$ty, LANES> { /// Return the memory representation of this integer as a byte array in native byte /// order. + #[inline] pub fn to_ne_bytes(self) -> crate::simd::Simd { // Safety: transmuting between vectors is safe unsafe { core::mem::transmute_copy(&self) } @@ -16,6 +17,7 @@ pub fn to_ne_bytes(self) -> crate::simd::Simd { /// Return the memory representation of this integer as a byte array in big-endian /// (network) byte order. 
+ #[inline] pub fn to_be_bytes(self) -> crate::simd::Simd { let bytes = self.to_ne_bytes(); if cfg!(target_endian = "big") { @@ -27,6 +29,7 @@ pub fn to_be_bytes(self) -> crate::simd::Simd { /// Return the memory representation of this integer as a byte array in little-endian /// byte order. + #[inline] pub fn to_le_bytes(self) -> crate::simd::Simd { let bytes = self.to_ne_bytes(); if cfg!(target_endian = "little") { @@ -38,12 +41,14 @@ pub fn to_le_bytes(self) -> crate::simd::Simd { /// Create a native endian integer value from its memory representation as a byte array /// in native endianness. + #[inline] pub fn from_ne_bytes(bytes: crate::simd::Simd) -> Self { // Safety: transmuting between vectors is safe unsafe { core::mem::transmute_copy(&bytes) } } /// Create an integer value from its representation as a byte array in big endian. + #[inline] pub fn from_be_bytes(bytes: crate::simd::Simd) -> Self { let bytes = if cfg!(target_endian = "big") { bytes @@ -54,6 +59,7 @@ pub fn from_be_bytes(bytes: crate::simd::Simd) -> Self } /// Create an integer value from its representation as a byte array in little endian. 
+ #[inline] pub fn from_le_bytes(bytes: crate::simd::Simd) -> Self { let bytes = if cfg!(target_endian = "little") { bytes diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs index 9fb70218c95..6223bedb4e1 100644 --- a/crates/core_simd/src/vendor.rs +++ b/crates/core_simd/src/vendor.rs @@ -21,7 +21,7 @@ fn from(value: $from) -> $to { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod x86; -#[cfg(any(target_arch = "wasm32"))] +#[cfg(target_arch = "wasm32")] mod wasm32; #[cfg(any(target_arch = "aarch64", target_arch = "arm",))] diff --git a/crates/core_simd/src/vendor/x86.rs b/crates/core_simd/src/vendor/x86.rs index 0dd47015ed2..66aaf90eef5 100644 --- a/crates/core_simd/src/vendor/x86.rs +++ b/crates/core_simd/src/vendor/x86.rs @@ -1,6 +1,6 @@ use crate::simd::*; -#[cfg(any(target_arch = "x86"))] +#[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] From 4825b2a64d765317066948867e8714674419359b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 1 Oct 2023 21:29:03 -0400 Subject: [PATCH 36/59] Fix lint --- crates/std_float/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 4bd4d4c05e3..bb2b5a2dbba 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -2,7 +2,8 @@ #![cfg_attr( feature = "as_crate", feature(platform_intrinsics), - feature(portable_simd) + feature(portable_simd), + allow(internal_features) )] #[cfg(not(feature = "as_crate"))] use core::simd; From a93ded542652cdff67e8b222c91a401d8e905777 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 1 Oct 2023 21:28:03 -0400 Subject: [PATCH 37/59] Remove generic_const_exprs --- crates/core_simd/Cargo.toml | 1 - crates/core_simd/src/lib.rs | 2 - crates/core_simd/src/masks.rs | 5 +- crates/core_simd/src/masks/bitmask.rs | 2 - crates/core_simd/src/masks/full_masks.rs | 25 +--- crates/core_simd/src/masks/to_bitmask.rs | 64 
++++----- crates/core_simd/src/mod.rs | 5 +- crates/core_simd/src/to_bytes.rs | 163 +++++++++++++++-------- crates/core_simd/tests/masks.rs | 1 - crates/core_simd/tests/to_bytes.rs | 6 +- 10 files changed, 156 insertions(+), 118 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index d1a3a515a7e..b4a8fd70f4c 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -12,7 +12,6 @@ license = "MIT OR Apache-2.0" default = ["as_crate"] as_crate = [] std = [] -generic_const_exprs = [] all_lane_counts = [] [target.'cfg(target_arch = "wasm32")'.dev-dependencies] diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 2d68e4cce85..dd3c546e014 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -14,8 +14,6 @@ strict_provenance, ptr_metadata )] -#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))] -#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![allow(internal_features)] diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index fea687bdc1a..b6af9f83581 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -13,10 +13,7 @@ mod mask_impl; mod to_bitmask; -pub use to_bitmask::ToBitMask; - -#[cfg(feature = "generic_const_exprs")] -pub use to_bitmask::{bitmask_len, ToBitMaskArray}; +pub use to_bitmask::{ToBitMask, ToBitMaskArray}; use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; use core::cmp::Ordering; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 20465ba9b07..a7df6304bc7 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -119,7 +119,6 @@ pub unsafe fn from_int_unchecked(value: 
Simd) -> Self { unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } - #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask_array(self) -> [u8; N] { @@ -129,7 +128,6 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { unsafe { core::mem::transmute_copy(&self.0) } } - #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask_array(bitmask: [u8; N]) -> Self { diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 1d13c45b8e7..4b36adece71 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -1,12 +1,9 @@ //! Masks that take up full SIMD vector registers. -use super::MaskElement; +use super::{to_bitmask::ToBitMaskArray, MaskElement}; use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; -#[cfg(feature = "generic_const_exprs")] -use crate::simd::ToBitMaskArray; - #[repr(transparent)] pub struct Mask(Simd) where @@ -145,23 +142,19 @@ pub fn convert(self) -> Mask unsafe { Mask(intrinsics::simd_cast(self.0)) } } - #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask_array(self) -> [u8; N] where super::Mask: ToBitMaskArray, - [(); as ToBitMaskArray>::BYTES]: Sized, { - assert_eq!( as ToBitMaskArray>::BYTES, N); - - // Safety: N is the correct bitmask size + // Safety: Bytes is the right size array unsafe { // Compute the bitmask - let bitmask: [u8; as ToBitMaskArray>::BYTES] = + let bitmask: as ToBitMaskArray>::BitMaskArray = intrinsics::simd_bitmask(self.0); - // Transmute to the return type, previously asserted to be the same size + // Transmute to the return type let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask); // LLVM 
assumes bit order should match endianness @@ -175,17 +168,13 @@ pub fn convert(self) -> Mask } } - #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask_array(mut bitmask: [u8; N]) -> Self where super::Mask: ToBitMaskArray, - [(); as ToBitMaskArray>::BYTES]: Sized, { - assert_eq!( as ToBitMaskArray>::BYTES, N); - - // Safety: N is the correct bitmask size + // Safety: Bytes is the right size array unsafe { // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -194,8 +183,8 @@ pub fn convert(self) -> Mask } } - // Transmute to the bitmask type, previously asserted to be the same size - let bitmask: [u8; as ToBitMaskArray>::BYTES] = + // Transmute to the bitmask + let bitmask: as ToBitMaskArray>::BitMaskArray = core::mem::transmute_copy(&bitmask); // Compute the regular mask diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 8e724c9de8c..7041d15164d 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -30,19 +30,18 @@ pub trait ToBitMask: Sealed { /// Converts masks to and from byte array bitmasks. /// /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. -#[cfg(feature = "generic_const_exprs")] pub trait ToBitMaskArray: Sealed { - /// The length of the bitmask array. - const BYTES: usize; + /// The bitmask array. + type BitMaskArray; /// Converts a mask to a bitmask. - fn to_bitmask_array(self) -> [u8; Self::BYTES]; + fn to_bitmask_array(self) -> Self::BitMaskArray; /// Converts a bitmask to a mask. - fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self; + fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self; } -macro_rules! impl_integer_intrinsic { +macro_rules! 
impl_integer { { $(impl ToBitMask for Mask<_, $lanes:literal>)* } => { $( impl ToBitMask for Mask { @@ -62,7 +61,27 @@ fn from_bitmask(bitmask: $int) -> Self { } } -impl_integer_intrinsic! { +macro_rules! impl_array { + { $(impl ToBitMaskArray for Mask<_, $lanes:literal>)* } => { + $( + impl ToBitMaskArray for Mask { + type BitMaskArray = [u8; $int]; + + #[inline] + fn to_bitmask_array(self) -> Self::BitMaskArray { + self.0.to_bitmask_array() + } + + #[inline] + fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self { + Self(mask_impl::Mask::from_bitmask_array(bitmask)) + } + } + )* + } +} + +impl_integer! { impl ToBitMask for Mask<_, 1> impl ToBitMask for Mask<_, 2> impl ToBitMask for Mask<_, 4> @@ -72,27 +91,12 @@ impl ToBitMask for Mask<_, 32> impl ToBitMask for Mask<_, 64> } -/// Returns the minimum number of bytes in a bitmask with `lanes` lanes. -#[cfg(feature = "generic_const_exprs")] -#[allow(clippy::missing_inline_in_public_items)] -pub const fn bitmask_len(lanes: usize) -> usize { - (lanes + 7) / 8 -} - -#[cfg(feature = "generic_const_exprs")] -impl ToBitMaskArray for Mask -where - LaneCount: SupportedLaneCount, -{ - const BYTES: usize = bitmask_len(LANES); - - #[inline] - fn to_bitmask_array(self) -> [u8; Self::BYTES] { - self.0.to_bitmask_array() - } - - #[inline] - fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self { - Mask(mask_impl::Mask::from_bitmask_array(bitmask)) - } +impl_array! 
{ + impl ToBitMaskArray for Mask<_, 1> + impl ToBitMaskArray for Mask<_, 2> + impl ToBitMaskArray for Mask<_, 4> + impl ToBitMaskArray for Mask<_, 8> + impl ToBitMaskArray for Mask<_, 16> + impl ToBitMaskArray for Mask<_, 32> + impl ToBitMaskArray for Mask<_, 64> } diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index dd954b7cc48..f489ae36de4 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -3,9 +3,6 @@ pub(crate) mod intrinsics; -#[cfg(feature = "generic_const_exprs")] -mod to_bytes; - mod alias; mod cast; mod elements; @@ -18,6 +15,7 @@ mod ord; mod select; mod swizzle_dyn; +mod to_bytes; mod vector; mod vendor; @@ -37,5 +35,6 @@ pub mod simd { pub use crate::core_simd::ord::*; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::swizzle_dyn::*; + pub use crate::core_simd::to_bytes::ToBytes; pub use crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 5f1374fd5a5..5fe4a77d50d 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -1,73 +1,127 @@ -use crate::simd::SimdUint; +use crate::simd::{LaneCount, Simd, SimdElement, SimdFloat, SimdInt, SimdUint, SupportedLaneCount}; + +mod sealed { + use super::*; + pub trait Sealed {} + impl Sealed for Simd where LaneCount: SupportedLaneCount {} +} +use sealed::Sealed; + +/// Convert SIMD vectors to vectors of bytes +pub trait ToBytes: Sealed { + /// This type, reinterpreted as bytes. + type Bytes; + + /// Return the memory representation of this integer as a byte array in native byte + /// order. + fn to_ne_bytes(self) -> Self::Bytes; + + /// Return the memory representation of this integer as a byte array in big-endian + /// (network) byte order. + fn to_be_bytes(self) -> Self::Bytes; + + /// Return the memory representation of this integer as a byte array in little-endian + /// byte order. 
+ fn to_le_bytes(self) -> Self::Bytes; + + /// Create a native endian integer value from its memory representation as a byte array + /// in native endianness. + fn from_ne_bytes(bytes: Self::Bytes) -> Self; + + /// Create an integer value from its representation as a byte array in big endian. + fn from_be_bytes(bytes: Self::Bytes) -> Self; + + /// Create an integer value from its representation as a byte array in little endian. + fn from_le_bytes(bytes: Self::Bytes) -> Self; +} + +macro_rules! swap_bytes { + { f32, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) }; + { f64, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) }; + { $ty:ty, $x:expr } => { $x.swap_bytes() } +} macro_rules! impl_to_bytes { - { $ty:ty, $size:literal } => { - impl crate::simd::Simd<$ty, LANES> - where - crate::simd::LaneCount: crate::simd::SupportedLaneCount, - crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount, - { - /// Return the memory representation of this integer as a byte array in native byte - /// order. + { $ty:tt, $size:tt } => { + impl_to_bytes! { $ty, $size * 1 } + impl_to_bytes! { $ty, $size * 2 } + impl_to_bytes! { $ty, $size * 4 } + impl_to_bytes! { $ty, $size * 8 } + impl_to_bytes! { $ty, $size * 16 } + impl_to_bytes! { $ty, $size * 32 } + impl_to_bytes! { $ty, $size * 64 } + }; + + // multiply element size by number of elements + { $ty:tt, 1 * $elems:literal } => { impl_to_bytes! { @impl [$ty; $elems], $elems } }; + { $ty:tt, $size:literal * 1 } => { impl_to_bytes! { @impl [$ty; 1], $size } }; + { $ty:tt, 2 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 4 } }; + { $ty:tt, 2 * 4 } => { impl_to_bytes! { @impl [$ty; 4], 8 } }; + { $ty:tt, 2 * 8 } => { impl_to_bytes! { @impl [$ty; 8], 16 } }; + { $ty:tt, 2 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 32 } }; + { $ty:tt, 2 * 32 } => { impl_to_bytes! { @impl [$ty; 32], 64 } }; + { $ty:tt, 4 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 8 } }; + { $ty:tt, 4 * 4 } => { impl_to_bytes! 
{ @impl [$ty; 4], 16 } }; + { $ty:tt, 4 * 8 } => { impl_to_bytes! { @impl [$ty; 8], 32 } }; + { $ty:tt, 4 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 64 } }; + { $ty:tt, 8 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 16 } }; + { $ty:tt, 8 * 4 } => { impl_to_bytes! { @impl [$ty; 4], 32 } }; + { $ty:tt, 8 * 8 } => { impl_to_bytes! { @impl [$ty; 8], 64 } }; + + // unsupported number of lanes + { $ty:ty, $a:literal * $b:literal } => { }; + + { @impl [$ty:tt; $elem:literal], $bytes:literal } => { + impl ToBytes for Simd<$ty, $elem> { + type Bytes = Simd; + #[inline] - pub fn to_ne_bytes(self) -> crate::simd::Simd { + fn to_ne_bytes(self) -> Self::Bytes { // Safety: transmuting between vectors is safe - unsafe { core::mem::transmute_copy(&self) } + unsafe { core::mem::transmute(self) } } - /// Return the memory representation of this integer as a byte array in big-endian - /// (network) byte order. #[inline] - pub fn to_be_bytes(self) -> crate::simd::Simd { - let bytes = self.to_ne_bytes(); + fn to_be_bytes(mut self) -> Self::Bytes { + if !cfg!(target_endian = "big") { + self = swap_bytes!($ty, self); + } + self.to_ne_bytes() + } + + #[inline] + fn to_le_bytes(mut self) -> Self::Bytes { + if !cfg!(target_endian = "little") { + self = swap_bytes!($ty, self); + } + self.to_ne_bytes() + } + + #[inline] + fn from_ne_bytes(bytes: Self::Bytes) -> Self { + // Safety: transmuting between vectors is safe + unsafe { core::mem::transmute(bytes) } + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + let ret = Self::from_ne_bytes(bytes); if cfg!(target_endian = "big") { - bytes + ret } else { - bytes.swap_bytes() + swap_bytes!($ty, ret) } } - /// Return the memory representation of this integer as a byte array in little-endian - /// byte order. 
#[inline] - pub fn to_le_bytes(self) -> crate::simd::Simd { - let bytes = self.to_ne_bytes(); + fn from_le_bytes(bytes: Self::Bytes) -> Self { + let ret = Self::from_ne_bytes(bytes); if cfg!(target_endian = "little") { - bytes + ret } else { - bytes.swap_bytes() + swap_bytes!($ty, ret) } } - - /// Create a native endian integer value from its memory representation as a byte array - /// in native endianness. - #[inline] - pub fn from_ne_bytes(bytes: crate::simd::Simd) -> Self { - // Safety: transmuting between vectors is safe - unsafe { core::mem::transmute_copy(&bytes) } - } - - /// Create an integer value from its representation as a byte array in big endian. - #[inline] - pub fn from_be_bytes(bytes: crate::simd::Simd) -> Self { - let bytes = if cfg!(target_endian = "big") { - bytes - } else { - bytes.swap_bytes() - }; - Self::from_ne_bytes(bytes) - } - - /// Create an integer value from its representation as a byte array in little endian. - #[inline] - pub fn from_le_bytes(bytes: crate::simd::Simd) -> Self { - let bytes = if cfg!(target_endian = "little") { - bytes - } else { - bytes.swap_bytes() - }; - Self::from_ne_bytes(bytes) - } } } } @@ -89,3 +143,6 @@ pub fn from_le_bytes(bytes: crate::simd::Simd) -> Self impl_to_bytes! { isize, 4 } #[cfg(target_pointer_width = "64")] impl_to_bytes! { isize, 8 } + +impl_to_bytes! { f32, 4 } +impl_to_bytes! 
{ f64, 8 } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 9f8bad1c36c..7c1d4c7dd3f 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -125,7 +125,6 @@ fn cast_impl() cast_impl::(); } - #[cfg(feature = "generic_const_exprs")] #[test] fn roundtrip_bitmask_array_conversion() { use core_simd::simd::ToBitMaskArray; diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs index 7dd740d65dd..66a7981cdc3 100644 --- a/crates/core_simd/tests/to_bytes.rs +++ b/crates/core_simd/tests/to_bytes.rs @@ -1,8 +1,6 @@ -#![feature(portable_simd, generic_const_exprs, adt_const_params)] -#![allow(incomplete_features)] -#![cfg(feature = "generic_const_exprs")] +#![feature(portable_simd)] -use core_simd::simd::Simd; +use core_simd::simd::{Simd, ToBytes}; #[test] fn byte_convert() { From b070f0f657bbe4e8a24c3731fe6a230fda64cdd0 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 1 Oct 2023 23:34:48 -0400 Subject: [PATCH 38/59] Fix cargo features in CI and enable them for testing --- .github/workflows/ci.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed1589be4f1..90543044ea8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -238,7 +238,7 @@ jobs: run: cross test --verbose --target=${{ matrix.target }} --release features: - name: "Check cargo features (${{ matrix.simd }} × ${{ matrix.features }})" + name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})" runs-on: ubuntu-latest strategy: fail-fast: false @@ -249,12 +249,8 @@ jobs: features: - "" - "--features std" - - "--features generic_const_exprs" - - "--features std --features generic_const_exprs" - "--features all_lane_counts" - - "--features all_lane_counts --features std" - - "--features all_lane_counts --features generic_const_exprs" - - "--features all_lane_counts --features std --features 
generic_const_exprs" + - "--all-features" steps: - uses: actions/checkout@v2 @@ -266,9 +262,9 @@ jobs: run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV - name: Check build if: ${{ matrix.simd == '' }} - run: RUSTFLAGS="-Dwarnings" cargo check --all-targets --no-default-features ${{ matrix.features }} + run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }} - name: Check AVX if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }} run: | echo "Found AVX features: $CPU_FEATURE" - RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo check --all-targets --no-default-features ${{ matrix.features }} + RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }} From b411cb401d97d128f87c47f3f58f615fa041d879 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 2 Oct 2023 16:15:37 -0400 Subject: [PATCH 39/59] Simplify macro --- crates/core_simd/src/to_bytes.rs | 43 +++++++++----------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 5fe4a77d50d..07a3efea01a 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -42,38 +42,18 @@ macro_rules! swap_bytes { } macro_rules! impl_to_bytes { - { $ty:tt, $size:tt } => { - impl_to_bytes! { $ty, $size * 1 } - impl_to_bytes! { $ty, $size * 2 } - impl_to_bytes! { $ty, $size * 4 } - impl_to_bytes! { $ty, $size * 8 } - impl_to_bytes! { $ty, $size * 16 } - impl_to_bytes! { $ty, $size * 32 } - impl_to_bytes! { $ty, $size * 64 } - }; + { $ty:tt, 1 } => { impl_to_bytes! { $ty, 1 * [1, 2, 4, 8, 16, 32, 64] } }; + { $ty:tt, 2 } => { impl_to_bytes! { $ty, 2 * [1, 2, 4, 8, 16, 32] } }; + { $ty:tt, 4 } => { impl_to_bytes! { $ty, 4 * [1, 2, 4, 8, 16] } }; + { $ty:tt, 8 } => { impl_to_bytes! 
{ $ty, 8 * [1, 2, 4, 8] } }; + { $ty:tt, 16 } => { impl_to_bytes! { $ty, 16 * [1, 2, 4] } }; + { $ty:tt, 32 } => { impl_to_bytes! { $ty, 32 * [1, 2] } }; + { $ty:tt, 64 } => { impl_to_bytes! { $ty, 64 * [1] } }; - // multiply element size by number of elements - { $ty:tt, 1 * $elems:literal } => { impl_to_bytes! { @impl [$ty; $elems], $elems } }; - { $ty:tt, $size:literal * 1 } => { impl_to_bytes! { @impl [$ty; 1], $size } }; - { $ty:tt, 2 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 4 } }; - { $ty:tt, 2 * 4 } => { impl_to_bytes! { @impl [$ty; 4], 8 } }; - { $ty:tt, 2 * 8 } => { impl_to_bytes! { @impl [$ty; 8], 16 } }; - { $ty:tt, 2 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 32 } }; - { $ty:tt, 2 * 32 } => { impl_to_bytes! { @impl [$ty; 32], 64 } }; - { $ty:tt, 4 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 8 } }; - { $ty:tt, 4 * 4 } => { impl_to_bytes! { @impl [$ty; 4], 16 } }; - { $ty:tt, 4 * 8 } => { impl_to_bytes! { @impl [$ty; 8], 32 } }; - { $ty:tt, 4 * 16 } => { impl_to_bytes! { @impl [$ty; 16], 64 } }; - { $ty:tt, 8 * 2 } => { impl_to_bytes! { @impl [$ty; 2], 16 } }; - { $ty:tt, 8 * 4 } => { impl_to_bytes! { @impl [$ty; 4], 32 } }; - { $ty:tt, 8 * 8 } => { impl_to_bytes! 
{ @impl [$ty; 8], 64 } }; - - // unsupported number of lanes - { $ty:ty, $a:literal * $b:literal } => { }; - - { @impl [$ty:tt; $elem:literal], $bytes:literal } => { - impl ToBytes for Simd<$ty, $elem> { - type Bytes = Simd; + { $ty:tt, $size:literal * [$($elems:literal),*] } => { + $( + impl ToBytes for Simd<$ty, $elems> { + type Bytes = Simd; #[inline] fn to_ne_bytes(self) -> Self::Bytes { @@ -123,6 +103,7 @@ fn from_le_bytes(bytes: Self::Bytes) -> Self { } } } + )* } } From afe28b13e73da85c3b8e711e24dc709829a142b9 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 2 Oct 2023 16:27:18 -0400 Subject: [PATCH 40/59] Add various bounds --- crates/core_simd/src/masks/to_bitmask.rs | 11 ++++++++++- crates/core_simd/src/to_bytes.rs | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 7041d15164d..12cb1771ce1 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -1,5 +1,6 @@ use super::{mask_impl, Mask, MaskElement}; use crate::simd::{LaneCount, SupportedLaneCount}; +use core::borrow::{Borrow, BorrowMut}; mod sealed { pub trait Sealed {} @@ -32,7 +33,15 @@ pub trait ToBitMask: Sealed { /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. pub trait ToBitMaskArray: Sealed { /// The bitmask array. - type BitMaskArray; + type BitMaskArray: Copy + + Unpin + + Send + + Sync + + AsRef<[u8]> + + AsMut<[u8]> + + Borrow<[u8]> + + BorrowMut<[u8]> + + 'static; /// Converts a mask to a bitmask. 
fn to_bitmask_array(self) -> Self::BitMaskArray; diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 07a3efea01a..3c93fe47404 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -10,7 +10,14 @@ impl Sealed for Simd where LaneCount: S /// Convert SIMD vectors to vectors of bytes pub trait ToBytes: Sealed { /// This type, reinterpreted as bytes. - type Bytes; + type Bytes: Copy + + Unpin + + Send + + Sync + + AsRef<[u8]> + + AsMut<[u8]> + + SimdUint + + 'static; /// Return the memory representation of this integer as a byte array in native byte /// order. From 6a3c45eea827681ef1e8895f7714226ead61037e Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 14 Oct 2023 14:00:45 -0700 Subject: [PATCH 41/59] Eliminate use of #[cfg_attr(not(doc), repr(transparent))] --- crates/core_simd/src/masks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index fea687bdc1a..e04448a50be 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -88,7 +88,7 @@ unsafe impl MaskElement for $ty {} /// The layout of this type is unspecified, and may change between platforms /// and/or Rust versions, and code should not assume that it is equivalent to /// `[T; LANES]`. 
-#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 +#[repr(transparent)] pub struct Mask(mask_impl::Mask) where T: MaskElement, From 596aabe5c7dd00a037a8aa5fd41b929010ebb7ae Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 23 Sep 2023 00:19:36 -0400 Subject: [PATCH 42/59] Add num, ptr, and cmp modules --- crates/core_simd/examples/dot_product.rs | 2 +- crates/core_simd/examples/matrix_inversion.rs | 6 ++++-- crates/core_simd/examples/nbody.rs | 2 +- crates/core_simd/examples/spectral_norm.rs | 2 +- crates/core_simd/src/core_simd_docs.md | 2 +- crates/core_simd/src/masks.rs | 4 +++- crates/core_simd/src/mod.rs | 12 +++++------ crates/core_simd/src/ops.rs | 2 +- crates/core_simd/src/simd/cmp.rs | 7 +++++++ crates/core_simd/src/{ => simd/cmp}/eq.rs | 4 +++- crates/core_simd/src/{ => simd/cmp}/ord.rs | 5 ++++- .../src/{elements.rs => simd/num.rs} | 6 ++---- .../src/{elements => simd/num}/float.rs | 14 ++++++------- .../src/{elements => simd/num}/int.rs | 20 +++++++++---------- .../src/{elements => simd/num}/uint.rs | 6 +++--- crates/core_simd/src/simd/prelude.rs | 6 ++++-- crates/core_simd/src/simd/ptr.rs | 11 ++++++++++ .../src/{elements => simd/ptr}/const_ptr.rs | 4 +++- .../src/{elements => simd/ptr}/mut_ptr.rs | 4 +++- crates/core_simd/src/swizzle_dyn.rs | 4 ++-- crates/core_simd/src/to_bytes.rs | 5 ++++- crates/core_simd/src/vector.rs | 18 +++++++++-------- crates/core_simd/tests/cast.rs | 2 +- crates/core_simd/tests/ops_macros.rs | 12 +++++------ crates/core_simd/tests/pointers.rs | 5 ++++- crates/core_simd/tests/round.rs | 2 +- crates/std_float/src/lib.rs | 2 +- 27 files changed, 104 insertions(+), 65 deletions(-) create mode 100644 crates/core_simd/src/simd/cmp.rs rename crates/core_simd/src/{ => simd/cmp}/eq.rs (96%) rename crates/core_simd/src/{ => simd/cmp}/ord.rs (98%) rename crates/core_simd/src/{elements.rs => simd/num.rs} (63%) rename crates/core_simd/src/{elements => simd/num}/float.rs (98%) 
rename crates/core_simd/src/{elements => simd/num}/int.rs (96%) rename crates/core_simd/src/{elements => simd/num}/uint.rs (98%) create mode 100644 crates/core_simd/src/simd/ptr.rs rename crates/core_simd/src/{elements => simd/ptr}/const_ptr.rs (97%) rename crates/core_simd/src/{elements => simd/ptr}/mut_ptr.rs (97%) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 391f08f55a0..e5815888bb7 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -6,7 +6,7 @@ #![feature(slice_as_chunks)] // Add these imports to use the stdsimd library #![feature(portable_simd)] -use core_simd::simd::*; +use core_simd::simd::prelude::*; // This is your barebones dot product implementation: // Take 2 vectors, multiply them element wise and *then* diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index 39f530f68f5..5176623c160 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -2,8 +2,10 @@ // Code ported from the `packed_simd` crate // Run this code with `cargo test --example matrix_inversion` #![feature(array_chunks, portable_simd)] -use core_simd::simd::*; -use Which::*; +use core_simd::simd::{ + prelude::*, + Which::{self, *}, +}; // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index df38a00967f..154e24c460e 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -5,7 +5,7 @@ /// Taken from the `packed_simd` crate /// Run this benchmark with `cargo test --example nbody` mod nbody { - use core_simd::simd::*; + use core_simd::simd::prelude::*; #[allow(unused)] // False positive? 
use std_float::StdFloat; diff --git a/crates/core_simd/examples/spectral_norm.rs b/crates/core_simd/examples/spectral_norm.rs index d576bd0ccee..bc7934c2522 100644 --- a/crates/core_simd/examples/spectral_norm.rs +++ b/crates/core_simd/examples/spectral_norm.rs @@ -1,6 +1,6 @@ #![feature(portable_simd)] -use core_simd::simd::*; +use core_simd::simd::prelude::*; fn a(i: usize, j: usize) -> f64 { ((i + j) * (i + j + 1) / 2 + i + 1) as f64 diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md index 8acdeb04427..fa93155ff5e 100644 --- a/crates/core_simd/src/core_simd_docs.md +++ b/crates/core_simd/src/core_simd_docs.md @@ -30,7 +30,7 @@ Instead, they map to a reasonable implementation of the operation for the target Consistency between targets is not compromised to use faster or fewer instructions. In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent. -For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`], but does not conform to the IEEE standard also used by [`f32::min`]. +For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`]. When necessary, [`Simd`] can be converted to the types provided by `std::arch` to make use of target-specific functions. Many targets simply don't have SIMD, or don't support SIMD for a particular element type. 
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 13ae5088fb9..0a04cf66757 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -15,7 +15,9 @@ mod to_bitmask; pub use to_bitmask::{ToBitMask, ToBitMaskArray}; -use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{ + cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount, +}; use core::cmp::Ordering; use core::{fmt, mem}; diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index f489ae36de4..6fd458d24e7 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -5,14 +5,11 @@ mod alias; mod cast; -mod elements; -mod eq; mod fmt; mod iter; mod lane_count; mod masks; mod ops; -mod ord; mod select; mod swizzle_dyn; mod to_bytes; @@ -24,15 +21,18 @@ pub mod simd { pub mod prelude; + pub mod num; + + pub mod ptr; + + pub mod cmp; + pub(crate) use crate::core_simd::intrinsics; pub use crate::core_simd::alias::*; pub use crate::core_simd::cast::*; - pub use crate::core_simd::elements::*; - pub use crate::core_simd::eq::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; - pub use crate::core_simd::ord::*; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::swizzle_dyn::*; pub use crate::core_simd::to_bytes::ToBytes; diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 63a96106283..d1b4a504884 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; diff --git a/crates/core_simd/src/simd/cmp.rs b/crates/core_simd/src/simd/cmp.rs new file mode 100644 
index 00000000000..a8d81dbf20f --- /dev/null +++ b/crates/core_simd/src/simd/cmp.rs @@ -0,0 +1,7 @@ +//! Traits for comparing and ordering vectors. + +mod eq; +mod ord; + +pub use eq::*; +pub use ord::*; diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs similarity index 96% rename from crates/core_simd/src/eq.rs rename to crates/core_simd/src/simd/cmp/eq.rs index 80763c07272..627ceba3c6f 100644 --- a/crates/core_simd/src/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -1,5 +1,7 @@ use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdElement, SimdMutPtr, SupportedLaneCount, + intrinsics, + ptr::{SimdConstPtr, SimdMutPtr}, + LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, }; /// Parallel `PartialEq`. diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs similarity index 98% rename from crates/core_simd/src/ord.rs rename to crates/core_simd/src/simd/cmp/ord.rs index b2455190e82..509f907785c 100644 --- a/crates/core_simd/src/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -1,5 +1,8 @@ use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdMutPtr, SimdPartialEq, SupportedLaneCount, + cmp::SimdPartialEq, + intrinsics, + ptr::{SimdConstPtr, SimdMutPtr}, + LaneCount, Mask, Simd, SupportedLaneCount, }; /// Parallel `PartialOrd`. diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/simd/num.rs similarity index 63% rename from crates/core_simd/src/elements.rs rename to crates/core_simd/src/simd/num.rs index dc7f52a4d57..22a4802ec6c 100644 --- a/crates/core_simd/src/elements.rs +++ b/crates/core_simd/src/simd/num.rs @@ -1,15 +1,13 @@ -mod const_ptr; +//! Traits for vectors with numeric elements. 
+ mod float; mod int; -mod mut_ptr; mod uint; mod sealed { pub trait Sealed {} } -pub use const_ptr::*; pub use float::*; pub use int::*; -pub use mut_ptr::*; pub use uint::*; diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/simd/num/float.rs similarity index 98% rename from crates/core_simd/src/elements/float.rs rename to crates/core_simd/src/simd/num/float.rs index d700011ff9c..affc01d111f 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -1,7 +1,7 @@ use super::sealed::Sealed; use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialEq, SimdPartialOrd, - SupportedLaneCount, + cmp::{SimdPartialEq, SimdPartialOrd}, + intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; /// Operations on SIMD vectors of floats. @@ -28,7 +28,7 @@ pub trait SimdFloat: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{SimdFloat, SimdInt, Simd}; + /// # use simd::prelude::*; /// let floats: Simd = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); /// let ints = floats.cast::(); /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); @@ -162,7 +162,7 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{f32x2, SimdFloat}; + /// # use simd::prelude::*; /// let v = f32x2::from_array([1., 2.]); /// assert_eq!(v.reduce_sum(), 3.); /// ``` @@ -176,7 +176,7 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{f32x2, SimdFloat}; + /// # use simd::prelude::*; /// let v = f32x2::from_array([3., 4.]); /// 
assert_eq!(v.reduce_product(), 12.); /// ``` @@ -195,7 +195,7 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{f32x2, SimdFloat}; + /// # use simd::prelude::*; /// let v = f32x2::from_array([1., 2.]); /// assert_eq!(v.reduce_max(), 2.); /// @@ -222,7 +222,7 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{f32x2, SimdFloat}; + /// # use simd::prelude::*; /// let v = f32x2::from_array([3., 7.]); /// assert_eq!(v.reduce_min(), 3.); /// diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/simd/num/int.rs similarity index 96% rename from crates/core_simd/src/elements/int.rs rename to crates/core_simd/src/simd/num/int.rs index c341c59545c..d1f8e856a53 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SimdUint, + cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; @@ -32,7 +32,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdInt}; + /// # use simd::prelude::*; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, 0, 1, MAX]); /// let max = Simd::splat(MAX); @@ -50,7 +50,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdInt}; + /// # use 
simd::prelude::*; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, -2, -1, MAX]); /// let max = Simd::splat(MAX); @@ -68,7 +68,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdInt}; + /// # use simd::prelude::*; /// use core::i32::{MIN, MAX}; /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); @@ -83,7 +83,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdInt}; + /// # use simd::prelude::*; /// use core::i32::{MIN, MAX}; /// let xs = Simd::from_array([MIN, -2, 0, 3]); /// let unsat = xs.abs(); @@ -101,7 +101,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdInt}; + /// # use simd::prelude::*; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, -2, 3, MAX]); /// let unsat = -x; @@ -131,7 +131,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{i32x4, SimdInt}; + /// # use simd::prelude::*; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_sum(), 10); /// @@ -149,7 +149,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{i32x4, SimdInt}; + /// # use simd::prelude::*; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_product(), 24); /// @@ -167,7 +167,7 @@ 
pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{i32x4, SimdInt}; + /// # use simd::prelude::*; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_max(), 4); /// ``` @@ -181,7 +181,7 @@ pub trait SimdInt: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{i32x4, SimdInt}; + /// # use simd::prelude::*; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_min(), 1); /// ``` diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/simd/num/uint.rs similarity index 98% rename from crates/core_simd/src/elements/uint.rs rename to crates/core_simd/src/simd/num/uint.rs index c33059f7d4e..7eadd2050b9 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -29,7 +29,7 @@ pub trait SimdUint: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdUint}; + /// # use simd::prelude::*; /// use core::u32::MAX; /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); @@ -47,7 +47,7 @@ pub trait SimdUint: Copy + Sealed { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdUint}; + /// # use simd::prelude::*; /// use core::u32::MAX; /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); @@ -122,7 +122,7 @@ fn cast(self) -> Self::Cast { #[inline] fn wrapping_neg(self) -> Self { - use crate::simd::SimdInt; + use crate::simd::num::SimdInt; (-self.cast::<$signed>()).cast() } diff --git a/crates/core_simd/src/simd/prelude.rs 
b/crates/core_simd/src/simd/prelude.rs index e8fdc932d49..4b7c744c013 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -7,8 +7,10 @@ #[doc(no_inline)] pub use super::{ - simd_swizzle, Mask, Simd, SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq, - SimdPartialOrd, SimdUint, + cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd}, + num::{SimdFloat, SimdInt, SimdUint}, + ptr::{SimdConstPtr, SimdMutPtr}, + simd_swizzle, Mask, Simd, }; #[rustfmt::skip] diff --git a/crates/core_simd/src/simd/ptr.rs b/crates/core_simd/src/simd/ptr.rs new file mode 100644 index 00000000000..3f8e6669118 --- /dev/null +++ b/crates/core_simd/src/simd/ptr.rs @@ -0,0 +1,11 @@ +//! Traits for vectors of pointers. + +mod const_ptr; +mod mut_ptr; + +mod sealed { + pub trait Sealed {} +} + +pub use const_ptr::*; +pub use mut_ptr::*; diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs similarity index 97% rename from crates/core_simd/src/elements/const_ptr.rs rename to crates/core_simd/src/simd/ptr/const_ptr.rs index f215f9a61d0..f82def1d377 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -1,5 +1,7 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount}; +use crate::simd::{ + cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, +}; /// Operations on SIMD vectors of constant pointers. 
pub trait SimdConstPtr: Copy + Sealed { diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs similarity index 97% rename from crates/core_simd/src/elements/mut_ptr.rs rename to crates/core_simd/src/simd/ptr/mut_ptr.rs index 4bdc6a14ce4..283054dc8ce 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -1,5 +1,7 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount}; +use crate::simd::{ + cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, +}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index ce621792534..bd8a38e350d 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -86,7 +86,7 @@ pub fn swizzle_dyn(self, idxs: Simd) -> Self { #[inline] #[allow(clippy::let_and_return)] unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { - use crate::simd::SimdPartialOrd; + use crate::simd::cmp::SimdPartialOrd; #[cfg(target_arch = "x86")] use core::arch::x86; #[cfg(target_arch = "x86_64")] @@ -149,7 +149,7 @@ fn zeroing_idxs(idxs: Simd) -> Simd // On x86, make sure the top bit is set. 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] let idxs = { - use crate::simd::SimdPartialOrd; + use crate::simd::cmp::SimdPartialOrd; idxs.simd_lt(Simd::splat(N as u8)) .select(idxs, Simd::splat(u8::MAX)) }; diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 3c93fe47404..dd01929551c 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -1,4 +1,7 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SimdFloat, SimdInt, SimdUint, SupportedLaneCount}; +use crate::simd::{ + num::{SimdFloat, SimdInt, SimdUint}, + LaneCount, Simd, SimdElement, SupportedLaneCount, +}; mod sealed { use super::*; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 9aa7bacfce9..70188337444 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,6 +1,8 @@ use crate::simd::{ - intrinsics, LaneCount, Mask, MaskElement, SimdConstPtr, SimdMutPtr, SimdPartialOrd, - SupportedLaneCount, Swizzle, + cmp::SimdPartialOrd, + intrinsics, + ptr::{SimdConstPtr, SimdMutPtr}, + LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; use core::convert::{TryFrom, TryInto}; @@ -394,7 +396,7 @@ pub fn gather_select( /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdPartialOrd, Mask}; + /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index /// let alt = Simd::from_array([-5, -4, -3, -2]); @@ -434,7 +436,7 @@ pub unsafe fn gather_select_unchecked( /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdConstPtr}; + /// # use simd::prelude::*; /// let values = [6, 2, 4, 9]; /// let offsets = 
Simd::from_array([1, 0, 0, 3]); /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); @@ -467,7 +469,7 @@ pub unsafe fn gather_ptr(source: Simd<*const T, N>) -> Self /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Mask, Simd, SimdConstPtr}; + /// # use simd::prelude::*; /// let values = [6, 2, 4, 9]; /// let enable = Mask::from_array([true, true, false, true]); /// let offsets = Simd::from_array([1, 0, 0, 3]); @@ -550,7 +552,7 @@ pub fn scatter_select(self, slice: &mut [T], enable: Mask, idxs: Simd< /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdPartialOrd, Mask}; + /// # use simd::{Simd, cmp::SimdPartialOrd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); @@ -604,7 +606,7 @@ pub unsafe fn scatter_select_unchecked( /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Simd, SimdMutPtr}; + /// # use simd::{Simd, ptr::SimdMutPtr}; /// let mut values = [0; 4]; /// let offset = Simd::from_array([3, 2, 1, 0]); /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); @@ -631,7 +633,7 @@ pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, N>) { /// # #![feature(portable_simd)] /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{Mask, Simd, SimdMutPtr}; + /// # use simd::{Mask, Simd, ptr::SimdMutPtr}; /// let mut values = [0; 4]; /// let offset = Simd::from_array([3, 2, 1, 0]); /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); diff --git a/crates/core_simd/tests/cast.rs 
b/crates/core_simd/tests/cast.rs index 00545936ea2..185e1945faa 100644 --- a/crates/core_simd/tests/cast.rs +++ b/crates/core_simd/tests/cast.rs @@ -3,7 +3,7 @@ macro_rules! cast_types { ($start:ident, $($target:ident),*) => { mod $start { #[allow(unused)] - use core_simd::simd::{Simd, SimdInt, SimdUint, SimdFloat}; + use core_simd::simd::prelude::*; type Vector = Simd<$start, N>; $( mod $target { diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 40aba2fd6cb..50faba04991 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -254,7 +254,7 @@ fn trailing_ones() { macro_rules! impl_signed_tests { { $scalar:tt } => { mod $scalar { - use core_simd::simd::SimdInt; + use core_simd::simd::num::SimdInt; type Vector = core_simd::simd::Simd; type Scalar = $scalar; @@ -306,7 +306,7 @@ fn rem_min_may_overflow() { } fn simd_min() { - use core_simd::simd::SimdOrd; + use core_simd::simd::cmp::SimdOrd; let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(0); assert_eq!(a.simd_min(b), a); @@ -316,7 +316,7 @@ fn simd_min() { } fn simd_max() { - use core_simd::simd::SimdOrd; + use core_simd::simd::cmp::SimdOrd; let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(0); assert_eq!(a.simd_max(b), b); @@ -326,7 +326,7 @@ fn simd_max() { } fn simd_clamp() { - use core_simd::simd::SimdOrd; + use core_simd::simd::cmp::SimdOrd; let min = Vector::::splat(Scalar::MIN); let max = Vector::::splat(Scalar::MAX); let zero = Vector::::splat(0); @@ -395,7 +395,7 @@ fn rem_neg_one_no_panic() { macro_rules! impl_unsigned_tests { { $scalar:tt } => { mod $scalar { - use core_simd::simd::SimdUint; + use core_simd::simd::num::SimdUint; type Vector = core_simd::simd::Simd; type Scalar = $scalar; @@ -440,7 +440,7 @@ fn wrapping_neg() { macro_rules! 
impl_float_tests { { $scalar:tt, $int_scalar:tt } => { mod $scalar { - use core_simd::simd::SimdFloat; + use core_simd::simd::num::SimdFloat; type Vector = core_simd::simd::Simd; type Scalar = $scalar; diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index 0ae8f83b8b9..a90ff928ced 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -1,6 +1,9 @@ #![feature(portable_simd, strict_provenance)] -use core_simd::simd::{Simd, SimdConstPtr, SimdMutPtr}; +use core_simd::simd::{ + ptr::{SimdConstPtr, SimdMutPtr}, + Simd, +}; macro_rules! common_tests { { $constness:ident } => { diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 191c39e2370..847766ec41e 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -53,7 +53,7 @@ fn fract() { test_helpers::test_lanes! { fn to_int_unchecked() { - use core_simd::simd::SimdFloat; + use core_simd::simd::num::SimdFloat; // The maximum integer that can be represented by the equivalently sized float has // all of the mantissa digits set to 1, pushed up to the MSB. 
const ALL_MANTISSA_BITS: IntScalar = ((1 << ::MANTISSA_DIGITS) - 1); diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index bb2b5a2dbba..1fef17242ca 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -149,7 +149,7 @@ fn fract(self) -> Self { #[cfg(test)] mod tests { use super::*; - use simd::*; + use simd::prelude::*; #[test] fn everything_works() { From 4fc3ce733d647deb2c537856eb142c6208e2b9f1 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 1 Oct 2023 12:31:39 -0400 Subject: [PATCH 43/59] Simplify Swizzle trait and condense all swizzles into this trait --- crates/core_simd/examples/matrix_inversion.rs | 13 +- crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/swizzle.rs | 349 ++++++++---------- crates/core_simd/src/vector.rs | 4 +- crates/core_simd/tests/swizzle.rs | 4 +- 5 files changed, 174 insertions(+), 197 deletions(-) diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index 5176623c160..faf4a44467d 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -2,10 +2,7 @@ // Code ported from the `packed_simd` crate // Run this code with `cargo test --example matrix_inversion` #![feature(array_chunks, portable_simd)] -use core_simd::simd::{ - prelude::*, - Which::{self, *}, -}; +use core_simd::simd::prelude::*; // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] @@ -166,10 +163,10 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option { let m_2 = f32x4::from_array(m[2]); let m_3 = f32x4::from_array(m[3]); - const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)]; - const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)]; - const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)]; - const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)]; + const 
SHUFFLE01: [usize; 4] = [0, 1, 4, 5]; + const SHUFFLE02: [usize; 4] = [0, 2, 4, 6]; + const SHUFFLE13: [usize; 4] = [1, 3, 5, 7]; + const SHUFFLE23: [usize; 4] = [2, 3, 6, 7]; let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01); let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01); diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index dd3c546e014..64ba9705ef5 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -5,6 +5,7 @@ const_mut_refs, convert_float_to_int, decl_macro, + inline_const, intra_doc_pointers, platform_intrinsics, repr_simd, diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 68f20516cf5..fb257e34cf9 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,17 +1,15 @@ use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; -/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors. +/// Constructs a new SIMD vector by copying elements from selected elements in other vectors. /// -/// When swizzling one vector, lanes are selected by a `const` array of `usize`, -/// like [`Swizzle`]. +/// When swizzling one vector, elements are selected like [`Swizzle::swizzle`]. /// -/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`], -/// like [`Swizzle2`]. +/// When swizzling two vectors, elements are selected like [`Swizzle::concat_swizzle`]. 
/// /// # Examples /// -/// With a single SIMD vector, the const array specifies lane indices in that vector: +/// With a single SIMD vector, the const array specifies element indices in that vector: /// ``` /// # #![feature(portable_simd)] /// # use core::simd::{u32x2, u32x4, simd_swizzle}; @@ -21,25 +19,27 @@ /// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]); /// assert_eq!(r.to_array(), [13, 10, 11, 12]); /// -/// // Changing the number of lanes +/// // Changing the number of elements /// let r: u32x2 = simd_swizzle!(v, [3, 1]); /// assert_eq!(r.to_array(), [13, 11]); /// ``` /// -/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index: +/// With two input SIMD vectors, the const array specifies element indices in the concatenation of +/// those vectors: /// ``` /// # #![feature(portable_simd)] -/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which}; -/// use Which::{First, Second}; +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{u32x2, u32x4, simd_swizzle}; /// let a = u32x4::from_array([0, 1, 2, 3]); /// let b = u32x4::from_array([4, 5, 6, 7]); /// /// // Keeping the same size -/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]); +/// let r: u32x4 = simd_swizzle!(a, b, [0, 1, 6, 7]); /// assert_eq!(r.to_array(), [0, 1, 6, 7]); /// -/// // Changing the number of lanes -/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]); +/// // Changing the number of elements +/// let r: u32x2 = simd_swizzle!(a, b, [0, 4]); /// assert_eq!(r.to_array(), [0, 4]); /// ``` #[allow(unused_macros)] @@ -50,7 +50,7 @@ { use $crate::simd::Swizzle; struct Impl; - impl Swizzle for Impl { + impl Swizzle<{$index.len()}> for Impl { const INDEX: [usize; {$index.len()}] = $index; } Impl::swizzle($vector) @@ -60,127 +60,117 @@ impl Swizzle for Impl { $first:expr, $second:expr, $index:expr $(,)? 
) => { { - use $crate::simd::{Which, Swizzle2}; + use $crate::simd::Swizzle; struct Impl; - impl Swizzle2 for Impl { - const INDEX: [Which; {$index.len()}] = $index; + impl Swizzle<{$index.len()}> for Impl { + const INDEX: [usize; {$index.len()}] = $index; } - Impl::swizzle2($first, $second) + Impl::concat_swizzle($first, $second) } } } -/// Specifies a lane index into one of two SIMD vectors. -/// -/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle]. -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Which { - /// Index of a lane in the first input SIMD vector. - First(usize), - /// Index of a lane in the second input SIMD vector. - Second(usize), -} - /// Create a vector from the elements of another vector. -pub trait Swizzle { - /// Map from the lanes of the input vector to the output vector. - const INDEX: [usize; OUTPUT_LANES]; +pub trait Swizzle { + /// Map from the elements of the input vector to the output vector. + const INDEX: [usize; N]; - /// Create a new vector from the lanes of `vector`. + /// Create a new vector from the elements of `vector`. /// /// Lane `i` of the output is `vector[Self::INDEX[i]]`. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - fn swizzle(vector: Simd) -> Simd + fn swizzle(vector: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32. - unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) } + // Safety: `vector` is a vector, and the index is a const array of u32. 
+ unsafe { + intrinsics::simd_shuffle( + vector, + vector, + const { + let mut output = [0; N]; + let mut i = 0; + while i < N { + let index = Self::INDEX[i]; + assert!(index as u32 as usize == index); + assert!(index < M, "source element index exceeds input vector length"); + output[i] = index as u32; + i += 1; + } + output + }, + ) + } } -} -/// Create a vector from the elements of two other vectors. -pub trait Swizzle2 { - /// Map from the lanes of the input vectors to the output vector - const INDEX: [Which; OUTPUT_LANES]; - - /// Create a new vector from the lanes of `first` and `second`. + /// Create a new vector from the elements of `first` and `second`. /// - /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is - /// `Second(j)`. + /// Lane `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of + /// `first` and `second`. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - fn swizzle2( - first: Simd, - second: Simd, - ) -> Simd + fn concat_swizzle(first: Simd, second: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32. - unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) } + // Safety: `first` and `second` are vectors, and the index is a const array of u32. + unsafe { + intrinsics::simd_shuffle( + first, + second, + const { + let mut output = [0; N]; + let mut i = 0; + while i < N { + let index = Self::INDEX[i]; + assert!(index as u32 as usize == index); + assert!(index < 2 * M, "source element index exceeds input vector length"); + output[i] = index as u32; + i += 1; + } + output + }, + ) + } } -} -/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. 
-/// This trait hides `INDEX_IMPL` from the public API. -trait SwizzleImpl { - const INDEX_IMPL: [u32; OUTPUT_LANES]; -} + /// Create a new mask from the elements of `first` and `second`. + /// + /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of + /// `first` and `second`. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original inputs"] + fn swizzle_mask(vector: Mask) -> Mask + where + T: MaskElement, + LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, + { + // SAFETY: all elements of this mask come from another mask + unsafe { Mask::from_int_unchecked(Self::swizzle(vector.to_int())) } + } -impl SwizzleImpl - for T -where - T: Swizzle + ?Sized, -{ - const INDEX_IMPL: [u32; OUTPUT_LANES] = { - let mut output = [0; OUTPUT_LANES]; - let mut i = 0; - while i < OUTPUT_LANES { - let index = Self::INDEX[i]; - assert!(index as u32 as usize == index); - assert!(index < INPUT_LANES, "source lane exceeds input lane count",); - output[i] = index as u32; - i += 1; - } - output - }; -} - -/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. -/// This trait hides `INDEX_IMPL` from the public API. -trait Swizzle2Impl { - const INDEX_IMPL: [u32; OUTPUT_LANES]; -} - -impl Swizzle2Impl - for T -where - T: Swizzle2 + ?Sized, -{ - const INDEX_IMPL: [u32; OUTPUT_LANES] = { - let mut output = [0; OUTPUT_LANES]; - let mut i = 0; - while i < OUTPUT_LANES { - let (offset, index) = match Self::INDEX[i] { - Which::First(index) => (false, index), - Which::Second(index) => (true, index), - }; - assert!(index < INPUT_LANES, "source lane exceeds input lane count",); - - // lanes are indexed by the first vector, then second vector - let index = if offset { index + INPUT_LANES } else { index }; - assert!(index as u32 as usize == index); - output[i] = index as u32; - i += 1; - } - output - }; + /// Create a new mask from the elements of `first` and `second`. 
+ /// + /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of + /// `first` and `second`. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original inputs"] + fn concat_swizzle_mask(first: Mask, second: Mask) -> Mask + where + T: MaskElement, + LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, + { + // SAFETY: all elements of this mask come from another mask + unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) } + } } impl Simd @@ -188,24 +178,22 @@ impl Simd T: SimdElement, LaneCount: SupportedLaneCount, { - /// Reverse the order of the lanes in the vector. + /// Reverse the order of the elements in the vector. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn reverse(self) -> Self { - const fn reverse_index() -> [usize; LANES] { - let mut index = [0; LANES]; - let mut i = 0; - while i < LANES { - index[i] = LANES - i - 1; - i += 1; - } - index - } - struct Reverse; - impl Swizzle for Reverse { - const INDEX: [usize; LANES] = reverse_index::(); + impl Swizzle for Reverse { + const INDEX: [usize; N] = const { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = N - i - 1; + i += 1; + } + index + }; } Reverse::swizzle(self) @@ -217,21 +205,19 @@ impl Swizzle for Reverse { #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_lanes_left(self) -> Self { - const fn rotate_index() -> [usize; LANES] { - let offset = OFFSET % LANES; - let mut index = [0; LANES]; - let mut i = 0; - while i < LANES { - index[i] = (i + offset) % LANES; - i += 1; - } - index - } - struct Rotate; - impl Swizzle for Rotate { - const INDEX: [usize; LANES] = rotate_index::(); + impl Swizzle for Rotate { + const INDEX: [usize; N] = const { + let offset = OFFSET % N; + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (i + offset) % 
N; + i += 1; + } + index + }; } Rotate::::swizzle(self) @@ -243,21 +229,19 @@ impl Swizzle for Rotate(self) -> Self { - const fn rotate_index() -> [usize; LANES] { - let offset = LANES - OFFSET % LANES; - let mut index = [0; LANES]; - let mut i = 0; - while i < LANES { - index[i] = (i + offset) % LANES; - i += 1; - } - index - } - struct Rotate; - impl Swizzle for Rotate { - const INDEX: [usize; LANES] = rotate_index::(); + impl Swizzle for Rotate { + const INDEX: [usize; N] = const { + let offset = N - OFFSET % N; + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (i + offset) % N; + i += 1; + } + index + }; } Rotate::::swizzle(self) @@ -265,7 +249,7 @@ impl Swizzle for Rotate Swizzle for Rotate (Self, Self) { - const fn interleave(high: bool) -> [Which; LANES] { - let mut idx = [Which::First(0); LANES]; + const fn interleave(high: bool) -> [usize; N] { + let mut idx = [0; N]; let mut i = 0; - while i < LANES { - // Treat the source as a concatenated vector - let dst_index = if high { i + LANES } else { i }; - let src_index = dst_index / 2 + (dst_index % 2) * LANES; - idx[i] = if src_index < LANES { - Which::First(src_index) - } else { - Which::Second(src_index % LANES) - }; + while i < N { + let dst_index = if high { i + N } else { i }; + let src_index = dst_index / 2 + (dst_index % 2) * N; + idx[i] = src_index; i += 1; } idx @@ -302,24 +281,27 @@ pub fn interleave(self, other: Self) -> (Self, Self) { struct Lo; struct Hi; - impl Swizzle2 for Lo { - const INDEX: [Which; LANES] = interleave::(false); + impl Swizzle for Lo { + const INDEX: [usize; N] = interleave::(false); } - impl Swizzle2 for Hi { - const INDEX: [Which; LANES] = interleave::(true); + impl Swizzle for Hi { + const INDEX: [usize; N] = interleave::(true); } - (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) + ( + Lo::concat_swizzle(self, other), + Hi::concat_swizzle(self, other), + ) } /// Deinterleave two vectors. 
/// - /// The first result takes every other lane of `self` and then `other`, starting with - /// the first lane. + /// The first result takes every other element of `self` and then `other`, starting with + /// the first element. /// - /// The second result takes every other lane of `self` and then `other`, starting with - /// the second lane. + /// The second result takes every other element of `self` and then `other`, starting with + /// the second element. /// /// The reverse of this operation is [`Simd::interleave`]. /// @@ -335,17 +317,11 @@ impl Swizzle2 for Hi { #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn deinterleave(self, other: Self) -> (Self, Self) { - const fn deinterleave(second: bool) -> [Which; LANES] { - let mut idx = [Which::First(0); LANES]; + const fn deinterleave(second: bool) -> [usize; N] { + let mut idx = [0; N]; let mut i = 0; - while i < LANES { - // Treat the source as a concatenated vector - let src_index = i * 2 + second as usize; - idx[i] = if src_index < LANES { - Which::First(src_index) - } else { - Which::Second(src_index % LANES) - }; + while i < N { + idx[i] = i * 2 + second as usize; i += 1; } idx @@ -354,14 +330,17 @@ pub fn deinterleave(self, other: Self) -> (Self, Self) { struct Even; struct Odd; - impl Swizzle2 for Even { - const INDEX: [Which; LANES] = deinterleave::(false); + impl Swizzle for Even { + const INDEX: [usize; N] = deinterleave::(false); } - impl Swizzle2 for Odd { - const INDEX: [Which; LANES] = deinterleave::(true); + impl Swizzle for Odd { + const INDEX: [usize; N] = deinterleave::(true); } - (Even::swizzle2(self, other), Odd::swizzle2(self, other)) + ( + Even::concat_swizzle(self, other), + Odd::concat_swizzle(self, other), + ) } } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 70188337444..18a0bb0a77e 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -144,10 +144,10 @@ pub fn 
splat(value: T) -> Self { // This is preferred over `[value; N]`, since it's explicitly a splat: // https://github.com/rust-lang/rust/issues/97804 struct Splat; - impl Swizzle<1, N> for Splat { + impl Swizzle for Splat { const INDEX: [usize; N] = [0; N]; } - Splat::swizzle(Simd::::from([value])) + Splat::swizzle::(Simd::::from([value])) } /// Returns an array reference containing the entire SIMD vector. diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs index 8cd7c33e823..46aaf748ad8 100644 --- a/crates/core_simd/tests/swizzle.rs +++ b/crates/core_simd/tests/swizzle.rs @@ -11,10 +11,10 @@ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn swizzle() { struct Index; - impl Swizzle<4, 4> for Index { + impl Swizzle<4> for Index { const INDEX: [usize; 4] = [2, 1, 3, 0]; } - impl Swizzle<4, 2> for Index { + impl Swizzle<2> for Index { const INDEX: [usize; 2] = [1, 1]; } From 7a7faf6cc6849a48a4ba2c82a64be3855e1460fa Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 1 Oct 2023 13:59:11 -0400 Subject: [PATCH 44/59] Fix formatting --- crates/core_simd/src/swizzle.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index fb257e34cf9..48aebba91fd 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -97,7 +97,10 @@ fn swizzle(vector: Simd) -> Simd while i < N { let index = Self::INDEX[i]; assert!(index as u32 as usize == index); - assert!(index < M, "source element index exceeds input vector length"); + assert!( + index < M, + "source element index exceeds input vector length" + ); output[i] = index as u32; i += 1; } @@ -130,7 +133,10 @@ fn concat_swizzle(first: Simd, second: Simd) -> S while i < N { let index = Self::INDEX[i]; assert!(index as u32 as usize == index); - assert!(index < 2 * M, "source element index exceeds input vector length"); + assert!( + index < 2 * M, + "source element index exceeds 
input vector length" + ); output[i] = index as u32; i += 1; } From 6e0de1983ca1861ca900c9a1b63b7b62e8babd02 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 19:15:22 -0400 Subject: [PATCH 45/59] Fix variable and comment --- crates/core_simd/src/swizzle.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 48aebba91fd..ed4bd72b9a5 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -146,20 +146,20 @@ fn concat_swizzle(first: Simd, second: Simd) -> S } } - /// Create a new mask from the elements of `first` and `second`. + /// Create a new mask from the elements of `mask`. /// /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of /// `first` and `second`. #[inline] #[must_use = "method returns a new mask and does not mutate the original inputs"] - fn swizzle_mask(vector: Mask) -> Mask + fn swizzle_mask(mask: Mask) -> Mask where T: MaskElement, LaneCount: SupportedLaneCount, LaneCount: SupportedLaneCount, { // SAFETY: all elements of this mask come from another mask - unsafe { Mask::from_int_unchecked(Self::swizzle(vector.to_int())) } + unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) } } /// Create a new mask from the elements of `first` and `second`. 
From b962b612e02fb7a4585adf5f1753771687aa8e06 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 20:44:47 -0400 Subject: [PATCH 46/59] Change lane to element in documentation --- crates/core_simd/examples/matrix_inversion.rs | 38 ++++++------ crates/core_simd/src/masks.rs | 60 +++++++++---------- crates/core_simd/src/masks/to_bitmask.rs | 4 +- crates/core_simd/src/select.rs | 12 ++-- crates/core_simd/src/simd/cmp/eq.rs | 4 +- crates/core_simd/src/simd/cmp/ord.rs | 26 ++++---- crates/core_simd/src/simd/num/float.rs | 60 +++++++++---------- crates/core_simd/src/simd/num/int.rs | 22 +++---- crates/core_simd/src/simd/num/uint.rs | 14 ++--- crates/core_simd/src/simd/ptr/const_ptr.rs | 24 ++++---- crates/core_simd/src/simd/ptr/mut_ptr.rs | 24 ++++---- crates/core_simd/src/swizzle.rs | 14 ++--- crates/core_simd/tests/swizzle.rs | 24 ++++---- 13 files changed, 163 insertions(+), 163 deletions(-) diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index faf4a44467d..bad86414401 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -179,58 +179,58 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option { let row2 = simd_swizzle!(tmp, row3, SHUFFLE02); let row3 = simd_swizzle!(row3, tmp, SHUFFLE13); - let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>(); + let tmp = (row2 * row3).reverse().rotate_elements_right::<2>(); let minor0 = row1 * tmp; let minor1 = row0 * tmp; - let tmp = tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor0 = (row1 * tmp) - minor0; let minor1 = (row0 * tmp) - minor1; - let minor1 = minor1.rotate_lanes_right::<2>(); + let minor1 = minor1.rotate_elements_right::<2>(); - let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>(); + let tmp = (row1 * row2).reverse().rotate_elements_right::<2>(); let minor0 = (row3 * tmp) + minor0; let minor3 = row0 * tmp; - let tmp = 
tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor0 = minor0 - row3 * tmp; let minor3 = row0 * tmp - minor3; - let minor3 = minor3.rotate_lanes_right::<2>(); + let minor3 = minor3.rotate_elements_right::<2>(); - let tmp = (row3 * row1.rotate_lanes_right::<2>()) + let tmp = (row3 * row1.rotate_elements_right::<2>()) .reverse() - .rotate_lanes_right::<2>(); - let row2 = row2.rotate_lanes_right::<2>(); + .rotate_elements_right::<2>(); + let row2 = row2.rotate_elements_right::<2>(); let minor0 = row2 * tmp + minor0; let minor2 = row0 * tmp; - let tmp = tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor0 = minor0 - row2 * tmp; let minor2 = row0 * tmp - minor2; - let minor2 = minor2.rotate_lanes_right::<2>(); + let minor2 = minor2.rotate_elements_right::<2>(); - let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>(); + let tmp = (row0 * row1).reverse().rotate_elements_right::<2>(); let minor2 = minor2 + row3 * tmp; let minor3 = row2 * tmp - minor3; - let tmp = tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor2 = row3 * tmp - minor2; let minor3 = minor3 - row2 * tmp; - let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>(); + let tmp = (row0 * row3).reverse().rotate_elements_right::<2>(); let minor1 = minor1 - row2 * tmp; let minor2 = row1 * tmp + minor2; - let tmp = tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor1 = row2 * tmp + minor1; let minor2 = minor2 - row1 * tmp; - let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>(); + let tmp = (row0 * row2).reverse().rotate_elements_right::<2>(); let minor1 = row3 * tmp + minor1; let minor3 = minor3 - row1 * tmp; - let tmp = tmp.rotate_lanes_right::<2>(); + let tmp = tmp.rotate_elements_right::<2>(); let minor1 = minor1 - row3 * tmp; let minor3 = row1 * tmp + minor3; let det = row0 * minor0; - let det = det.rotate_lanes_right::<2>() + det; - let det = 
det.reverse().rotate_lanes_right::<2>() + det; + let det = det.rotate_elements_right::<2>() + det; + let det = det.reverse().rotate_elements_right::<2>() + det; if det.reduce_sum() == 0. { return None; diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 0a04cf66757..c3da4468757 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -1,4 +1,4 @@ -//! Types and traits associated with masking lanes of vectors. +//! Types and traits associated with masking elements of vectors. //! Types representing #![allow(non_camel_case_types)] @@ -82,7 +82,7 @@ unsafe impl MaskElement for $ty {} /// A SIMD vector mask for `LANES` elements of width specified by `Element`. /// -/// Masks represent boolean inclusion/exclusion on a per-lane basis. +/// Masks represent boolean inclusion/exclusion on a per-element basis. /// /// The layout of this type is unspecified, and may change between platforms /// and/or Rust versions, and code should not assume that it is equivalent to @@ -116,7 +116,7 @@ impl Mask T: MaskElement, LaneCount: SupportedLaneCount, { - /// Construct a mask by setting all lanes to the given value. + /// Construct a mask by setting all elements to the given value. #[inline] pub fn splat(value: bool) -> Self { Self(mask_impl::Mask::splat(value)) @@ -163,7 +163,7 @@ pub fn splat(value: bool) -> Self { /// represents `true`. /// /// # Safety - /// All lanes must be either 0 or -1. + /// All elements must be either 0 or -1. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { @@ -175,7 +175,7 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { /// represents `true`. /// /// # Panics - /// Panics if any lane is not 0 or -1. + /// Panics if any element is not 0 or -1. 
#[inline] #[must_use = "method returns a new mask and does not mutate the original value"] #[track_caller] @@ -193,71 +193,71 @@ pub fn to_int(self) -> Simd { self.0.to_int() } - /// Converts the mask to a mask of any other lane size. + /// Converts the mask to a mask of any other element size. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn cast(self) -> Mask { Mask(self.0.convert()) } - /// Tests the value of the specified lane. + /// Tests the value of the specified element. /// /// # Safety - /// `lane` must be less than `LANES`. + /// `index` must be less than `self.len()`. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] - pub unsafe fn test_unchecked(&self, lane: usize) -> bool { + pub unsafe fn test_unchecked(&self, index: usize) -> bool { // Safety: the caller must confirm this invariant - unsafe { self.0.test_unchecked(lane) } + unsafe { self.0.test_unchecked(index) } } - /// Tests the value of the specified lane. + /// Tests the value of the specified element. /// /// # Panics - /// Panics if `lane` is greater than or equal to the number of lanes in the vector. + /// Panics if `index` is greater than or equal to the number of elements in the vector. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] #[track_caller] - pub fn test(&self, lane: usize) -> bool { - assert!(lane < LANES, "lane index out of range"); - // Safety: the lane index has been checked - unsafe { self.test_unchecked(lane) } + pub fn test(&self, index: usize) -> bool { + assert!(index < LANES, "element index out of range"); + // Safety: the element index has been checked + unsafe { self.test_unchecked(index) } } - /// Sets the value of the specified lane. + /// Sets the value of the specified element. /// /// # Safety - /// `lane` must be less than `LANES`. + /// `index` must be less than `self.len()`.
#[inline] - pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { + pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) { // Safety: the caller must confirm this invariant unsafe { - self.0.set_unchecked(lane, value); + self.0.set_unchecked(index, value); } } - /// Sets the value of the specified lane. + /// Sets the value of the specified element. /// /// # Panics - /// Panics if `lane` is greater than or equal to the number of lanes in the vector. + /// Panics if `index` is greater than or equal to the number of elements in the vector. #[inline] #[track_caller] - pub fn set(&mut self, lane: usize, value: bool) { - assert!(lane < LANES, "lane index out of range"); - // Safety: the lane index has been checked + pub fn set(&mut self, index: usize, value: bool) { + assert!(index < LANES, "element index out of range"); + // Safety: the element index has been checked unsafe { - self.set_unchecked(lane, value); + self.set_unchecked(index, value); } } - /// Returns true if any lane is set, or false otherwise. + /// Returns true if any element is set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { self.0.any() } - /// Returns true if all lanes are set, or false otherwise. + /// Returns true if all elements are set, or false otherwise. 
#[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { @@ -294,7 +294,7 @@ impl Default for Mask LaneCount: SupportedLaneCount, { #[inline] - #[must_use = "method returns a defaulted mask with all lanes set to false (0)"] + #[must_use = "method returns a defaulted mask with all elements set to false (0)"] fn default() -> Self { Self::splat(false) } @@ -332,7 +332,7 @@ impl fmt::Debug for Mask #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() - .entries((0..LANES).map(|lane| self.test(lane))) + .entries((0..LANES).map(|i| self.test(i))) .finish() } } diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 12cb1771ce1..382928ac308 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -16,7 +16,7 @@ impl Sealed for Mask /// Converts masks to and from integer bitmasks. /// -/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB. +/// Each bit of the bitmask corresponds to a mask element, starting with the LSB. pub trait ToBitMask: Sealed { /// The integer bitmask type. type BitMask; @@ -30,7 +30,7 @@ pub trait ToBitMask: Sealed { /// Converts masks to and from byte array bitmasks. /// -/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. +/// Each bit of the bitmask corresponds to a mask element, starting with the LSB of the first byte. pub trait ToBitMaskArray: Sealed { /// The bitmask array. type BitMaskArray: Copy diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index 065c5987d3f..a1a26032e87 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -6,10 +6,10 @@ impl Mask T: MaskElement, LaneCount: SupportedLaneCount, { - /// Choose lanes from two vectors. + /// Choose elements from two vectors. 
/// - /// For each lane in the mask, choose the corresponding lane from `true_values` if - /// that lane mask is true, and `false_values` if that lane mask is false. + /// For each element in the mask, choose the corresponding element from `true_values` if + /// that element mask is true, and `false_values` if that element mask is false. /// /// # Examples /// ``` @@ -36,10 +36,10 @@ pub fn select( unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } } - /// Choose lanes from two masks. + /// Choose elements from two masks. /// - /// For each lane in the mask, choose the corresponding lane from `true_values` if - /// that lane mask is true, and `false_values` if that lane mask is false. + /// For each element in the mask, choose the corresponding element from `true_values` if + /// that element mask is true, and `false_values` if that element mask is false. /// /// # Examples /// ``` diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index 627ceba3c6f..0ca0401b7ed 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -9,11 +9,11 @@ pub trait SimdPartialEq { /// The mask type returned by each comparison. type Mask; - /// Test if each lane is equal to the corresponding lane in `other`. + /// Test if each element is equal to the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_eq(self, other: Self) -> Self::Mask; - /// Test if each lane is equal to the corresponding lane in `other`. + /// Test if each element is not equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"] fn simd_ne(self, other: Self) -> Self::Mask; } diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 509f907785c..667eb00e111 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -7,41 +7,41 @@ /// Parallel `PartialOrd`. pub trait SimdPartialOrd: SimdPartialEq { - /// Test if each lane is less than the corresponding lane in `other`. + /// Test if each element is less than the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_lt(self, other: Self) -> Self::Mask; - /// Test if each lane is less than or equal to the corresponding lane in `other`. + /// Test if each element is less than or equal to the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_le(self, other: Self) -> Self::Mask; - /// Test if each lane is greater than the corresponding lane in `other`. + /// Test if each element is greater than the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_gt(self, other: Self) -> Self::Mask; - /// Test if each lane is greater than or equal to the corresponding lane in `other`. + /// Test if each element is greater than or equal to the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_ge(self, other: Self) -> Self::Mask; } /// Parallel `Ord`. pub trait SimdOrd: SimdPartialOrd { - /// Returns the lane-wise maximum with `other`. + /// Returns the element-wise maximum with `other`. #[must_use = "method returns a new vector and does not mutate the original value"] fn simd_max(self, other: Self) -> Self; - /// Returns the lane-wise minimum with `other`. + /// Returns the element-wise minimum with `other`. 
#[must_use = "method returns a new vector and does not mutate the original value"] fn simd_min(self, other: Self) -> Self; - /// Restrict each lane to a certain interval. + /// Restrict each element to a certain interval. /// - /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is + /// For each element, returns `max` if `self` is greater than `max`, and `min` if `self` is /// less than `min`. Otherwise returns `self`. /// /// # Panics /// - /// Panics if `min > max` on any lane. + /// Panics if `min > max` on any element. #[must_use = "method returns a new vector and does not mutate the original value"] fn simd_clamp(self, min: Self, max: Self) -> Self; } @@ -101,7 +101,7 @@ fn simd_min(self, other: Self) -> Self { fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", + "each element in `min` must be less than or equal to the corresponding element in `max`", ); self.simd_max(min).simd_min(max) } @@ -208,7 +208,7 @@ fn simd_min(self, other: Self) -> Self { fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", + "each element in `min` must be less than or equal to the corresponding element in `max`", ); self.simd_max(min).simd_min(max) } @@ -263,7 +263,7 @@ fn simd_min(self, other: Self) -> Self { fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", + "each element in `min` must be less than or equal to the corresponding element in `max`", ); self.simd_max(min).simd_min(max) } @@ -313,7 +313,7 @@ fn simd_min(self, other: Self) -> Self { fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the 
corresponding lane in `max`", + "each element in `min` must be less than or equal to the corresponding element in `max`", ); self.simd_max(min).simd_min(max) } diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index affc01d111f..e8378c3147a 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -63,64 +63,64 @@ unsafe fn to_int_unchecked(self) -> Self::Cast Self::Scalar: core::convert::FloatToInt; /// Raw transmutation to an unsigned integer vector type with the - /// same size and number of lanes. + /// same size and number of elements. #[must_use = "method returns a new vector and does not mutate the original value"] fn to_bits(self) -> Self::Bits; /// Raw transmutation from an unsigned integer vector type with the - /// same size and number of lanes. + /// same size and number of elements. #[must_use = "method returns a new vector and does not mutate the original value"] fn from_bits(bits: Self::Bits) -> Self; - /// Produces a vector where every lane has the absolute value of the - /// equivalently-indexed lane in `self`. + /// Produces a vector where every element has the absolute value of the + /// equivalently-indexed element in `self`. #[must_use = "method returns a new vector and does not mutate the original value"] fn abs(self) -> Self; - /// Takes the reciprocal (inverse) of each lane, `1/x`. + /// Takes the reciprocal (inverse) of each element, `1/x`. #[must_use = "method returns a new vector and does not mutate the original value"] fn recip(self) -> Self; - /// Converts each lane from radians to degrees. + /// Converts each element from radians to degrees. #[must_use = "method returns a new vector and does not mutate the original value"] fn to_degrees(self) -> Self; - /// Converts each lane from degrees to radians. + /// Converts each element from degrees to radians. 
#[must_use = "method returns a new vector and does not mutate the original value"] fn to_radians(self) -> Self; - /// Returns true for each lane if it has a positive sign, including + /// Returns true for each element if it has a positive sign, including /// `+0.0`, `NaN`s with positive sign bit and positive infinity. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_sign_positive(self) -> Self::Mask; - /// Returns true for each lane if it has a negative sign, including + /// Returns true for each element if it has a negative sign, including /// `-0.0`, `NaN`s with negative sign bit and negative infinity. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_sign_negative(self) -> Self::Mask; - /// Returns true for each lane if its value is `NaN`. + /// Returns true for each element if its value is `NaN`. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_nan(self) -> Self::Mask; - /// Returns true for each lane if its value is positive infinity or negative infinity. + /// Returns true for each element if its value is positive infinity or negative infinity. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_infinite(self) -> Self::Mask; - /// Returns true for each lane if its value is neither infinite nor `NaN`. + /// Returns true for each element if its value is neither infinite nor `NaN`. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_finite(self) -> Self::Mask; - /// Returns true for each lane if its value is subnormal. + /// Returns true for each element if its value is subnormal. #[must_use = "method returns a new mask and does not mutate the original value"] fn is_subnormal(self) -> Self::Mask; - /// Returns true for each lane if its value is neither zero, infinite, + /// Returns true for each element if its value is neither zero, infinite, /// subnormal, nor `NaN`. 
#[must_use = "method returns a new mask and does not mutate the original value"] fn is_normal(self) -> Self::Mask; - /// Replaces each lane with a number that represents its sign. + /// Replaces each element with a number that represents its sign. /// /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` @@ -128,33 +128,33 @@ unsafe fn to_int_unchecked(self) -> Self::Cast #[must_use = "method returns a new vector and does not mutate the original value"] fn signum(self) -> Self; - /// Returns each lane with the magnitude of `self` and the sign of `sign`. + /// Returns each element with the magnitude of `self` and the sign of `sign`. /// - /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned. + /// For any element containing a `NAN`, a `NAN` with the sign of `sign` is returned. #[must_use = "method returns a new vector and does not mutate the original value"] fn copysign(self, sign: Self) -> Self; - /// Returns the minimum of each lane. + /// Returns the minimum of each element. /// /// If one of the values is `NAN`, then the other value is returned. #[must_use = "method returns a new vector and does not mutate the original value"] fn simd_min(self, other: Self) -> Self; - /// Returns the maximum of each lane. + /// Returns the maximum of each element. /// /// If one of the values is `NAN`, then the other value is returned. #[must_use = "method returns a new vector and does not mutate the original value"] fn simd_max(self, other: Self) -> Self; - /// Restrict each lane to a certain interval unless it is NaN. + /// Restrict each element to a certain interval unless it is NaN. /// - /// For each lane in `self`, returns the corresponding lane in `max` if the lane is - /// greater than `max`, and the corresponding lane in `min` if the lane is less - /// than `min`. Otherwise returns the lane in `self`. 
+ /// For each element in `self`, returns the corresponding element in `max` if the element is + /// greater than `max`, and the corresponding element in `min` if the element is less + /// than `min`. Otherwise returns the element in `self`. #[must_use = "method returns a new vector and does not mutate the original value"] fn simd_clamp(self, min: Self, max: Self) -> Self; - /// Returns the sum of the lanes of the vector. + /// Returns the sum of the elements of the vector. /// /// # Examples /// @@ -168,7 +168,7 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// ``` fn reduce_sum(self) -> Self::Scalar; - /// Reducing multiply. Returns the product of the lanes of the vector. + /// Reducing multiply. Returns the product of the elements of the vector. /// /// # Examples /// @@ -182,12 +182,12 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// ``` fn reduce_product(self) -> Self::Scalar; - /// Returns the maximum lane in the vector. + /// Returns the maximum element in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. /// - /// This function will not return `NaN` unless all lanes are `NaN`. + /// This function will not return `NaN` unless all elements are `NaN`. /// /// # Examples /// @@ -209,12 +209,12 @@ unsafe fn to_int_unchecked(self) -> Self::Cast /// ``` fn reduce_max(self) -> Self::Scalar; - /// Returns the minimum lane in the vector. + /// Returns the minimum element in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. /// - /// This function will not return `NaN` unless all lanes are `NaN`. + /// This function will not return `NaN` unless all elements are `NaN`. 
/// /// # Examples /// @@ -376,7 +376,7 @@ fn simd_max(self, other: Self) -> Self { fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", + "each element in `min` must be less than or equal to the corresponding element in `max`", ); let mut x = self; x = x.simd_lt(min).select(min, x); diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index d1f8e856a53..5b0df597b24 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -61,7 +61,7 @@ pub trait SimdInt: Copy + Sealed { fn saturating_sub(self, second: Self) -> Self; /// Lanewise absolute value, implemented in Rust. - /// Every lane becomes its absolute value. + /// Every element becomes its absolute value. /// /// # Examples /// ``` @@ -111,19 +111,19 @@ pub trait SimdInt: Copy + Sealed { /// ``` fn saturating_neg(self) -> Self; - /// Returns true for each positive lane and false if it is zero or negative. + /// Returns true for each positive element and false if it is zero or negative. fn is_positive(self) -> Self::Mask; - /// Returns true for each negative lane and false if it is zero or positive. + /// Returns true for each negative element and false if it is zero or positive. fn is_negative(self) -> Self::Mask; - /// Returns numbers representing the sign of each lane. + /// Returns numbers representing the sign of each element. /// * `0` if the number is zero /// * `1` if the number is positive /// * `-1` if the number is negative fn signum(self) -> Self; - /// Returns the sum of the lanes of the vector, with wrapping addition. + /// Returns the sum of the elements of the vector, with wrapping addition. /// /// # Examples /// @@ -141,7 +141,7 @@ pub trait SimdInt: Copy + Sealed { /// ``` fn reduce_sum(self) -> Self::Scalar; - /// Returns the product of the lanes of the vector, with wrapping multiplication. 
+ /// Returns the product of the elements of the vector, with wrapping multiplication. /// /// # Examples /// @@ -159,7 +159,7 @@ pub trait SimdInt: Copy + Sealed { /// ``` fn reduce_product(self) -> Self::Scalar; - /// Returns the maximum lane in the vector. + /// Returns the maximum element in the vector. /// /// # Examples /// @@ -173,7 +173,7 @@ pub trait SimdInt: Copy + Sealed { /// ``` fn reduce_max(self) -> Self::Scalar; - /// Returns the minimum lane in the vector. + /// Returns the minimum element in the vector. /// /// # Examples /// @@ -187,13 +187,13 @@ pub trait SimdInt: Copy + Sealed { /// ``` fn reduce_min(self) -> Self::Scalar; - /// Returns the cumulative bitwise "and" across the lanes of the vector. + /// Returns the cumulative bitwise "and" across the elements of the vector. fn reduce_and(self) -> Self::Scalar; - /// Returns the cumulative bitwise "or" across the lanes of the vector. + /// Returns the cumulative bitwise "or" across the elements of the vector. fn reduce_or(self) -> Self::Scalar; - /// Returns the cumulative bitwise "xor" across the lanes of the vector. + /// Returns the cumulative bitwise "xor" across the elements of the vector. fn reduce_xor(self) -> Self::Scalar; /// Reverses the byte order of each element. diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index 7eadd2050b9..9c91e147a7a 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -57,25 +57,25 @@ pub trait SimdUint: Copy + Sealed { /// assert_eq!(sat, Simd::splat(0)); fn saturating_sub(self, second: Self) -> Self; - /// Returns the sum of the lanes of the vector, with wrapping addition. + /// Returns the sum of the elements of the vector, with wrapping addition. fn reduce_sum(self) -> Self::Scalar; - /// Returns the product of the lanes of the vector, with wrapping multiplication. + /// Returns the product of the elements of the vector, with wrapping multiplication. 
fn reduce_product(self) -> Self::Scalar; - /// Returns the maximum lane in the vector. + /// Returns the maximum element in the vector. fn reduce_max(self) -> Self::Scalar; - /// Returns the minimum lane in the vector. + /// Returns the minimum element in the vector. fn reduce_min(self) -> Self::Scalar; - /// Returns the cumulative bitwise "and" across the lanes of the vector. + /// Returns the cumulative bitwise "and" across the elements of the vector. fn reduce_and(self) -> Self::Scalar; - /// Returns the cumulative bitwise "or" across the lanes of the vector. + /// Returns the cumulative bitwise "or" across the elements of the vector. fn reduce_or(self) -> Self::Scalar; - /// Returns the cumulative bitwise "xor" across the lanes of the vector. + /// Returns the cumulative bitwise "xor" across the elements of the vector. fn reduce_xor(self) -> Self::Scalar; /// Reverses the byte order of each element. diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index f82def1d377..246fc7ee381 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -5,13 +5,13 @@ /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { - /// Vector of `usize` with the same number of lanes. + /// Vector of `usize` with the same number of elements. type Usize; - /// Vector of `isize` with the same number of lanes. + /// Vector of `isize` with the same number of elements. type Isize; - /// Vector of const pointers with the same number of lanes. + /// Vector of const pointers with the same number of elements. type CastPtr; /// Vector of mutable pointers to the same type. @@ -20,17 +20,17 @@ pub trait SimdConstPtr: Copy + Sealed { /// Mask type used for manipulating this SIMD vector type. type Mask; - /// Returns `true` for each lane that is null. + /// Returns `true` for each element that is null. 
fn is_null(self) -> Self::Mask; /// Casts to a pointer of another type. /// - /// Equivalent to calling [`pointer::cast`] on each lane. + /// Equivalent to calling [`pointer::cast`] on each element. fn cast(self) -> Self::CastPtr; /// Changes constness without changing the type. /// - /// Equivalent to calling [`pointer::cast_mut`] on each lane. + /// Equivalent to calling [`pointer::cast_mut`] on each element. fn cast_mut(self) -> Self::MutPtr; /// Gets the "address" portion of the pointer. @@ -41,7 +41,7 @@ pub trait SimdConstPtr: Copy + Sealed { /// This method semantically discards *provenance* and /// *address-space* information. To properly restore that information, use [`Self::with_addr`]. /// - /// Equivalent to calling [`pointer::addr`] on each lane. + /// Equivalent to calling [`pointer::addr`] on each element. fn addr(self) -> Self::Usize; /// Creates a new pointer with the given address. @@ -49,7 +49,7 @@ pub trait SimdConstPtr: Copy + Sealed { /// This performs the same operation as a cast, but copies the *address-space* and /// *provenance* of `self` to the new pointer. /// - /// Equivalent to calling [`pointer::with_addr`] on each lane. + /// Equivalent to calling [`pointer::with_addr`] on each element. fn with_addr(self, addr: Self::Usize) -> Self; /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use @@ -58,22 +58,22 @@ pub trait SimdConstPtr: Copy + Sealed { /// Convert an address back to a pointer, picking up a previously "exposed" provenance. /// - /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each lane. + /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each element. fn from_exposed_addr(addr: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. + /// Equivalent to calling [`pointer::wrapping_offset`] on each element. 
fn wrapping_offset(self, offset: Self::Isize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + /// Equivalent to calling [`pointer::wrapping_add`] on each element. fn wrapping_add(self, count: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_sub`] on each lane. + /// Equivalent to calling [`pointer::wrapping_sub`] on each element. fn wrapping_sub(self, count: Self::Usize) -> Self; } diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index 283054dc8ce..69c927eb11a 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -5,13 +5,13 @@ /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { - /// Vector of `usize` with the same number of lanes. + /// Vector of `usize` with the same number of elements. type Usize; - /// Vector of `isize` with the same number of lanes. + /// Vector of `isize` with the same number of elements. type Isize; - /// Vector of const pointers with the same number of lanes. + /// Vector of const pointers with the same number of elements. type CastPtr; /// Vector of constant pointers to the same type. @@ -20,17 +20,17 @@ pub trait SimdMutPtr: Copy + Sealed { /// Mask type used for manipulating this SIMD vector type. type Mask; - /// Returns `true` for each lane that is null. + /// Returns `true` for each element that is null. fn is_null(self) -> Self::Mask; /// Casts to a pointer of another type. /// - /// Equivalent to calling [`pointer::cast`] on each lane. + /// Equivalent to calling [`pointer::cast`] on each element. fn cast(self) -> Self::CastPtr; /// Changes constness without changing the type. /// - /// Equivalent to calling [`pointer::cast_const`] on each lane. 
+ /// Equivalent to calling [`pointer::cast_const`] on each element. fn cast_const(self) -> Self::ConstPtr; /// Gets the "address" portion of the pointer. @@ -38,7 +38,7 @@ pub trait SimdMutPtr: Copy + Sealed { /// This method discards pointer semantic metadata, so the result cannot be /// directly cast into a valid pointer. /// - /// Equivalent to calling [`pointer::addr`] on each lane. + /// Equivalent to calling [`pointer::addr`] on each element. fn addr(self) -> Self::Usize; /// Creates a new pointer with the given address. @@ -46,7 +46,7 @@ pub trait SimdMutPtr: Copy + Sealed { /// This performs the same operation as a cast, but copies the *address-space* and /// *provenance* of `self` to the new pointer. /// - /// Equivalent to calling [`pointer::with_addr`] on each lane. + /// Equivalent to calling [`pointer::with_addr`] on each element. fn with_addr(self, addr: Self::Usize) -> Self; /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use @@ -55,22 +55,22 @@ pub trait SimdMutPtr: Copy + Sealed { /// Convert an address back to a pointer, picking up a previously "exposed" provenance. /// - /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each lane. + /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each element. fn from_exposed_addr(addr: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. + /// Equivalent to calling [`pointer::wrapping_offset`] on each element. fn wrapping_offset(self, offset: Self::Isize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + /// Equivalent to calling [`pointer::wrapping_add`] on each element. fn wrapping_add(self, count: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. 
/// - /// Equivalent to calling [`pointer::wrapping_sub`] on each lane. + /// Equivalent to calling [`pointer::wrapping_sub`] on each element. fn wrapping_sub(self, count: Self::Usize) -> Self; } diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index ed4bd72b9a5..cfd2cac576a 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -206,11 +206,11 @@ impl Swizzle for Reverse { } /// Rotates the vector such that the first `OFFSET` elements of the slice move to the end - /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`, - /// the element previously in lane `OFFSET` will become the first element in the slice. + /// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`, + /// the element previously at index `OFFSET` will become the first element in the slice. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn rotate_lanes_left(self) -> Self { + pub fn rotate_elements_left(self) -> Self { struct Rotate; impl Swizzle for Rotate { @@ -229,12 +229,12 @@ impl Swizzle for Rotate { Rotate::::swizzle(self) } - /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to - /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`, - /// the element previously at index `LANES - OFFSET` will become the first element in the slice. + /// Rotates the vector such that the first `self.len() - OFFSET` elements of the vector move to + /// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`, + /// the element previously at index `self.len() - OFFSET` will become the first element in the slice. 
#[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn rotate_lanes_right(self) -> Self { + pub fn rotate_elements_right(self) -> Self { struct Rotate; impl Swizzle for Rotate { diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs index 46aaf748ad8..522d71439b7 100644 --- a/crates/core_simd/tests/swizzle.rs +++ b/crates/core_simd/tests/swizzle.rs @@ -34,18 +34,18 @@ fn reverse() { #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn rotate() { let a = Simd::from_array([1, 2, 3, 4]); - assert_eq!(a.rotate_lanes_left::<0>().to_array(), [1, 2, 3, 4]); - assert_eq!(a.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]); - assert_eq!(a.rotate_lanes_left::<2>().to_array(), [3, 4, 1, 2]); - assert_eq!(a.rotate_lanes_left::<3>().to_array(), [4, 1, 2, 3]); - assert_eq!(a.rotate_lanes_left::<4>().to_array(), [1, 2, 3, 4]); - assert_eq!(a.rotate_lanes_left::<5>().to_array(), [2, 3, 4, 1]); - assert_eq!(a.rotate_lanes_right::<0>().to_array(), [1, 2, 3, 4]); - assert_eq!(a.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]); - assert_eq!(a.rotate_lanes_right::<2>().to_array(), [3, 4, 1, 2]); - assert_eq!(a.rotate_lanes_right::<3>().to_array(), [2, 3, 4, 1]); - assert_eq!(a.rotate_lanes_right::<4>().to_array(), [1, 2, 3, 4]); - assert_eq!(a.rotate_lanes_right::<5>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_elements_left::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_elements_left::<1>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_elements_left::<2>().to_array(), [3, 4, 1, 2]); + assert_eq!(a.rotate_elements_left::<3>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_elements_left::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_elements_left::<5>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_elements_right::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_elements_right::<1>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_elements_right::<2>().to_array(), 
[3, 4, 1, 2]); + assert_eq!(a.rotate_elements_right::<3>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_elements_right::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]); } #[test] From 0f594090645912a7d2bd3e238de1e2a8afd83741 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 20:49:58 -0400 Subject: [PATCH 47/59] Change LANES to LEN and self.lanes() to self.len() --- crates/core_simd/src/vector.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 18a0bb0a77e..befbd71359c 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -112,7 +112,7 @@ impl Simd T: SimdElement, { /// Number of elements in this vector. - pub const LANES: usize = N; + pub const LEN: usize = N; /// Returns the number of elements in this SIMD vector. /// @@ -122,11 +122,11 @@ impl Simd /// # #![feature(portable_simd)] /// # use core::simd::u32x4; /// let v = u32x4::splat(0); - /// assert_eq!(v.lanes(), 4); + /// assert_eq!(v.len(), 4); /// ``` #[inline] - pub const fn lanes(&self) -> usize { - Self::LANES + pub const fn len(&self) -> usize { + Self::LEN } /// Constructs a new SIMD vector with all elements set to the given value. 
@@ -273,7 +273,7 @@ impl Swizzle for Splat { #[track_caller] pub const fn from_slice(slice: &[T]) -> Self { assert!( - slice.len() >= Self::LANES, + slice.len() >= Self::LEN, "slice length must be at least the number of elements" ); // SAFETY: We just checked that the slice contains @@ -303,7 +303,7 @@ pub const fn from_slice(slice: &[T]) -> Self { #[track_caller] pub fn copy_to_slice(self, slice: &mut [T]) { assert!( - slice.len() >= Self::LANES, + slice.len() >= Self::LEN, "slice length must be at least the number of elements" ); // SAFETY: We just checked that the slice contains From 32b195ab2bf85c368d2998327b0289e7f563dd4c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 20:58:04 -0400 Subject: [PATCH 48/59] Change generic LANES to N --- crates/core_simd/src/fmt.rs | 4 +- crates/core_simd/src/iter.rs | 16 +- crates/core_simd/src/lane_count.rs | 8 +- crates/core_simd/src/masks.rs | 165 ++++++++++----------- crates/core_simd/src/masks/bitmask.rs | 74 ++++----- crates/core_simd/src/masks/full_masks.rs | 82 +++++----- crates/core_simd/src/masks/to_bitmask.rs | 4 +- crates/core_simd/src/ops.rs | 8 +- crates/core_simd/src/ops/assign.rs | 26 ++-- crates/core_simd/src/ops/deref.rs | 46 +++--- crates/core_simd/src/ops/unary.rs | 46 +++--- crates/core_simd/src/select.rs | 10 +- crates/core_simd/src/simd/cmp/eq.rs | 22 +-- crates/core_simd/src/simd/cmp/ord.rs | 36 ++--- crates/core_simd/src/simd/num/float.rs | 18 +-- crates/core_simd/src/simd/num/int.rs | 14 +- crates/core_simd/src/simd/num/uint.rs | 10 +- crates/core_simd/src/simd/ptr/const_ptr.rs | 18 +-- crates/core_simd/src/simd/ptr/mut_ptr.rs | 16 +- crates/core_simd/src/swizzle.rs | 4 +- 20 files changed, 313 insertions(+), 314 deletions(-) diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs index b7317969cbb..3a540f5a049 100644 --- a/crates/core_simd/src/fmt.rs +++ b/crates/core_simd/src/fmt.rs @@ -1,9 +1,9 @@ use crate::simd::{LaneCount, Simd, SimdElement, 
SupportedLaneCount}; use core::fmt; -impl fmt::Debug for Simd +impl fmt::Debug for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + fmt::Debug, { /// A `Simd` has a debug format like the one for `[T]`: diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs index 328c995b81d..b3732fd74d5 100644 --- a/crates/core_simd/src/iter.rs +++ b/crates/core_simd/src/iter.rs @@ -6,9 +6,9 @@ macro_rules! impl_traits { { $type:ty } => { - impl Sum for Simd<$type, LANES> + impl Sum for Simd<$type, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -16,9 +16,9 @@ fn sum>(iter: I) -> Self { } } - impl Product for Simd<$type, LANES> + impl Product for Simd<$type, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { @@ -26,9 +26,9 @@ fn product>(iter: I) -> Self { } } - impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES> + impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -36,9 +36,9 @@ fn sum>(iter: I) -> Self { } } - impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES> + impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index 2b91eb9e800..4cd7265ed67 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -4,11 +4,11 @@ pub trait Sealed {} use sealed::Sealed; /// Specifies the number of lanes in a SIMD vector as a type. -pub struct LaneCount; +pub struct LaneCount; -impl LaneCount { +impl LaneCount { /// The number of bytes in a bitmask with this many lanes. 
- pub const BITMASK_LEN: usize = (LANES + 7) / 8; + pub const BITMASK_LEN: usize = (N + 7) / 8; } /// Statically guarantees that a lane count is marked as supported. @@ -21,7 +21,7 @@ pub trait SupportedLaneCount: Sealed { type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; } -impl Sealed for LaneCount {} +impl Sealed for LaneCount {} macro_rules! supported_lane_count { ($($lanes:literal),+) => { diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index c3da4468757..bbfd6567cbf 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -31,9 +31,9 @@ mod sealed { /// prevent us from ever removing that bound, or from implementing `MaskElement` on /// non-`PartialEq` types in the future. pub trait Sealed { - fn valid(values: Simd) -> bool + fn valid(values: Simd) -> bool where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, Self: SimdElement; fn eq(self, other: Self) -> bool; @@ -55,9 +55,9 @@ macro_rules! impl_element { { $ty:ty } => { impl Sealed for $ty { #[inline] - fn valid(value: Simd) -> bool + fn valid(value: Simd) -> bool where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() } @@ -80,30 +80,30 @@ unsafe impl MaskElement for $ty {} impl_element! { i64 } impl_element! { isize } -/// A SIMD vector mask for `LANES` elements of width specified by `Element`. +/// A SIMD vector mask for `N` elements of width specified by `Element`. /// /// Masks represent boolean inclusion/exclusion on a per-element basis. /// /// The layout of this type is unspecified, and may change between platforms /// and/or Rust versions, and code should not assume that it is equivalent to -/// `[T; LANES]`. +/// `[T; N]`. 
#[repr(transparent)] -pub struct Mask(mask_impl::Mask) +pub struct Mask(mask_impl::Mask) where T: MaskElement, - LaneCount: SupportedLaneCount; + LaneCount: SupportedLaneCount; -impl Copy for Mask +impl Copy for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } -impl Clone for Mask +impl Clone for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn clone(&self) -> Self { @@ -111,10 +111,10 @@ fn clone(&self) -> Self { } } -impl Mask +impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { /// Construct a mask by setting all elements to the given value. #[inline] @@ -124,7 +124,7 @@ pub fn splat(value: bool) -> Self { /// Converts an array of bools to a SIMD mask. #[inline] - pub fn from_array(array: [bool; LANES]) -> Self { + pub fn from_array(array: [bool; N]) -> Self { // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of // true: 0b_0000_0001 // false: 0b_0000_0000 @@ -132,16 +132,15 @@ pub fn splat(value: bool) -> Self { // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! unsafe { - let bytes: [u8; LANES] = mem::transmute_copy(&array); - let bools: Simd = - intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); + let bytes: [u8; N] = mem::transmute_copy(&array); + let bools: Simd = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); Mask::from_int_unchecked(intrinsics::simd_cast(bools)) } } /// Converts a SIMD mask to an array of bools. #[inline] - pub fn to_array(self) -> [bool; LANES] { + pub fn to_array(self) -> [bool; N] { // This follows mostly the same logic as from_array. 
// SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of // true: 0b_0000_0001 @@ -153,7 +152,7 @@ pub fn splat(value: bool) -> Self { // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! unsafe { - let mut bytes: Simd = intrinsics::simd_cast(self.to_int()); + let mut bytes: Simd = intrinsics::simd_cast(self.to_int()); bytes &= Simd::splat(1i8); mem::transmute_copy(&bytes) } @@ -166,7 +165,7 @@ pub fn splat(value: bool) -> Self { /// All elements must be either 0 or -1. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub unsafe fn from_int_unchecked(value: Simd) -> Self { + pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } } @@ -179,7 +178,7 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] #[track_caller] - pub fn from_int(value: Simd) -> Self { + pub fn from_int(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); // Safety: the validity has been checked unsafe { Self::from_int_unchecked(value) } @@ -189,14 +188,14 @@ pub fn from_int(value: Simd) -> Self { /// represents `true`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_int(self) -> Simd { + pub fn to_int(self) -> Simd { self.0.to_int() } /// Converts the mask to a mask of any other element size. 
#[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn cast(self) -> Mask { + pub fn cast(self) -> Mask { Mask(self.0.convert()) } @@ -219,7 +218,7 @@ pub unsafe fn test_unchecked(&self, index: usize) -> bool { #[must_use = "method returns a new bool and does not mutate the original value"] #[track_caller] pub fn test(&self, index: usize) -> bool { - assert!(index < LANES, "element index out of range"); + assert!(index < N, "element index out of range"); // Safety: the element index has been checked unsafe { self.test_unchecked(index) } } @@ -243,7 +242,7 @@ pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) { #[inline] #[track_caller] pub fn set(&mut self, index: usize, value: bool) { - assert!(index < LANES, "element index out of range"); + assert!(index < N, "element index out of range"); // Safety: the element index has been checked unsafe { self.set_unchecked(index, value); @@ -266,32 +265,32 @@ pub fn all(self) -> bool { } // vector/array conversion -impl From<[bool; LANES]> for Mask +impl From<[bool; N]> for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] - fn from(array: [bool; LANES]) -> Self { + fn from(array: [bool; N]) -> Self { Self::from_array(array) } } -impl From> for [bool; LANES] +impl From> for [bool; N] where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] - fn from(vector: Mask) -> Self { + fn from(vector: Mask) -> Self { vector.to_array() } } -impl Default for Mask +impl Default for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] #[must_use = "method returns a defaulted mask with all elements set to false (0)"] @@ -300,10 +299,10 @@ fn default() -> Self { } } -impl PartialEq for Mask +impl PartialEq for Mask where T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] 
#[must_use = "method returns a new bool and does not mutate the original value"] @@ -312,10 +311,10 @@ fn eq(&self, other: &Self) -> bool { } } -impl PartialOrd for Mask +impl PartialOrd for Mask where T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] #[must_use = "method returns a new Ordering and does not mutate the original value"] @@ -324,23 +323,23 @@ fn partial_cmp(&self, other: &Self) -> Option { } } -impl fmt::Debug for Mask +impl fmt::Debug for Mask where T: MaskElement + fmt::Debug, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() - .entries((0..LANES).map(|i| self.test(i))) + .entries((0..N).map(|i| self.test(i))) .finish() } } -impl core::ops::BitAnd for Mask +impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -350,10 +349,10 @@ fn bitand(self, rhs: Self) -> Self { } } -impl core::ops::BitAnd for Mask +impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -363,23 +362,23 @@ fn bitand(self, rhs: bool) -> Self { } } -impl core::ops::BitAnd> for bool +impl core::ops::BitAnd> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Mask; + type Output = Mask; #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - fn bitand(self, rhs: Mask) -> Mask { + fn bitand(self, rhs: Mask) -> Mask { Mask::splat(self) & rhs } } -impl core::ops::BitOr for Mask +impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -389,10 +388,10 @@ fn bitor(self, rhs: Self) -> Self { } } -impl core::ops::BitOr for Mask +impl 
core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -402,23 +401,23 @@ fn bitor(self, rhs: bool) -> Self { } } -impl core::ops::BitOr> for bool +impl core::ops::BitOr> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Mask; + type Output = Mask; #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - fn bitor(self, rhs: Mask) -> Mask { + fn bitor(self, rhs: Mask) -> Mask { Mask::splat(self) | rhs } } -impl core::ops::BitXor for Mask +impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -428,10 +427,10 @@ fn bitxor(self, rhs: Self) -> Self::Output { } } -impl core::ops::BitXor for Mask +impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -441,25 +440,25 @@ fn bitxor(self, rhs: bool) -> Self::Output { } } -impl core::ops::BitXor> for bool +impl core::ops::BitXor> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Mask; + type Output = Mask; #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - fn bitxor(self, rhs: Mask) -> Self::Output { + fn bitxor(self, rhs: Mask) -> Self::Output { Mask::splat(self) ^ rhs } } -impl core::ops::Not for Mask +impl core::ops::Not for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Mask; + type Output = Mask; #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] fn not(self) -> Self::Output { @@ -467,10 +466,10 @@ fn not(self) -> Self::Output { } } -impl core::ops::BitAndAssign for Mask +impl core::ops::BitAndAssign for Mask where T: 
MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: Self) { @@ -478,10 +477,10 @@ fn bitand_assign(&mut self, rhs: Self) { } } -impl core::ops::BitAndAssign for Mask +impl core::ops::BitAndAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: bool) { @@ -489,10 +488,10 @@ fn bitand_assign(&mut self, rhs: bool) { } } -impl core::ops::BitOrAssign for Mask +impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: Self) { @@ -500,10 +499,10 @@ fn bitor_assign(&mut self, rhs: Self) { } } -impl core::ops::BitOrAssign for Mask +impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: bool) { @@ -511,10 +510,10 @@ fn bitor_assign(&mut self, rhs: bool) { } } -impl core::ops::BitXorAssign for Mask +impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: Self) { @@ -522,10 +521,10 @@ fn bitxor_assign(&mut self, rhs: Self) { } } -impl core::ops::BitXorAssign for Mask +impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: bool) { @@ -536,12 +535,12 @@ fn bitxor_assign(&mut self, rhs: bool) { macro_rules! 
impl_from { { $from:ty => $($to:ty),* } => { $( - impl From> for Mask<$to, LANES> + impl From> for Mask<$to, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] - fn from(value: Mask<$from, LANES>) -> Self { + fn from(value: Mask<$from, N>) -> Self { value.cast() } } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index a7df6304bc7..b54912afda5 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -6,25 +6,25 @@ /// A mask where each lane is represented by a single bit. #[repr(transparent)] -pub struct Mask( - as SupportedLaneCount>::BitMask, +pub struct Mask( + as SupportedLaneCount>::BitMask, PhantomData, ) where T: MaskElement, - LaneCount: SupportedLaneCount; + LaneCount: SupportedLaneCount; -impl Copy for Mask +impl Copy for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } -impl Clone for Mask +impl Clone for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn clone(&self) -> Self { @@ -32,10 +32,10 @@ fn clone(&self) -> Self { } } -impl PartialEq for Mask +impl PartialEq for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -43,10 +43,10 @@ fn eq(&self, other: &Self) -> bool { } } -impl PartialOrd for Mask +impl PartialOrd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -54,17 +54,17 @@ fn partial_cmp(&self, other: &Self) -> Option { } } -impl Eq for Mask +impl Eq for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } -impl Ord for Mask +impl Ord for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn cmp(&self, other: &Self) -> 
core::cmp::Ordering { @@ -72,22 +72,22 @@ fn cmp(&self, other: &Self) -> core::cmp::Ordering { } } -impl Mask +impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn splat(value: bool) -> Self { - let mut mask = as SupportedLaneCount>::BitMask::default(); + let mut mask = as SupportedLaneCount>::BitMask::default(); if value { mask.as_mut().fill(u8::MAX) } else { mask.as_mut().fill(u8::MIN) } - if LANES % 8 > 0 { - *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8); + if N % 8 > 0 { + *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); } Self(mask, PhantomData) } @@ -107,7 +107,7 @@ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_int(self) -> Simd { + pub fn to_int(self) -> Simd { unsafe { intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE)) } @@ -115,7 +115,7 @@ pub fn to_int(self) -> Simd { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub unsafe fn from_int_unchecked(value: Simd) -> Self { + pub unsafe fn from_int_unchecked(value: Simd) -> Self { unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } @@ -140,7 +140,7 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { #[inline] pub fn to_bitmask_integer(self) -> U where - super::Mask: ToBitMask, + super::Mask: ToBitMask, { // Safety: these are the same types unsafe { core::mem::transmute_copy(&self.0) } @@ -149,7 +149,7 @@ pub fn to_bitmask_integer(self) -> U #[inline] pub fn from_bitmask_integer(bitmask: U) -> Self where - super::Mask: ToBitMask, + super::Mask: ToBitMask, { // Safety: these are the same types unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } @@ -157,7 +157,7 @@ pub fn from_bitmask_integer(bitmask: U) 
-> Self #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn convert(self) -> Mask + pub fn convert(self) -> Mask where U: MaskElement, { @@ -178,11 +178,11 @@ pub fn all(self) -> bool { } } -impl core::ops::BitAnd for Mask +impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, + LaneCount: SupportedLaneCount, + as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, { type Output = Self; #[inline] @@ -195,11 +195,11 @@ fn bitand(mut self, rhs: Self) -> Self { } } -impl core::ops::BitOr for Mask +impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, + LaneCount: SupportedLaneCount, + as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, { type Output = Self; #[inline] @@ -212,10 +212,10 @@ fn bitor(mut self, rhs: Self) -> Self { } } -impl core::ops::BitXor for Mask +impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -228,10 +228,10 @@ fn bitxor(mut self, rhs: Self) -> Self::Output { } } -impl core::ops::Not for Mask +impl core::ops::Not for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -240,8 +240,8 @@ fn not(mut self) -> Self::Output { for x in self.0.as_mut() { *x = !*x; } - if LANES % 8 > 0 { - *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8); + if N % 8 > 0 { + *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); } self } diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 4b36adece71..2aa9272ab46 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -5,22 +5,22 @@ use crate::simd::{LaneCount, Simd, 
SupportedLaneCount, ToBitMask}; #[repr(transparent)] -pub struct Mask(Simd) +pub struct Mask(Simd) where T: MaskElement, - LaneCount: SupportedLaneCount; + LaneCount: SupportedLaneCount; -impl Copy for Mask +impl Copy for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } -impl Clone for Mask +impl Clone for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] @@ -29,10 +29,10 @@ fn clone(&self) -> Self { } } -impl PartialEq for Mask +impl PartialEq for Mask where T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -40,10 +40,10 @@ fn eq(&self, other: &Self) -> bool { } } -impl PartialOrd for Mask +impl PartialOrd for Mask where T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -51,17 +51,17 @@ fn partial_cmp(&self, other: &Self) -> Option { } } -impl Eq for Mask +impl Eq for Mask where T: MaskElement + Eq, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } -impl Ord for Mask +impl Ord for Mask where T: MaskElement + Ord, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn cmp(&self, other: &Self) -> core::cmp::Ordering { @@ -98,10 +98,10 @@ fn reverse_bits(self, n: usize) -> Self { impl_reverse_bits! 
{ u8, u16, u32, u64 } -impl Mask +impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] @@ -122,19 +122,19 @@ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_int(self) -> Simd { + pub fn to_int(self) -> Simd { self.0 } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub unsafe fn from_int_unchecked(value: Simd) -> Self { + pub unsafe fn from_int_unchecked(value: Simd) -> Self { Self(value) } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn convert(self) -> Mask + pub fn convert(self) -> Mask where U: MaskElement, { @@ -144,18 +144,18 @@ pub fn convert(self) -> Mask #[inline] #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask_array(self) -> [u8; N] + pub fn to_bitmask_array(self) -> [u8; M] where - super::Mask: ToBitMaskArray, + super::Mask: ToBitMaskArray, { // Safety: Bytes is the right size array unsafe { // Compute the bitmask - let bitmask: as ToBitMaskArray>::BitMaskArray = + let bitmask: as ToBitMaskArray>::BitMaskArray = intrinsics::simd_bitmask(self.0); // Transmute to the return type - let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask); + let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask); // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -170,9 +170,9 @@ pub fn convert(self) -> Mask #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_array(mut bitmask: [u8; N]) -> Self + pub fn from_bitmask_array(mut bitmask: [u8; M]) -> Self where - super::Mask: ToBitMaskArray, + super::Mask: ToBitMaskArray, { // Safety: Bytes is the right size array 
unsafe { @@ -184,7 +184,7 @@ pub fn convert(self) -> Mask } // Transmute to the bitmask - let bitmask: as ToBitMaskArray>::BitMaskArray = + let bitmask: as ToBitMaskArray>::BitMaskArray = core::mem::transmute_copy(&bitmask); // Compute the regular mask @@ -199,14 +199,14 @@ pub fn convert(self) -> Mask #[inline] pub(crate) fn to_bitmask_integer(self) -> U where - super::Mask: ToBitMask, + super::Mask: ToBitMask, { // Safety: U is required to be the appropriate bitmask type let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) }; // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - bitmask.reverse_bits(LANES) + bitmask.reverse_bits(N) } else { bitmask } @@ -215,11 +215,11 @@ pub(crate) fn to_bitmask_integer(self) -> U #[inline] pub(crate) fn from_bitmask_integer(bitmask: U) -> Self where - super::Mask: ToBitMask, + super::Mask: ToBitMask, { // LLVM assumes bit order should match endianness let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits(LANES) + bitmask.reverse_bits(N) } else { bitmask }; @@ -249,21 +249,21 @@ pub fn all(self) -> bool { } } -impl From> for Simd +impl From> for Simd where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] - fn from(value: Mask) -> Self { + fn from(value: Mask) -> Self { value.0 } } -impl core::ops::BitAnd for Mask +impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -274,10 +274,10 @@ fn bitand(self, rhs: Self) -> Self { } } -impl core::ops::BitOr for Mask +impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -288,10 +288,10 @@ fn bitor(self, rhs: Self) -> Self { } } -impl core::ops::BitXor for Mask +impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = 
Self; #[inline] @@ -302,10 +302,10 @@ fn bitxor(self, rhs: Self) -> Self { } } -impl core::ops::Not for Mask +impl core::ops::Not for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; #[inline] diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 382928ac308..06f09c65aca 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -7,10 +7,10 @@ pub trait Sealed {} } pub use sealed::Sealed; -impl Sealed for Mask +impl Sealed for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index d1b4a504884..8a1b083f039 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -9,10 +9,10 @@ mod shift_scalar; mod unary; -impl core::ops::Index for Simd +impl core::ops::Index for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { type Output = I::Output; @@ -22,10 +22,10 @@ fn index(&self, index: I) -> &Self::Output { } } -impl core::ops::IndexMut for Simd +impl core::ops::IndexMut for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { #[inline] diff --git a/crates/core_simd/src/ops/assign.rs b/crates/core_simd/src/ops/assign.rs index d2b48614fc9..0e87785025a 100644 --- a/crates/core_simd/src/ops/assign.rs +++ b/crates/core_simd/src/ops/assign.rs @@ -8,7 +8,7 @@ // Arithmetic macro_rules! 
assign_ops { - ($(impl $assignTrait:ident for Simd + ($(impl $assignTrait:ident for Simd where Self: $trait:ident, { @@ -16,11 +16,11 @@ fn $assign_call:ident(rhs: U) { $call:ident } })*) => { - $(impl $assignTrait for Simd + $(impl $assignTrait for Simd where Self: $trait, T: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn $assign_call(&mut self, rhs: U) { @@ -32,7 +32,7 @@ fn $assign_call(&mut self, rhs: U) { assign_ops! { // Arithmetic - impl AddAssign for Simd + impl AddAssign for Simd where Self: Add, { @@ -41,7 +41,7 @@ fn add_assign(rhs: U) { } } - impl MulAssign for Simd + impl MulAssign for Simd where Self: Mul, { @@ -50,7 +50,7 @@ fn mul_assign(rhs: U) { } } - impl SubAssign for Simd + impl SubAssign for Simd where Self: Sub, { @@ -59,7 +59,7 @@ fn sub_assign(rhs: U) { } } - impl DivAssign for Simd + impl DivAssign for Simd where Self: Div, { @@ -67,7 +67,7 @@ fn div_assign(rhs: U) { div } } - impl RemAssign for Simd + impl RemAssign for Simd where Self: Rem, { @@ -77,7 +77,7 @@ fn rem_assign(rhs: U) { } // Bitops - impl BitAndAssign for Simd + impl BitAndAssign for Simd where Self: BitAnd, { @@ -86,7 +86,7 @@ fn bitand_assign(rhs: U) { } } - impl BitOrAssign for Simd + impl BitOrAssign for Simd where Self: BitOr, { @@ -95,7 +95,7 @@ fn bitor_assign(rhs: U) { } } - impl BitXorAssign for Simd + impl BitXorAssign for Simd where Self: BitXor, { @@ -104,7 +104,7 @@ fn bitxor_assign(rhs: U) { } } - impl ShlAssign for Simd + impl ShlAssign for Simd where Self: Shl, { @@ -113,7 +113,7 @@ fn shl_assign(rhs: U) { } } - impl ShrAssign for Simd + impl ShrAssign for Simd where Self: Shr, { diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs index 302bf148bd3..89a60ba1141 100644 --- a/crates/core_simd/src/ops/deref.rs +++ b/crates/core_simd/src/ops/deref.rs @@ -5,16 +5,16 @@ use super::*; macro_rules! 
deref_lhs { - (impl $trait:ident for $simd:ty { + (impl $trait:ident for $simd:ty { fn $call:ident }) => { - impl $trait<$simd> for &$simd + impl $trait<$simd> for &$simd where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Simd; + type Output = Simd; #[inline] #[must_use = "operator returns a new vector without mutating the inputs"] @@ -26,16 +26,16 @@ fn $call(self, rhs: $simd) -> Self::Output { } macro_rules! deref_rhs { - (impl $trait:ident for $simd:ty { + (impl $trait:ident for $simd:ty { fn $call:ident }) => { - impl $trait<&$simd> for $simd + impl $trait<&$simd> for $simd where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Output = Simd; + type Output = Simd; #[inline] #[must_use = "operator returns a new vector without mutating the inputs"] @@ -47,25 +47,25 @@ fn $call(self, rhs: &$simd) -> Self::Output { } macro_rules! deref_ops { - ($(impl $trait:ident for $simd:ty { + ($(impl $trait:ident for $simd:ty { fn $call:ident })*) => { $( deref_rhs! { - impl $trait for $simd { + impl $trait for $simd { fn $call } } deref_lhs! { - impl $trait for $simd { + impl $trait for $simd { fn $call } } - impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd + impl<'lhs, 'rhs, T, const N: usize> $trait<&'rhs $simd> for &'lhs $simd where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = $simd; @@ -81,44 +81,44 @@ fn $call(self, rhs: &'rhs $simd) -> Self::Output { deref_ops! 
{ // Arithmetic - impl Add for Simd { + impl Add for Simd { fn add } - impl Mul for Simd { + impl Mul for Simd { fn mul } - impl Sub for Simd { + impl Sub for Simd { fn sub } - impl Div for Simd { + impl Div for Simd { fn div } - impl Rem for Simd { + impl Rem for Simd { fn rem } // Bitops - impl BitAnd for Simd { + impl BitAnd for Simd { fn bitand } - impl BitOr for Simd { + impl BitOr for Simd { fn bitor } - impl BitXor for Simd { + impl BitXor for Simd { fn bitxor } - impl Shl for Simd { + impl Shl for Simd { fn shl } - impl Shr for Simd { + impl Shr for Simd { fn shr } } diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index 4ad02215034..a651aa73e95 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -3,11 +3,11 @@ use core::ops::{Neg, Not}; // unary ops macro_rules! neg { - ($(impl Neg for Simd<$scalar:ty, LANES>)*) => { - $(impl Neg for Simd<$scalar, LANES> + ($(impl Neg for Simd<$scalar:ty, N>)*) => { + $(impl Neg for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; @@ -22,27 +22,27 @@ fn neg(self) -> Self::Output { } neg! { - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd - impl Neg for Simd + impl Neg for Simd } macro_rules! not { - ($(impl Not for Simd<$scalar:ty, LANES>)*) => { - $(impl Not for Simd<$scalar, LANES> + ($(impl Not for Simd<$scalar:ty, N>)*) => { + $(impl Not for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Output = Self; @@ -56,23 +56,23 @@ fn not(self) -> Self::Output { } not! 
{ - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd - impl Not for Simd + impl Not for Simd } diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index a1a26032e87..2345f53a0de 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,10 +1,10 @@ use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; -impl Mask +impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { /// Choose elements from two vectors. /// @@ -25,9 +25,9 @@ impl Mask #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn select( self, - true_values: Simd, - false_values: Simd, - ) -> Simd + true_values: Simd, + false_values: Simd, + ) -> Simd where U: SimdElement, { diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index 0ca0401b7ed..f132fa2cc0c 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -21,11 +21,11 @@ pub trait SimdPartialEq { macro_rules! impl_number { { $($number:ty),* } => { $( - impl SimdPartialEq for Simd<$number, LANES> + impl SimdPartialEq for Simd<$number, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Mask = Mask<<$number as SimdElement>::Mask, LANES>; + type Mask = Mask<<$number as SimdElement>::Mask, N>; #[inline] fn simd_eq(self, other: Self) -> Self::Mask { @@ -50,9 +50,9 @@ fn simd_ne(self, other: Self) -> Self::Mask { macro_rules! 
impl_mask { { $($integer:ty),* } => { $( - impl SimdPartialEq for Mask<$integer, LANES> + impl SimdPartialEq for Mask<$integer, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Mask = Self; @@ -76,11 +76,11 @@ fn simd_ne(self, other: Self) -> Self::Mask { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialEq for Simd<*const T, LANES> +impl SimdPartialEq for Simd<*const T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Mask = Mask; + type Mask = Mask; #[inline] fn simd_eq(self, other: Self) -> Self::Mask { @@ -93,11 +93,11 @@ fn simd_ne(self, other: Self) -> Self::Mask { } } -impl SimdPartialEq for Simd<*mut T, LANES> +impl SimdPartialEq for Simd<*mut T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Mask = Mask; + type Mask = Mask; #[inline] fn simd_eq(self, other: Self) -> Self::Mask { diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 667eb00e111..4e9d49ea221 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -49,9 +49,9 @@ pub trait SimdOrd: SimdPartialOrd { macro_rules! impl_integer { { $($integer:ty),* } => { $( - impl SimdPartialOrd for Simd<$integer, LANES> + impl SimdPartialOrd for Simd<$integer, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { @@ -82,9 +82,9 @@ fn simd_ge(self, other: Self) -> Self::Mask { } } - impl SimdOrd for Simd<$integer, LANES> + impl SimdOrd for Simd<$integer, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { @@ -115,9 +115,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self { macro_rules! 
impl_float { { $($float:ty),* } => { $( - impl SimdPartialOrd for Simd<$float, LANES> + impl SimdPartialOrd for Simd<$float, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { @@ -156,9 +156,9 @@ fn simd_ge(self, other: Self) -> Self::Mask { macro_rules! impl_mask { { $($integer:ty),* } => { $( - impl SimdPartialOrd for Mask<$integer, LANES> + impl SimdPartialOrd for Mask<$integer, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { @@ -189,9 +189,9 @@ fn simd_ge(self, other: Self) -> Self::Mask { } } - impl SimdOrd for Mask<$integer, LANES> + impl SimdOrd for Mask<$integer, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { @@ -219,9 +219,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialOrd for Simd<*const T, LANES> +impl SimdPartialOrd for Simd<*const T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { @@ -244,9 +244,9 @@ fn simd_ge(self, other: Self) -> Self::Mask { } } -impl SimdOrd for Simd<*const T, LANES> +impl SimdOrd for Simd<*const T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { @@ -269,9 +269,9 @@ fn simd_clamp(self, min: Self, max: Self) -> Self { } } -impl SimdPartialOrd for Simd<*mut T, LANES> +impl SimdPartialOrd for Simd<*mut T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { @@ -294,9 +294,9 @@ fn simd_ge(self, other: Self) -> Self::Mask { } } -impl SimdOrd for Simd<*mut T, LANES> +impl SimdOrd for Simd<*mut T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { #[inline] fn 
simd_max(self, other: Self) -> Self { diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index e8378c3147a..fc0b99e87a6 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -240,20 +240,20 @@ unsafe fn to_int_unchecked(self) -> Self::Cast macro_rules! impl_trait { { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => { $( - impl Sealed for Simd<$ty, LANES> + impl Sealed for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } - impl SimdFloat for Simd<$ty, LANES> + impl SimdFloat for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>; + type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>; type Scalar = $ty; - type Bits = Simd<$bits_ty, LANES>; - type Cast = Simd; + type Bits = Simd<$bits_ty, N>; + type Cast = Simd; #[inline] fn cast(self) -> Self::Cast @@ -273,14 +273,14 @@ unsafe fn to_int_unchecked(self) -> Self::Cast } #[inline] - fn to_bits(self) -> Simd<$bits_ty, LANES> { + fn to_bits(self) -> Simd<$bits_ty, N> { assert_eq!(core::mem::size_of::(), core::mem::size_of::()); // Safety: transmuting between vector types is safe unsafe { core::mem::transmute_copy(&self) } } #[inline] - fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { + fn from_bits(bits: Simd<$bits_ty, N>) -> Self { assert_eq!(core::mem::size_of::(), core::mem::size_of::()); // Safety: transmuting between vector types is safe unsafe { core::mem::transmute_copy(&bits) } diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index 5b0df597b24..1f1aa272782 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -219,20 +219,20 @@ pub trait SimdInt: Copy + Sealed { macro_rules! 
impl_trait { { $($ty:ident ($unsigned:ident)),* } => { $( - impl Sealed for Simd<$ty, LANES> + impl Sealed for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } - impl SimdInt for Simd<$ty, LANES> + impl SimdInt for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Mask = Mask<<$ty as SimdElement>::Mask, LANES>; + type Mask = Mask<<$ty as SimdElement>::Mask, N>; type Scalar = $ty; - type Unsigned = Simd<$unsigned, LANES>; - type Cast = Simd; + type Unsigned = Simd<$unsigned, N>; + type Cast = Simd; #[inline] fn cast(self) -> Self::Cast { diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index 9c91e147a7a..c955ee8fe8b 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -101,18 +101,18 @@ pub trait SimdUint: Copy + Sealed { macro_rules! impl_trait { { $($ty:ident ($signed:ident)),* } => { $( - impl Sealed for Simd<$ty, LANES> + impl Sealed for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { } - impl SimdUint for Simd<$ty, LANES> + impl SimdUint for Simd<$ty, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { type Scalar = $ty; - type Cast = Simd; + type Cast = Simd; #[inline] fn cast(self) -> Self::Cast { diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index 246fc7ee381..dd4525e991b 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -77,20 +77,20 @@ pub trait SimdConstPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*const T, LANES> where - LaneCount: SupportedLaneCount +impl Sealed for Simd<*const T, N> where + LaneCount: SupportedLaneCount { } -impl SimdConstPtr for Simd<*const T, LANES> +impl SimdConstPtr for Simd<*const T, N> where - LaneCount: SupportedLaneCount, + LaneCount: 
SupportedLaneCount, { - type Usize = Simd; - type Isize = Simd; - type CastPtr = Simd<*const U, LANES>; - type MutPtr = Simd<*mut T, LANES>; - type Mask = Mask; + type Usize = Simd; + type Isize = Simd; + type CastPtr = Simd<*const U, N>; + type MutPtr = Simd<*mut T, N>; + type Mask = Mask; #[inline] fn is_null(self) -> Self::Mask { diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index 69c927eb11a..8cdec74dda3 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -74,18 +74,18 @@ pub trait SimdMutPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*mut T, LANES> where LaneCount: SupportedLaneCount +impl Sealed for Simd<*mut T, N> where LaneCount: SupportedLaneCount {} -impl SimdMutPtr for Simd<*mut T, LANES> +impl SimdMutPtr for Simd<*mut T, N> where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { - type Usize = Simd; - type Isize = Simd; - type CastPtr = Simd<*mut U, LANES>; - type ConstPtr = Simd<*const T, LANES>; - type Mask = Mask; + type Usize = Simd; + type Isize = Simd; + type CastPtr = Simd<*mut U, N>; + type ConstPtr = Simd<*const T, N>; + type Mask = Mask; #[inline] fn is_null(self) -> Self::Mask { diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index cfd2cac576a..6af882c0a0e 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -179,10 +179,10 @@ fn concat_swizzle_mask(first: Mask, second: Mask) } } -impl Simd +impl Simd where T: SimdElement, - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, { /// Reverse the order of the elements in the vector. 
#[inline] From 4f7b0252e0aa503326f85cb4b59cae0bb5f3e166 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 21:07:18 -0400 Subject: [PATCH 49/59] Fix formatting --- crates/core_simd/src/select.rs | 6 +----- crates/core_simd/src/simd/ptr/const_ptr.rs | 5 +---- crates/core_simd/src/simd/ptr/mut_ptr.rs | 3 +-- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index 2345f53a0de..cdcf8eeec81 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -23,11 +23,7 @@ impl Mask /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn select( - self, - true_values: Simd, - false_values: Simd, - ) -> Simd + pub fn select(self, true_values: Simd, false_values: Simd) -> Simd where U: SimdElement, { diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index dd4525e991b..97fe3fb600d 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -77,10 +77,7 @@ pub trait SimdConstPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*const T, N> where - LaneCount: SupportedLaneCount -{ -} +impl Sealed for Simd<*const T, N> where LaneCount: SupportedLaneCount {} impl SimdConstPtr for Simd<*const T, N> where diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index 8cdec74dda3..e35633d0433 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -74,8 +74,7 @@ pub trait SimdMutPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*mut T, N> where LaneCount: SupportedLaneCount -{} +impl Sealed for Simd<*mut T, N> where LaneCount: SupportedLaneCount {} impl SimdMutPtr for Simd<*mut T, N> where From 56b6ee01f89fdd8ed8c608ae29e0ae665761f811 Mon Sep 17 
00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 21:15:28 -0400 Subject: [PATCH 50/59] Fix test import --- crates/core_simd/src/vector.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index befbd71359c..6b7c7f1436a 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -120,7 +120,9 @@ impl Simd /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::u32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::u32x4; /// let v = u32x4::splat(0); /// assert_eq!(v.len(), 4); /// ``` @@ -135,7 +137,9 @@ pub const fn len(&self) -> usize { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::u32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::u32x4; /// let v = u32x4::splat(8); /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` From d06dc5c269413371482f34d5444ed857d71c9e1c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 20 Oct 2023 21:28:19 -0400 Subject: [PATCH 51/59] Fix bitmasks --- crates/core_simd/src/masks/bitmask.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index b54912afda5..aaae28a07be 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -121,8 +121,8 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { #[inline] #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask_array(self) -> [u8; N] { - assert!(core::mem::size_of::() == N); + pub fn to_bitmask_array(self) -> [u8; M] { + assert!(core::mem::size_of::() == M); // Safety: converting an integer to an array of bytes of the same size is safe unsafe { 
core::mem::transmute_copy(&self.0) } @@ -130,8 +130,8 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_array(bitmask: [u8; N]) -> Self { - assert!(core::mem::size_of::() == N); + pub fn from_bitmask_array(bitmask: [u8; M]) -> Self { + assert!(core::mem::size_of::() == M); // Safety: converting an array of bytes to an integer of the same size is safe Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) From 4cc260e497114b4bae3c095eddaf58dfc07dea68 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 23 Oct 2023 11:03:05 -0400 Subject: [PATCH 52/59] Update crates/core_simd/src/masks.rs Co-authored-by: Jacob Lifshay --- crates/core_simd/src/masks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index bbfd6567cbf..1199153a5bd 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -202,7 +202,7 @@ pub fn cast(self) -> Mask { /// Tests the value of the specified element. /// /// # Safety - /// `element` must be less than `self.len()`. + /// `index` must be less than `self.len()`. 
#[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, index: usize) -> bool { From 8d9bcda64cfe5f4dd172620d5d0eacadbdb13751 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 17 Nov 2023 00:48:35 -0500 Subject: [PATCH 53/59] Fix or silence lints --- crates/core_simd/examples/nbody.rs | 1 + crates/core_simd/src/mod.rs | 1 - crates/core_simd/src/to_bytes.rs | 10 ++++++++-- crates/core_simd/src/vector.rs | 1 + crates/core_simd/tests/ops_macros.rs | 2 ++ crates/core_simd/tests/swizzle_dyn.rs | 1 - 6 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 154e24c460e..65820d1340b 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -1,4 +1,5 @@ #![feature(portable_simd)] +#![allow(clippy::excessive_precision)] extern crate std_float; /// Benchmarks game nbody code diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 6fd458d24e7..fd016f1c6f7 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -34,7 +34,6 @@ pub mod simd { pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; pub use crate::core_simd::swizzle::*; - pub use crate::core_simd::swizzle_dyn::*; pub use crate::core_simd::to_bytes::ToBytes; pub use crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index dd01929551c..222526c4ab3 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -68,7 +68,10 @@ impl ToBytes for Simd<$ty, $elems> { #[inline] fn to_ne_bytes(self) -> Self::Bytes { // Safety: transmuting between vectors is safe - unsafe { core::mem::transmute(self) } + unsafe { + #![allow(clippy::useless_transmute)] + core::mem::transmute(self) + } } #[inline] @@ -90,7 +93,10 @@ fn to_le_bytes(mut self) -> Self::Bytes { 
#[inline] fn from_ne_bytes(bytes: Self::Bytes) -> Self { // Safety: transmuting between vectors is safe - unsafe { core::mem::transmute(bytes) } + unsafe { + #![allow(clippy::useless_transmute)] + core::mem::transmute(bytes) + } } #[inline] diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 6b7c7f1436a..105c06741c5 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -127,6 +127,7 @@ impl Simd /// assert_eq!(v.len(), 4); /// ``` #[inline] + #[allow(clippy::len_without_is_empty)] pub const fn len(&self) -> usize { Self::LEN } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 50faba04991..aa565a13752 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -68,6 +68,7 @@ mod $fn { test_helpers::test_lanes! { fn normal() { + #![allow(clippy::redundant_closure_call)] test_helpers::test_binary_elementwise( & as core::ops::$trait>::$fn, &$scalar_fn, @@ -76,6 +77,7 @@ fn normal() { } fn assign() { + #![allow(clippy::redundant_closure_call)] test_helpers::test_binary_elementwise( &|mut a, b| { as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, &$scalar_fn, diff --git a/crates/core_simd/tests/swizzle_dyn.rs b/crates/core_simd/tests/swizzle_dyn.rs index 646cd5f3383..f21a937f01c 100644 --- a/crates/core_simd/tests/swizzle_dyn.rs +++ b/crates/core_simd/tests/swizzle_dyn.rs @@ -1,6 +1,5 @@ #![feature(portable_simd)] use core::{fmt, ops::RangeInclusive}; -use proptest; use test_helpers::{self, biteq, make_runner, prop_assert_biteq}; fn swizzle_dyn_scalar_ver(values: [u8; N], idxs: [u8; N]) -> [u8; N] { From 4ca9f04db5e9d551d69ffa27357a44435fd3af98 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 17 Nov 2023 00:25:16 -0500 Subject: [PATCH 54/59] Simplify bitmasks --- crates/core_simd/src/masks.rs | 42 ++++++++- crates/core_simd/src/masks/bitmask.rs | 69 +++++++++----- crates/core_simd/src/masks/full_masks.rs | 97 
+++++++++++--------- crates/core_simd/src/masks/to_bitmask.rs | 111 ----------------------- crates/core_simd/src/swizzle.rs | 35 +++++++ crates/core_simd/tests/masks.rs | 18 ++-- 6 files changed, 183 insertions(+), 189 deletions(-) delete mode 100644 crates/core_simd/src/masks/to_bitmask.rs diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 1199153a5bd..5c0ae303162 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,9 +12,6 @@ )] mod mask_impl; -mod to_bitmask; -pub use to_bitmask::{ToBitMask, ToBitMaskArray}; - use crate::simd::{ cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount, }; @@ -262,6 +259,45 @@ pub fn any(self) -> bool { pub fn all(self) -> bool { self.0.all() } + + /// Create a bitmask from a mask. + /// + /// Each bit is set if the corresponding element in the mask is `true`. + /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64. + #[inline] + #[must_use = "method returns a new integer and does not mutate the original value"] + pub fn to_bitmask(self) -> u64 { + self.0.to_bitmask_integer() + } + + /// Create a mask from a bitmask. + /// + /// For each bit, if it is set, the corresponding element in the mask is set to `true`. + /// If the mask contains more than 64 elements, the remainder are set to `false`. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_bitmask(bitmask: u64) -> Self { + Self(mask_impl::Mask::from_bitmask_integer(bitmask)) + } + + /// Create a bitmask vector from a mask. + /// + /// Each bit is set if the corresponding element in the mask is `true`. + /// The remaining bits are unset. + #[inline] + #[must_use = "method returns a new integer and does not mutate the original value"] + pub fn to_bitmask_vector(self) -> Simd { + self.0.to_bitmask_vector() + } + + /// Create a mask from a bitmask vector. 
+ /// + /// For each bit, if it is set, the corresponding element in the mask is set to `true`. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_bitmask_vector(bitmask: Simd) -> Self { + Self(mask_impl::Mask::from_bitmask_vector(bitmask)) + } } // vector/array conversion diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index aaae28a07be..21d9e49a1b5 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount}; use core::marker::PhantomData; /// A mask where each lane is represented by a single bit. @@ -120,39 +120,64 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { } #[inline] - #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask_array(self) -> [u8; M] { - assert!(core::mem::size_of::() == M); + #[must_use = "method returns a new vector and does not mutate the original value"] + pub fn to_bitmask_vector(self) -> Simd { + let mut bitmask = Self::splat(false).to_int(); - // Safety: converting an integer to an array of bytes of the same size is safe - unsafe { core::mem::transmute_copy(&self.0) } + assert!( + core::mem::size_of::>() + >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() + ); + + // Safety: the bitmask vector is big enough to hold the bitmask + unsafe { + core::ptr::copy_nonoverlapping( + self.0.as_ref().as_ptr(), + bitmask.as_mut_array().as_mut_ptr() as _, + self.0.as_ref().len(), + ); + } + + bitmask } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_array(bitmask: [u8; M]) -> Self { - assert!(core::mem::size_of::() == M); + pub fn from_bitmask_vector(bitmask: Simd) -> Self 
{ + let mut bytes = as SupportedLaneCount>::BitMask::default(); - // Safety: converting an array of bytes to an integer of the same size is safe - Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) + assert!( + core::mem::size_of::>() + >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() + ); + + // Safety: the bitmask vector is big enough to hold the bitmask + unsafe { + core::ptr::copy_nonoverlapping( + bitmask.as_array().as_ptr() as _, + bytes.as_mut().as_mut_ptr(), + bytes.as_ref().len(), + ); + } + + Self(bytes, PhantomData) } #[inline] - pub fn to_bitmask_integer(self) -> U - where - super::Mask: ToBitMask, - { - // Safety: these are the same types - unsafe { core::mem::transmute_copy(&self.0) } + pub fn to_bitmask_integer(self) -> u64 { + let mut bitmask = [0u8; 8]; + bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); + u64::from_ne_bytes(bitmask) } #[inline] - pub fn from_bitmask_integer(bitmask: U) -> Self - where - super::Mask: ToBitMask, - { - // Safety: these are the same types - unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } + pub fn from_bitmask_integer(bitmask: u64) -> Self { + let mut bytes = as SupportedLaneCount>::BitMask::default(); + let len = bytes.as_mut().len(); + bytes + .as_mut() + .copy_from_slice(&bitmask.to_ne_bytes()[..len]); + Self(bytes, PhantomData) } #[inline] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 2aa9272ab46..73a0d898700 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -1,8 +1,7 @@ //! Masks that take up full SIMD vector registers. 
-use super::{to_bitmask::ToBitMaskArray, MaskElement}; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; +use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; #[repr(transparent)] pub struct Mask(Simd) @@ -143,53 +142,64 @@ pub fn convert(self) -> Mask } #[inline] - #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask_array(self) -> [u8; M] - where - super::Mask: ToBitMaskArray, - { + #[must_use = "method returns a new vector and does not mutate the original value"] + pub fn to_bitmask_vector(self) -> Simd { + let mut bitmask = Self::splat(false).to_int(); + // Safety: Bytes is the right size array unsafe { // Compute the bitmask - let bitmask: as ToBitMaskArray>::BitMaskArray = + let mut bytes: as SupportedLaneCount>::BitMask = intrinsics::simd_bitmask(self.0); - // Transmute to the return type - let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask); - // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - for x in bitmask.as_mut() { - *x = x.reverse_bits(); + for x in bytes.as_mut() { + *x = x.reverse_bits() } - }; + } - bitmask + assert!( + core::mem::size_of::>() + >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() + ); + core::ptr::copy_nonoverlapping( + bytes.as_ref().as_ptr(), + bitmask.as_mut_array().as_mut_ptr() as _, + bytes.as_ref().len(), + ); } + + bitmask } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_array(mut bitmask: [u8; M]) -> Self - where - super::Mask: ToBitMaskArray, - { + pub fn from_bitmask_vector(bitmask: Simd) -> Self { + let mut bytes = as SupportedLaneCount>::BitMask::default(); + // Safety: Bytes is the right size array unsafe { + assert!( + core::mem::size_of::>() + >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() + ); + core::ptr::copy_nonoverlapping( + bitmask.as_array().as_ptr() as _, + 
bytes.as_mut().as_mut_ptr(), + bytes.as_mut().len(), + ); + // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - for x in bitmask.as_mut() { + for x in bytes.as_mut() { *x = x.reverse_bits(); } } - // Transmute to the bitmask - let bitmask: as ToBitMaskArray>::BitMaskArray = - core::mem::transmute_copy(&bitmask); - // Compute the regular mask Self::from_int_unchecked(intrinsics::simd_select_bitmask( - bitmask, + bytes, Self::splat(true).to_int(), Self::splat(false).to_int(), )) @@ -197,41 +207,40 @@ pub fn convert(self) -> Mask } #[inline] - pub(crate) fn to_bitmask_integer(self) -> U - where - super::Mask: ToBitMask, - { - // Safety: U is required to be the appropriate bitmask type - let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) }; + pub(crate) fn to_bitmask_integer(self) -> u64 { + let resized = self.to_int().extend::<64>(T::FALSE); + + // SAFETY: `resized` is an integer vector with length 64 + let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) }; // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - bitmask.reverse_bits(N) + bitmask.reverse_bits() } else { bitmask } } #[inline] - pub(crate) fn from_bitmask_integer(bitmask: U) -> Self - where - super::Mask: ToBitMask, - { + pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { // LLVM assumes bit order should match endianness let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits(N) + bitmask.reverse_bits() } else { bitmask }; - // Safety: U is required to be the appropriate bitmask type - unsafe { - Self::from_int_unchecked(intrinsics::simd_select_bitmask( + // SAFETY: `mask` is the correct bitmask type for a u64 bitmask + let mask: Simd = unsafe { + intrinsics::simd_select_bitmask( bitmask, - Self::splat(true).to_int(), - Self::splat(false).to_int(), - )) - } + Simd::::splat(T::TRUE), + Simd::::splat(T::FALSE), + ) + }; + + // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE` + unsafe { 
Self::from_int_unchecked(mask.extend::(T::FALSE)) } } #[inline] diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs deleted file mode 100644 index 06f09c65aca..00000000000 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ /dev/null @@ -1,111 +0,0 @@ -use super::{mask_impl, Mask, MaskElement}; -use crate::simd::{LaneCount, SupportedLaneCount}; -use core::borrow::{Borrow, BorrowMut}; - -mod sealed { - pub trait Sealed {} -} -pub use sealed::Sealed; - -impl Sealed for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -/// Converts masks to and from integer bitmasks. -/// -/// Each bit of the bitmask corresponds to a mask element, starting with the LSB. -pub trait ToBitMask: Sealed { - /// The integer bitmask type. - type BitMask; - - /// Converts a mask to a bitmask. - fn to_bitmask(self) -> Self::BitMask; - - /// Converts a bitmask to a mask. - fn from_bitmask(bitmask: Self::BitMask) -> Self; -} - -/// Converts masks to and from byte array bitmasks. -/// -/// Each bit of the bitmask corresponds to a mask element, starting with the LSB of the first byte. -pub trait ToBitMaskArray: Sealed { - /// The bitmask array. - type BitMaskArray: Copy - + Unpin - + Send - + Sync - + AsRef<[u8]> - + AsMut<[u8]> - + Borrow<[u8]> - + BorrowMut<[u8]> - + 'static; - - /// Converts a mask to a bitmask. - fn to_bitmask_array(self) -> Self::BitMaskArray; - - /// Converts a bitmask to a mask. - fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self; -} - -macro_rules! impl_integer { - { $(impl ToBitMask for Mask<_, $lanes:literal>)* } => { - $( - impl ToBitMask for Mask { - type BitMask = $int; - - #[inline] - fn to_bitmask(self) -> $int { - self.0.to_bitmask_integer() - } - - #[inline] - fn from_bitmask(bitmask: $int) -> Self { - Self(mask_impl::Mask::from_bitmask_integer(bitmask)) - } - } - )* - } -} - -macro_rules! 
impl_array { - { $(impl ToBitMaskArray for Mask<_, $lanes:literal>)* } => { - $( - impl ToBitMaskArray for Mask { - type BitMaskArray = [u8; $int]; - - #[inline] - fn to_bitmask_array(self) -> Self::BitMaskArray { - self.0.to_bitmask_array() - } - - #[inline] - fn from_bitmask_array(bitmask: Self::BitMaskArray) -> Self { - Self(mask_impl::Mask::from_bitmask_array(bitmask)) - } - } - )* - } -} - -impl_integer! { - impl ToBitMask for Mask<_, 1> - impl ToBitMask for Mask<_, 2> - impl ToBitMask for Mask<_, 4> - impl ToBitMask for Mask<_, 8> - impl ToBitMask for Mask<_, 16> - impl ToBitMask for Mask<_, 32> - impl ToBitMask for Mask<_, 64> -} - -impl_array! { - impl ToBitMaskArray for Mask<_, 1> - impl ToBitMaskArray for Mask<_, 2> - impl ToBitMaskArray for Mask<_, 4> - impl ToBitMaskArray for Mask<_, 8> - impl ToBitMaskArray for Mask<_, 16> - impl ToBitMaskArray for Mask<_, 32> - impl ToBitMaskArray for Mask<_, 64> -} diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 6af882c0a0e..e5b3d4444d8 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -349,4 +349,39 @@ impl Swizzle for Odd { Odd::concat_swizzle(self, other), ) } + + /// Extend a vector. + /// + /// Extends the length of a vector, setting the new elements to `value`. + /// If `M` < `N`, truncates the vector to the first `M` elements. 
+ /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::u32x4; + /// let x = u32x4::from_array([0, 1, 2, 3]); + /// assert_eq!(x.extend::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]); + /// assert_eq!(x.extend::<2>(9).to_array(), [0, 1]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn extend(self, value: T) -> Simd + where + LaneCount: SupportedLaneCount, + { + struct Extend; + impl Swizzle for Extend { + const INDEX: [usize; M] = const { + let mut index = [0; M]; + let mut i = 0; + while i < M { + index[i] = if i < N { i } else { N }; + i += 1; + } + index + }; + } + Extend::::concat_swizzle(self, Simd::splat(value)) + } } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 7c1d4c7dd3f..92ee53b3e55 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -13,7 +13,7 @@ mod $type { #[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; - use core_simd::simd::Mask; + use core_simd::simd::{Mask, Simd}; #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] @@ -72,7 +72,6 @@ fn roundtrip_int_conversion() { #[test] fn roundtrip_bitmask_conversion() { - use core_simd::simd::ToBitMask; let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, @@ -85,8 +84,6 @@ fn roundtrip_bitmask_conversion() { #[test] fn roundtrip_bitmask_conversion_short() { - use core_simd::simd::ToBitMask; - let values = [ false, false, false, true, ]; @@ -126,16 +123,19 @@ fn cast_impl() } #[test] - fn roundtrip_bitmask_array_conversion() { - use core_simd::simd::ToBitMaskArray; + fn roundtrip_bitmask_vector_conversion() { let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, ]; let mask = Mask::<$type, 
16>::from_array(values); - let bitmask = mask.to_bitmask_array(); - assert_eq!(bitmask, [0b01001001, 0b10000011]); - assert_eq!(Mask::<$type, 16>::from_bitmask_array(bitmask), mask); + let bitmask = mask.to_bitmask_vector(); + if core::mem::size_of::<$type>() == 1 { + assert_eq!(bitmask, Simd::from_array([0b01001001 as _, 0b10000011 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ])); + } else { + assert_eq!(bitmask, Simd::from_array([0b1000001101001001 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])); + } + assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask); } } } From 082e3c8a5da8146b4e3d382d4f84a8a6847dd783 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 17 Nov 2023 10:15:12 -0500 Subject: [PATCH 55/59] Workaround simd_bitmask limitations --- crates/core_simd/src/masks/full_masks.rs | 90 +++++++++++++++++++++--- crates/core_simd/src/swizzle.rs | 16 ++--- crates/core_simd/tests/masks.rs | 9 +-- 3 files changed, 90 insertions(+), 25 deletions(-) diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 73a0d898700..a529490f3a2 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -207,40 +207,108 @@ pub fn from_bitmask_vector(bitmask: Simd) -> Self { } #[inline] - pub(crate) fn to_bitmask_integer(self) -> u64 { - let resized = self.to_int().extend::<64>(T::FALSE); + unsafe fn to_bitmask_impl(self) -> U + where + LaneCount: SupportedLaneCount, + { + let resized = self.to_int().resize::(T::FALSE); - // SAFETY: `resized` is an integer vector with length 64 - let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) }; + // Safety: `resized` is an integer vector with length M, which must match T + let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) }; // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - bitmask.reverse_bits() + bitmask.reverse_bits(M) } else { bitmask } } #[inline] - pub(crate) fn 
from_bitmask_integer(bitmask: u64) -> Self { + unsafe fn from_bitmask_impl(bitmask: U) -> Self + where + LaneCount: SupportedLaneCount, + { // LLVM assumes bit order should match endianness let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits() + bitmask.reverse_bits(M) } else { bitmask }; // SAFETY: `mask` is the correct bitmask type for a u64 bitmask - let mask: Simd = unsafe { + let mask: Simd = unsafe { intrinsics::simd_select_bitmask( bitmask, - Simd::::splat(T::TRUE), - Simd::::splat(T::FALSE), + Simd::::splat(T::TRUE), + Simd::::splat(T::FALSE), ) }; // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE` - unsafe { Self::from_int_unchecked(mask.extend::(T::FALSE)) } + unsafe { Self::from_int_unchecked(mask.resize::(T::FALSE)) } + } + + #[inline] + pub(crate) fn to_bitmask_integer(self) -> u64 { + // TODO modify simd_bitmask to zero-extend output, making this unnecessary + macro_rules! bitmask { + { $($ty:ty: $($len:literal),*;)* } => { + match N { + $($( + // Safety: bitmask matches length + $len => unsafe { self.to_bitmask_impl::<$ty, $len>() as u64 }, + )*)* + // Safety: bitmask matches length + _ => unsafe { self.to_bitmask_impl::() }, + } + } + } + #[cfg(all_lane_counts)] + bitmask! { + u8: 1, 2, 3, 4, 5, 6, 7, 8; + u16: 9, 10, 11, 12, 13, 14, 15, 16; + u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32; + u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64; + } + #[cfg(not(all_lane_counts))] + bitmask! { + u8: 1, 2, 4, 8; + u16: 16; + u32: 32; + u64: 64; + } + } + + #[inline] + pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { + // TODO modify simd_bitmask_select to truncate input, making this unnecessary + macro_rules! 
bitmask { + { $($ty:ty: $($len:literal),*;)* } => { + match N { + $($( + // Safety: bitmask matches length + $len => unsafe { Self::from_bitmask_impl::<$ty, $len>(bitmask as $ty) }, + )*)* + // Safety: bitmask matches length + _ => unsafe { Self::from_bitmask_impl::(bitmask) }, + } + } + } + #[cfg(all_lane_counts)] + bitmask! { + u8: 1, 2, 3, 4, 5, 6, 7, 8; + u16: 9, 10, 11, 12, 13, 14, 15, 16; + u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32; + u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64; + } + #[cfg(not(all_lane_counts))] + bitmask! { + u8: 1, 2, 4, 8; + u16: 16; + u32: 32; + u64: 64; + } } #[inline] diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index e5b3d4444d8..ec8548d5574 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -350,9 +350,9 @@ impl Swizzle for Odd { ) } - /// Extend a vector. + /// Resize a vector. /// - /// Extends the length of a vector, setting the new elements to `value`. + /// If `M` > `N`, extends the length of a vector, setting the new elements to `value`. /// If `M` < `N`, truncates the vector to the first `M` elements. 
/// /// ``` @@ -361,17 +361,17 @@ impl Swizzle for Odd { /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::u32x4; /// let x = u32x4::from_array([0, 1, 2, 3]); - /// assert_eq!(x.extend::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]); - /// assert_eq!(x.extend::<2>(9).to_array(), [0, 1]); + /// assert_eq!(x.resize::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]); + /// assert_eq!(x.resize::<2>(9).to_array(), [0, 1]); /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn extend(self, value: T) -> Simd + pub fn resize(self, value: T) -> Simd where LaneCount: SupportedLaneCount, { - struct Extend; - impl Swizzle for Extend { + struct Resize; + impl Swizzle for Resize { const INDEX: [usize; M] = const { let mut index = [0; M]; let mut i = 0; @@ -382,6 +382,6 @@ impl Swizzle for Extend { index }; } - Extend::::concat_swizzle(self, Simd::splat(value)) + Resize::::concat_swizzle(self, Simd::splat(value)) } } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 92ee53b3e55..00fc2a24e27 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -13,7 +13,7 @@ mod $type { #[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; - use core_simd::simd::{Mask, Simd}; + use core_simd::simd::Mask; #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] @@ -124,17 +124,14 @@ fn cast_impl() #[test] fn roundtrip_bitmask_vector_conversion() { + use core_simd::simd::ToBytes; let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, ]; let mask = Mask::<$type, 16>::from_array(values); let bitmask = mask.to_bitmask_vector(); - if core::mem::size_of::<$type>() == 1 { - assert_eq!(bitmask, Simd::from_array([0b01001001 as _, 0b10000011 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ])); - } else { - assert_eq!(bitmask, Simd::from_array([0b1000001101001001 as _, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0])); - } + assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]); assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask); } } From 0ad68db91a3149885bc62ae11d2d83e7d401fc25 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 17 Nov 2023 19:17:03 -0500 Subject: [PATCH 56/59] Use u8xN for bitmasks --- crates/core_simd/src/masks.rs | 4 +-- crates/core_simd/src/masks/bitmask.rs | 39 ++++-------------------- crates/core_simd/src/masks/full_masks.rs | 27 ++++------------ 3 files changed, 14 insertions(+), 56 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 5c0ae303162..63731342423 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -286,7 +286,7 @@ pub fn from_bitmask(bitmask: u64) -> Self { /// The remaining bits are unset. #[inline] #[must_use = "method returns a new integer and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd { + pub fn to_bitmask_vector(self) -> Simd { self.0.to_bitmask_vector() } @@ -295,7 +295,7 @@ pub fn to_bitmask_vector(self) -> Simd { /// For each bit, if it is set, the corresponding element in the mask is set to `true`. 
#[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd) -> Self { + pub fn from_bitmask_vector(bitmask: Simd) -> Self { Self(mask_impl::Mask::from_bitmask_vector(bitmask)) } } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 21d9e49a1b5..6ddff07fea2 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -121,45 +121,18 @@ pub unsafe fn from_int_unchecked(value: Simd) -> Self { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd { - let mut bitmask = Self::splat(false).to_int(); - - assert!( - core::mem::size_of::>() - >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() - ); - - // Safety: the bitmask vector is big enough to hold the bitmask - unsafe { - core::ptr::copy_nonoverlapping( - self.0.as_ref().as_ptr(), - bitmask.as_mut_array().as_mut_ptr() as _, - self.0.as_ref().len(), - ); - } - + pub fn to_bitmask_vector(self) -> Simd { + let mut bitmask = Simd::splat(0); + bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); bitmask } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd) -> Self { + pub fn from_bitmask_vector(bitmask: Simd) -> Self { let mut bytes = as SupportedLaneCount>::BitMask::default(); - - assert!( - core::mem::size_of::>() - >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() - ); - - // Safety: the bitmask vector is big enough to hold the bitmask - unsafe { - core::ptr::copy_nonoverlapping( - bitmask.as_array().as_ptr() as _, - bytes.as_mut().as_mut_ptr(), - bytes.as_ref().len(), - ); - } - + let len = bytes.as_ref().len(); + bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]); Self(bytes, PhantomData) } diff --git a/crates/core_simd/src/masks/full_masks.rs 
b/crates/core_simd/src/masks/full_masks.rs index a529490f3a2..0d17e90c128 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -143,8 +143,8 @@ pub fn convert(self) -> Mask #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd { - let mut bitmask = Self::splat(false).to_int(); + pub fn to_bitmask_vector(self) -> Simd { + let mut bitmask = Simd::splat(0); // Safety: Bytes is the right size array unsafe { @@ -159,15 +159,7 @@ pub fn to_bitmask_vector(self) -> Simd { } } - assert!( - core::mem::size_of::>() - >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() - ); - core::ptr::copy_nonoverlapping( - bytes.as_ref().as_ptr(), - bitmask.as_mut_array().as_mut_ptr() as _, - bytes.as_ref().len(), - ); + bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref()); } bitmask @@ -175,20 +167,13 @@ pub fn to_bitmask_vector(self) -> Simd { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd) -> Self { + pub fn from_bitmask_vector(bitmask: Simd) -> Self { let mut bytes = as SupportedLaneCount>::BitMask::default(); // Safety: Bytes is the right size array unsafe { - assert!( - core::mem::size_of::>() - >= core::mem::size_of::< as SupportedLaneCount>::BitMask>() - ); - core::ptr::copy_nonoverlapping( - bitmask.as_array().as_ptr() as _, - bytes.as_mut().as_mut_ptr(), - bytes.as_mut().len(), - ); + let len = bytes.as_ref().len(); + bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]); // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { From 62bbb360add75e3ae99b8b6745469671b049269b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 17 Nov 2023 18:16:24 -0500 Subject: [PATCH 57/59] Add first_set --- crates/core_simd/src/masks.rs | 88 +++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 
deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 63731342423..7af4517226a 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -13,7 +13,7 @@ mod mask_impl; use crate::simd::{ - cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount, + cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, }; use core::cmp::Ordering; use core::{fmt, mem}; @@ -35,6 +35,10 @@ fn valid(values: Simd) -> bool fn eq(self, other: Self) -> bool; + fn as_usize(self) -> usize; + + type Unsigned: SimdElement; + const TRUE: Self; const FALSE: Self; @@ -46,10 +50,10 @@ fn valid(values: Simd) -> bool /// /// # Safety /// Type must be a signed integer. -pub unsafe trait MaskElement: SimdElement + Sealed {} +pub unsafe trait MaskElement: SimdElement + SimdCast + Sealed {} macro_rules! impl_element { - { $ty:ty } => { + { $ty:ty, $unsigned:ty } => { impl Sealed for $ty { #[inline] fn valid(value: Simd) -> bool @@ -62,6 +66,13 @@ fn valid(value: Simd) -> bool #[inline] fn eq(self, other: Self) -> bool { self == other } + #[inline] + fn as_usize(self) -> usize { + self as usize + } + + type Unsigned = $unsigned; + const TRUE: Self = -1; const FALSE: Self = 0; } @@ -71,11 +82,11 @@ unsafe impl MaskElement for $ty {} } } -impl_element! { i8 } -impl_element! { i16 } -impl_element! { i32 } -impl_element! { i64 } -impl_element! { isize } +impl_element! { i8, u8 } +impl_element! { i16, u16 } +impl_element! { i32, u32 } +impl_element! { i64, u64 } +impl_element! { isize, usize } /// A SIMD vector mask for `N` elements of width specified by `Element`. /// @@ -298,6 +309,67 @@ pub fn to_bitmask_vector(self) -> Simd { pub fn from_bitmask_vector(bitmask: Simd) -> Self { Self(mask_impl::Mask::from_bitmask_vector(bitmask)) } + + /// Find the index of the first set element. 
+ /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x8; + /// assert_eq!(mask32x8::splat(false).first_set(), None); + /// assert_eq!(mask32x8::splat(true).first_set(), Some(0)); + /// + /// let mask = mask32x8::from_array([false, true, false, false, true, false, false, true]); + /// assert_eq!(mask.first_set(), Some(1)); + /// ``` + #[inline] + #[must_use = "method returns the index and does not mutate the original value"] + pub fn first_set(self) -> Option { + // If bitmasks are efficient, using them is better + if cfg!(target_feature = "sse") && N <= 64 { + let tz = self.to_bitmask().trailing_zeros(); + return if tz == 64 { None } else { Some(tz as usize) }; + } + + // To find the first set index: + // * create a vector 0..N + // * replace unset mask elements in that vector with -1 + // * perform _unsigned_ reduce-min + // * check if the result is -1 or an index + + let index = Simd::from_array( + const { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = i; + i += 1; + } + index + }, + ); + + // Safety: the input and output are integer vectors + let index: Simd = unsafe { intrinsics::simd_cast(index) }; + + let masked_index = self.select(index, Self::splat(true).to_int()); + + // Safety: the input and output are integer vectors + let masked_index: Simd = unsafe { intrinsics::simd_cast(masked_index) }; + + // Safety: the input is an integer vector + let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) }; + + // Safety: the return value is the unsigned version of T + let min_index: T = unsafe { core::mem::transmute_copy(&min_index) }; + + if min_index.eq(T::TRUE) { + None + } else { + Some(min_index.as_usize()) + } + } } // vector/array conversion From 64ea0884efbae271c9b0d1e4364bab9222f54d67 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Sun, 16 Jul 2023 00:37:30 +0800 Subject: 
[PATCH 58/59] remove repetitive words Signed-off-by: cui fliter --- crates/core_simd/examples/dot_product.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index e5815888bb7..f047010a65c 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -130,7 +130,7 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { } // This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that. -// Notice the the use of `mul_add`, which can do a multiply and an add operation ber iteration. +// Notice the use of `mul_add`, which can do a multiply and an add operation ber iteration. pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 { a.array_chunks::<4>() .map(|&a| f32x4::from_array(a)) From 5739caae279262440c28321845bfbf286e6dd1c1 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 19 Nov 2023 00:56:00 -0500 Subject: [PATCH 59/59] Follow-up fixes for to_bitmask --- crates/core_simd/src/masks.rs | 23 ++++++++ crates/core_simd/src/masks/full_masks.rs | 74 ++++++++---------------- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 7af4517226a..0623d2bf3d1 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -295,6 +295,16 @@ pub fn from_bitmask(bitmask: u64) -> Self { /// /// Each bit is set if the corresponding element in the mask is `true`. /// The remaining bits are unset. 
+ /// + /// The bits are packed into the first N bits of the vector: + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x8; + /// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]); + /// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101); + /// ``` #[inline] #[must_use = "method returns a new integer and does not mutate the original value"] pub fn to_bitmask_vector(self) -> Simd { @@ -304,6 +314,19 @@ pub fn to_bitmask_vector(self) -> Simd { /// Create a mask from a bitmask vector. /// /// For each bit, if it is set, the corresponding element in the mask is set to `true`. + /// + /// The bits are packed into the first N bits of the vector: + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{mask32x8, u8x8}; + /// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]); + /// assert_eq!( + /// mask32x8::from_bitmask_vector(bitmask), + /// mask32x8::from_array([true, false, true, false, false, false, true, false]), + /// ); + /// ``` #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask_vector(bitmask: Simd) -> Self { diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 0d17e90c128..63964f455e0 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -237,62 +237,36 @@ unsafe fn from_bitmask_impl(bitmask: U) -> Self #[inline] pub(crate) fn to_bitmask_integer(self) -> u64 { // TODO modify simd_bitmask to zero-extend output, making this unnecessary - macro_rules! 
bitmask { - { $($ty:ty: $($len:literal),*;)* } => { - match N { - $($( - // Safety: bitmask matches length - $len => unsafe { self.to_bitmask_impl::<$ty, $len>() as u64 }, - )*)* - // Safety: bitmask matches length - _ => unsafe { self.to_bitmask_impl::() }, - } - } - } - #[cfg(all_lane_counts)] - bitmask! { - u8: 1, 2, 3, 4, 5, 6, 7, 8; - u16: 9, 10, 11, 12, 13, 14, 15, 16; - u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32; - u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64; - } - #[cfg(not(all_lane_counts))] - bitmask! { - u8: 1, 2, 4, 8; - u16: 16; - u32: 32; - u64: 64; + if N <= 8 { + // Safety: bitmask matches length + unsafe { self.to_bitmask_impl::() as u64 } + } else if N <= 16 { + // Safety: bitmask matches length + unsafe { self.to_bitmask_impl::() as u64 } + } else if N <= 32 { + // Safety: bitmask matches length + unsafe { self.to_bitmask_impl::() as u64 } + } else { + // Safety: bitmask matches length + unsafe { self.to_bitmask_impl::() } } } #[inline] pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { // TODO modify simd_bitmask_select to truncate input, making this unnecessary - macro_rules! bitmask { - { $($ty:ty: $($len:literal),*;)* } => { - match N { - $($( - // Safety: bitmask matches length - $len => unsafe { Self::from_bitmask_impl::<$ty, $len>(bitmask as $ty) }, - )*)* - // Safety: bitmask matches length - _ => unsafe { Self::from_bitmask_impl::(bitmask) }, - } - } - } - #[cfg(all_lane_counts)] - bitmask! { - u8: 1, 2, 3, 4, 5, 6, 7, 8; - u16: 9, 10, 11, 12, 13, 14, 15, 16; - u32: 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32; - u64: 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64; - } - #[cfg(not(all_lane_counts))] - bitmask! 
{ - u8: 1, 2, 4, 8; - u16: 16; - u32: 32; - u64: 64; + if N <= 8 { + // Safety: bitmask matches length + unsafe { Self::from_bitmask_impl::(bitmask as u8) } + } else if N <= 16 { + // Safety: bitmask matches length + unsafe { Self::from_bitmask_impl::(bitmask as u16) } + } else if N <= 32 { + // Safety: bitmask matches length + unsafe { Self::from_bitmask_impl::(bitmask as u32) } + } else { + // Safety: bitmask matches length + unsafe { Self::from_bitmask_impl::(bitmask) } } }