From 36e198b97a0615df965df5fe88bb052bd1bc92b1 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 13 Sep 2021 00:48:26 +0000 Subject: [PATCH 001/161] Use new bitmask intrinsics with byte arrays --- crates/core_simd/src/lane_count.rs | 9 --------- crates/core_simd/src/masks/bitmask.rs | 20 +++++++------------- crates/core_simd/src/masks/full_masks.rs | 19 ++----------------- 3 files changed, 9 insertions(+), 39 deletions(-) diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index b017e7d137e..4a5dc80049a 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -15,34 +15,25 @@ impl LaneCount { pub trait SupportedLaneCount: Sealed { #[doc(hidden)] type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; - - #[doc(hidden)] - type IntBitMask; } impl Sealed for LaneCount {} impl SupportedLaneCount for LaneCount<1> { type BitMask = [u8; 1]; - type IntBitMask = u8; } impl SupportedLaneCount for LaneCount<2> { type BitMask = [u8; 1]; - type IntBitMask = u8; } impl SupportedLaneCount for LaneCount<4> { type BitMask = [u8; 1]; - type IntBitMask = u8; } impl SupportedLaneCount for LaneCount<8> { type BitMask = [u8; 1]; - type IntBitMask = u8; } impl SupportedLaneCount for LaneCount<16> { type BitMask = [u8; 2]; - type IntBitMask = u16; } impl SupportedLaneCount for LaneCount<32> { type BitMask = [u8; 4]; - type IntBitMask = u32; } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 2689e1a88a8..45990e9ce5f 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,3 +1,4 @@ +#![allow(unused_imports)] use super::MaskElement; use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount}; @@ -101,24 +102,17 @@ where #[inline] pub fn to_int(self) -> Simd { unsafe { - let mask: as SupportedLaneCount>::IntBitMask = - core::mem::transmute_copy(&self); - intrinsics::simd_select_bitmask(mask, 
Simd::splat(T::TRUE), Simd::splat(T::FALSE)) + crate::intrinsics::simd_select_bitmask( + self.0, + Simd::splat(T::TRUE), + Simd::splat(T::FALSE), + ) } } #[inline] pub unsafe fn from_int_unchecked(value: Simd) -> Self { - // TODO remove the transmute when rustc is more flexible - assert_eq!( - core::mem::size_of::< as SupportedLaneCount>::BitMask>(), - core::mem::size_of::< as SupportedLaneCount>::IntBitMask>(), - ); - unsafe { - let mask: as SupportedLaneCount>::IntBitMask = - intrinsics::simd_bitmask(value); - Self(core::mem::transmute_copy(&mask), PhantomData) - } + unsafe { Self(crate::intrinsics::simd_bitmask(value), PhantomData) } } #[cfg(feature = "generic_const_exprs")] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index dd981cedb93..0f1edf9d2f5 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -106,15 +106,8 @@ where #[inline] pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { unsafe { - // TODO remove the transmute when rustc can use arrays of u8 as bitmasks - assert_eq!( - core::mem::size_of::< as SupportedLaneCount>::IntBitMask>(), - LaneCount::::BITMASK_LEN, - ); - let bitmask: as SupportedLaneCount>::IntBitMask = - intrinsics::simd_bitmask(self.0); let mut bitmask: [u8; LaneCount::::BITMASK_LEN] = - core::mem::transmute_copy(&bitmask); + crate::intrinsics::simd_bitmask(self.0); // There is a bug where LLVM appears to implement this operation with the wrong // bit order. 
@@ -142,15 +135,7 @@ where } } - // TODO remove the transmute when rustc can use arrays of u8 as bitmasks - assert_eq!( - core::mem::size_of::< as SupportedLaneCount>::IntBitMask>(), - LaneCount::::BITMASK_LEN, - ); - let bitmask: as SupportedLaneCount>::IntBitMask = - core::mem::transmute_copy(&bitmask); - - Self::from_int_unchecked(intrinsics::simd_select_bitmask( + Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask( bitmask, Self::splat(true).to_int(), Self::splat(false).to_int(), From 429e0b66a24399b5e6746cc8bff27921ecde7370 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sun, 14 Nov 2021 12:41:16 -0500 Subject: [PATCH 002/161] Update CONTRIBUTING.md for the fact that Travis is no longer used --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9ba12d3a1b..9612fe871c6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ SIMD can be quite complex, and even a "simple" issue can be huge. If an issue is ## CI -We currently have 2 CI matrices through Travis CI and GitHub Actions that will automatically build and test your change in order to verify that `std::simd`'s portable API is, in fact, portable. If your change builds locally, but does not build on either, this is likely due to a platform-specific concern that your code has not addressed. Please consult the build logs and address the error, or ask for help if you need it. +We currently use GitHub Actions which will automatically build and test your change in order to verify that `std::simd`'s portable API is, in fact, portable. If your change builds locally, but does not build in CI, this is likely due to a platform-specific concern that your code has not addressed. Please consult the build logs and address the error, or ask for help if you need it. 
## Beyond stdsimd From f7b03585737b18782c5697881e0a7cb04fe8e462 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 13 Nov 2021 14:38:26 -0800 Subject: [PATCH 003/161] Sprinkle the crate with #[must_use] --- crates/core_simd/src/comparisons.rs | 6 ++++++ crates/core_simd/src/masks.rs | 24 ++++++++++++++++++++++++ crates/core_simd/src/masks/bitmask.rs | 13 +++++++++++++ crates/core_simd/src/masks/full_masks.rs | 15 +++++++++++++++ crates/core_simd/src/select.rs | 3 +++ crates/core_simd/src/swizzle.rs | 9 +++++++++ crates/core_simd/src/vector/float.rs | 20 ++++++++++++++++++++ 7 files changed, 90 insertions(+) diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs index 8c51baca8ed..edef5af3687 100644 --- a/crates/core_simd/src/comparisons.rs +++ b/crates/core_simd/src/comparisons.rs @@ -8,12 +8,14 @@ where { /// Test if each lane is equal to the corresponding lane in `other`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_eq(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } } /// Test if each lane is not equal to the corresponding lane in `other`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_ne(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } } @@ -26,24 +28,28 @@ where { /// Test if each lane is less than the corresponding lane in `other`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_lt(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } } /// Test if each lane is greater than the corresponding lane in `other`. 
#[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_gt(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } } /// Test if each lane is less than or equal to the corresponding lane in `other`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_le(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } } /// Test if each lane is greater than or equal to the corresponding lane in `other`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_ge(self, other: Self) -> Mask { unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } } diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index d460da0d04f..191e9690313 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -129,6 +129,7 @@ where /// # Safety /// All lanes must be either 0 or -1. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } } @@ -139,6 +140,7 @@ where /// # Panics /// Panics if any lane is not 0 or -1. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_int(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); unsafe { Self::from_int_unchecked(value) } @@ -147,6 +149,7 @@ where /// Converts the mask to a vector of integers, where 0 represents `false` and -1 /// represents `true`. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { self.0.to_int() } @@ -156,6 +159,7 @@ where /// # Safety /// `lane` must be less than `LANES`. 
#[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, lane: usize) -> bool { unsafe { self.0.test_unchecked(lane) } } @@ -165,6 +169,7 @@ where /// # Panics /// Panics if `lane` is greater than or equal to the number of lanes in the vector. #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn test(&self, lane: usize) -> bool { assert!(lane < LANES, "lane index out of range"); unsafe { self.test_unchecked(lane) } @@ -195,24 +200,30 @@ where /// Convert this mask to a bitmask, with one bit set per lane. #[cfg(feature = "generic_const_exprs")] + #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { self.0.to_bitmask() } /// Convert a bitmask to a mask. #[cfg(feature = "generic_const_exprs")] + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask(bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { Self(mask_impl::Mask::from_bitmask(bitmask)) } /// Returns true if any lane is set, or false otherwise. #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { self.0.any() } /// Returns true if all lanes are set, or false otherwise. 
#[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { self.0.all() } @@ -245,6 +256,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a defaulted mask with all lanes set to false (0)"] fn default() -> Self { Self::splat(false) } @@ -256,6 +268,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] fn eq(&self, other: &Self) -> bool { self.0 == other.0 } @@ -267,6 +280,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new Ordering and does not mutate the original value"] fn partial_cmp(&self, other: &Self) -> Option { self.0.partial_cmp(&other.0) } @@ -291,6 +305,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Self) -> Self { Self(self.0 & rhs.0) } @@ -303,6 +318,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: bool) -> Self { self & Self::splat(rhs) } @@ -315,6 +331,7 @@ where { type Output = Mask; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Mask) -> Mask { Mask::splat(self) & rhs } @@ -327,6 +344,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Self) -> Self { Self(self.0 | rhs.0) } @@ -339,6 +357,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: bool) -> Self { self | Self::splat(rhs) } @@ -351,6 +370,7 @@ where { type Output = Mask; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Mask) -> Mask { Mask::splat(self) | rhs } @@ -363,6 
+383,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Self) -> Self::Output { Self(self.0 ^ rhs.0) } @@ -375,6 +396,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: bool) -> Self::Output { self ^ Self::splat(rhs) } @@ -387,6 +409,7 @@ where { type Output = Mask; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Mask) -> Self::Output { Mask::splat(self) ^ rhs } @@ -399,6 +422,7 @@ where { type Output = Mask; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn not(self) -> Self::Output { Self(!self.0) } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 45990e9ce5f..4c964cb52e1 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -74,6 +74,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn splat(value: bool) -> Self { let mut mask = as SupportedLaneCount>::BitMask::default(); if value { @@ -88,6 +89,7 @@ where } #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, lane: usize) -> bool { (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0 } @@ -100,6 +102,7 @@ where } #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { unsafe { crate::intrinsics::simd_select_bitmask( @@ -111,12 +114,14 @@ where } #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { unsafe { Self(crate::intrinsics::simd_bitmask(value), PhantomData) } } #[cfg(feature = 
"generic_const_exprs")] #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { // Safety: these are the same type and we are laundering the generic unsafe { core::mem::transmute_copy(&self.0) } @@ -124,12 +129,14 @@ where #[cfg(feature = "generic_const_exprs")] #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask(bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { // Safety: these are the same type and we are laundering the generic Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) } #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn convert(self) -> Mask where U: MaskElement, @@ -138,11 +145,13 @@ where } #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { self != Self::splat(false) } #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { self == Self::splat(true) } @@ -156,6 +165,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(mut self, rhs: Self) -> Self { for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { *l &= r; @@ -172,6 +182,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(mut self, rhs: Self) -> Self { for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { *l |= r; @@ -187,6 +198,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(mut self, rhs: Self) -> Self::Output { for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { *l ^= r; @@ -202,6 +214,7 @@ where { type Output = Self; #[inline] + #[must_use = 
"method returns a new mask and does not mutate the original value"] fn not(mut self) -> Self::Output { for x in self.0.as_mut() { *x = !*x; diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 0f1edf9d2f5..5421ccbe3d8 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -23,6 +23,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn clone(&self) -> Self { *self } @@ -70,11 +71,14 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn splat(value: bool) -> Self { Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) } #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, lane: usize) -> bool { T::eq(self.0[lane], T::TRUE) } @@ -85,16 +89,19 @@ where } #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { self.0 } #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { Self(value) } #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn convert(self) -> Mask where U: MaskElement, @@ -104,6 +111,7 @@ where #[cfg(feature = "generic_const_exprs")] #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { unsafe { let mut bitmask: [u8; LaneCount::::BITMASK_LEN] = @@ -124,6 +132,7 @@ where #[cfg(feature = "generic_const_exprs")] #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask(mut bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { unsafe { // 
There is a bug where LLVM appears to implement this operation with the wrong @@ -144,11 +153,13 @@ where } #[inline] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { unsafe { intrinsics::simd_reduce_any(self.to_int()) } } #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn all(self) -> bool { unsafe { intrinsics::simd_reduce_all(self.to_int()) } } @@ -171,6 +182,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Self) -> Self { unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) } } @@ -183,6 +195,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Self) -> Self { unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) } } @@ -195,6 +208,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Self) -> Self { unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) } } @@ -207,6 +221,7 @@ where { type Output = Self; #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] fn not(self) -> Self::Output { Self::splat(true) ^ self } diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index d976231a03a..5d696ebf76e 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -17,6 +17,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] fn select(mask: Mask, true_values: Self, false_values: Self) -> Self { unsafe { intrinsics::simd_select(mask.to_int(), true_values, false_values) } } @@ -35,6 +36,7 @@ where LaneCount: SupportedLaneCount, { #[inline] + #[must_use = "method returns a new vector and does not mutate the original 
inputs"] fn select(mask: Self, true_values: Self, false_values: Self) -> Self { mask & true_values | !mask & false_values } @@ -80,6 +82,7 @@ where /// assert_eq!(c.to_array(), [true, false, true, false]); /// ``` #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn select>(self, true_values: S, false_values: S) -> S { S::select(self, true_values, false_values) } diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 62cda68f0a9..bdc489774a5 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -87,6 +87,8 @@ pub trait Swizzle { /// Create a new vector from the lanes of `vector`. /// /// Lane `i` of the output is `vector[Self::INDEX[i]]`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] fn swizzle(vector: Simd) -> Simd where T: SimdElement, @@ -106,6 +108,8 @@ pub trait Swizzle2 { /// /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is /// `Second(j)`. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] fn swizzle2( first: Simd, second: Simd, @@ -182,6 +186,7 @@ where { /// Reverse the order of the lanes in the vector. #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn reverse(self) -> Self { const fn reverse_index() -> [usize; LANES] { let mut index = [0; LANES]; @@ -206,6 +211,7 @@ where /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`, /// the element previously in lane `OFFSET` will become the first element in the slice. #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_lanes_left(self) -> Self { const fn rotate_index() -> [usize; LANES] { let offset = OFFSET % LANES; @@ -231,6 +237,7 @@ where /// the end while the last `OFFSET` elements move to the front. 
After calling `rotate_lanes_right`, /// the element previously at index `LANES - OFFSET` will become the first element in the slice. #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_lanes_right(self) -> Self { const fn rotate_index() -> [usize; LANES] { let offset = LANES - OFFSET % LANES; @@ -273,6 +280,7 @@ where /// assert_eq!(y.to_array(), [2, 6, 3, 7]); /// ``` #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn interleave(self, other: Self) -> (Self, Self) { const fn lo() -> [Which; LANES] { let mut idx = [Which::First(0); LANES]; @@ -336,6 +344,7 @@ where /// assert_eq!(y.to_array(), [4, 5, 6, 7]); /// ``` #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn deinterleave(self, other: Self) -> (Self, Self) { const fn even() -> [Which; LANES] { let mut idx = [Which::First(0); LANES]; diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index c09d0ac84d2..4a4b23238c4 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -15,6 +15,7 @@ macro_rules! impl_float_vector { /// Raw transmutation to an unsigned integer vector type with the /// same size and number of lanes. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_bits(self) -> Simd<$bits_ty, LANES> { assert_eq!(core::mem::size_of::(), core::mem::size_of::>()); unsafe { core::mem::transmute_copy(&self) } @@ -23,6 +24,7 @@ macro_rules! impl_float_vector { /// Raw transmutation from an unsigned integer vector type with the /// same size and number of lanes. 
#[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { assert_eq!(core::mem::size_of::(), core::mem::size_of::>()); unsafe { core::mem::transmute_copy(&bits) } @@ -31,6 +33,7 @@ macro_rules! impl_float_vector { /// Produces a vector where every lane has the absolute value of the /// equivalently-indexed lane in `self`. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn abs(self) -> Self { unsafe { intrinsics::simd_fabs(self) } } @@ -44,6 +47,7 @@ macro_rules! impl_float_vector { /// hardware in mind. #[cfg(feature = "std")] #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn mul_add(self, a: Self, b: Self) -> Self { unsafe { intrinsics::simd_fma(self, a, b) } } @@ -51,6 +55,7 @@ macro_rules! impl_float_vector { /// Produces a vector where every lane has the square root value /// of the equivalently-indexed lane in `self` #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] #[cfg(feature = "std")] pub fn sqrt(self) -> Self { unsafe { intrinsics::simd_fsqrt(self) } @@ -58,12 +63,14 @@ macro_rules! impl_float_vector { /// Takes the reciprocal (inverse) of each lane, `1/x`. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn recip(self) -> Self { Self::splat(1.0) / self } /// Converts each lane from radians to degrees. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_degrees(self) -> Self { // to_degrees uses a special constant for better precision, so extract that constant self * Self::splat(<$type>::to_degrees(1.)) @@ -71,6 +78,7 @@ macro_rules! impl_float_vector { /// Converts each lane from degrees to radians. 
#[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_radians(self) -> Self { self * Self::splat(<$type>::to_radians(1.)) } @@ -78,6 +86,7 @@ macro_rules! impl_float_vector { /// Returns true for each lane if it has a positive sign, including /// `+0.0`, `NaN`s with positive sign bit and positive infinity. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_sign_positive(self) -> Mask<$mask_ty, LANES> { !self.is_sign_negative() } @@ -85,6 +94,7 @@ macro_rules! impl_float_vector { /// Returns true for each lane if it has a negative sign, including /// `-0.0`, `NaN`s with negative sign bit and negative infinity. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> { let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1); sign_bits.lanes_gt(Simd::splat(0)) @@ -92,24 +102,28 @@ macro_rules! impl_float_vector { /// Returns true for each lane if its value is `NaN`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_nan(self) -> Mask<$mask_ty, LANES> { self.lanes_ne(self) } /// Returns true for each lane if its value is positive infinity or negative infinity. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_infinite(self) -> Mask<$mask_ty, LANES> { self.abs().lanes_eq(Self::splat(<$type>::INFINITY)) } /// Returns true for each lane if its value is neither infinite nor `NaN`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_finite(self) -> Mask<$mask_ty, LANES> { self.abs().lanes_lt(Self::splat(<$type>::INFINITY)) } /// Returns true for each lane if its value is subnormal. 
#[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> { self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0)) } @@ -117,6 +131,7 @@ macro_rules! impl_float_vector { /// Returns true for each lane if its value is neither neither zero, infinite, /// subnormal, or `NaN`. #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_normal(self) -> Mask<$mask_ty, LANES> { !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) } @@ -127,6 +142,7 @@ macro_rules! impl_float_vector { /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` /// * `NAN` if the number is `NAN` #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn signum(self) -> Self { self.is_nan().select(Self::splat(<$type>::NAN), Self::splat(1.0).copysign(self)) } @@ -135,6 +151,7 @@ macro_rules! impl_float_vector { /// /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn copysign(self, sign: Self) -> Self { let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); @@ -145,6 +162,7 @@ macro_rules! impl_float_vector { /// /// If one of the values is `NAN`, then the other value is returned. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn min(self, other: Self) -> Self { // TODO consider using an intrinsic self.is_nan().select( @@ -157,6 +175,7 @@ macro_rules! impl_float_vector { /// /// If one of the values is `NAN`, then the other value is returned. 
#[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn max(self, other: Self) -> Self { // TODO consider using an intrinsic self.is_nan().select( @@ -171,6 +190,7 @@ macro_rules! impl_float_vector { /// greater than `max`, and the corresponding lane in `min` if the lane is less /// than `min`. Otherwise returns the lane in `self`. #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] pub fn clamp(self, min: Self, max: Self) -> Self { assert!( min.lanes_le(max).all(), From 9129ae651f744328eb691016d59163832ef0c7e9 Mon Sep 17 00:00:00 2001 From: Proloy Mishra <67726964+pro465@users.noreply.github.com> Date: Mon, 15 Nov 2021 18:36:21 +0530 Subject: [PATCH 004/161] Fix outdated workflow badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index da536a4d6f2..db0af2da606 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # The Rust standard library's portable SIMD API -[![Build Status](https://travis-ci.com/rust-lang/portable-simd.svg?branch=master)](https://travis-ci.com/rust-lang/portable-simd) +![Build Status](https://github.com/rust-lang/portable-simd/actions/workflows/ci.yml/badge.svg?branch=master) Code repository for the [Portable SIMD Project Group](https://github.com/rust-lang/project-portable-simd). Please refer to [CONTRIBUTING.md](./CONTRIBUTING.md) for our contributing guidelines. 
From ced3a05526ce70583f827b5d99a69f436126af20 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 21 Nov 2021 14:35:25 -0800 Subject: [PATCH 005/161] Attempt to support to 64 lanes --- crates/core_simd/src/lane_count.rs | 3 +++ crates/test_helpers/src/lib.rs | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index 4a5dc80049a..3b316f12b3e 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -37,3 +37,6 @@ impl SupportedLaneCount for LaneCount<16> { impl SupportedLaneCount for LaneCount<32> { type BitMask = [u8; 4]; } +impl SupportedLaneCount for LaneCount<64> { + type BitMask = [u8; 8]; +} diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 5c6478876f3..7edd6096381 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -376,6 +376,12 @@ macro_rules! test_lanes { fn lanes_32() { implementation::<32>(); } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + fn lanes_64() { + implementation::<64>(); + } } )* } @@ -431,6 +437,12 @@ macro_rules! test_lanes_panic { fn lanes_32() { implementation::<32>(); } + + #[test] + #[should_panic] + fn lanes_64() { + implementation::<64>(); + } } )* } From 0a6992f5bfb6a2e879d23ff015ae27e2534095aa Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Nov 2021 16:15:19 -0800 Subject: [PATCH 006/161] impl deref.rs<&Self> for Simd Instead of implementing each "deref" pattern for every single scalar, we can use type parameters for Simd operating on &Self. We can use a macro, but keep it cleaner and more explicit. 
--- crates/core_simd/src/ops.rs | 62 +++------------------------ crates/core_simd/src/ops/deref.rs | 70 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 56 deletions(-) create mode 100644 crates/core_simd/src/ops/deref.rs diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 5d7af474caf..f5683ebb2c0 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,5 +1,11 @@ use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::ops::{Add, Mul}; +use core::ops::{BitAnd, BitOr, BitXor}; +use core::ops::{Div, Rem, Sub}; +use core::ops::{Shl, Shr}; + +mod deref; impl core::ops::Index for Simd where @@ -57,42 +63,6 @@ macro_rules! impl_ref_ops { $(#[$attrs])* fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body } - - impl core::ops::$trait<&'_ $rhs> for $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - type Output = <$type as core::ops::$trait<$rhs>>::Output; - - $(#[$attrs])* - fn $fn($self_tok, $rhs_arg: &$rhs) -> Self::Output { - core::ops::$trait::$fn($self_tok, *$rhs_arg) - } - } - - impl core::ops::$trait<$rhs> for &'_ $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - type Output = <$type as core::ops::$trait<$rhs>>::Output; - - $(#[$attrs])* - fn $fn($self_tok, $rhs_arg: $rhs) -> Self::Output { - core::ops::$trait::$fn(*$self_tok, $rhs_arg) - } - } - - impl core::ops::$trait<&'_ $rhs> for &'_ $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - type Output = <$type as core::ops::$trait<$rhs>>::Output; - - $(#[$attrs])* - fn $fn($self_tok, $rhs_arg: &$rhs) -> Self::Output { - core::ops::$trait::$fn(*$self_tok, *$rhs_arg) - } - } }; // binary assignment op @@ -112,16 +82,6 @@ macro_rules! 
impl_ref_ops { $(#[$attrs])* fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body } - - impl core::ops::$trait<&'_ $rhs> for $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - $(#[$attrs])* - fn $fn(&mut $self_tok, $rhs_arg: &$rhs_arg_ty) { - core::ops::$trait::$fn($self_tok, *$rhs_arg) - } - } }; // unary op @@ -141,16 +101,6 @@ macro_rules! impl_ref_ops { type Output = $output; fn $fn($self_tok) -> Self::Output $body } - - impl core::ops::$trait for &'_ $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - type Output = <$type as core::ops::$trait>::Output; - fn $fn($self_tok) -> Self::Output { - core::ops::$trait::$fn(*$self_tok) - } - } } } diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs new file mode 100644 index 00000000000..1138b9494f6 --- /dev/null +++ b/crates/core_simd/src/ops/deref.rs @@ -0,0 +1,70 @@ +//! This module hacks in "implicit deref" for Simd's operators. +//! Ideally, Rust would take care of this itself, +//! and method calls usually handle the LHS implicitly. +//! So, we'll manually deref the RHS. +use super::*; + +macro_rules! deref_ops { + ($(impl $trait:ident<&Self> for Simd { + fn $call:ident(rhs: &Self) + })*) => { + $(impl $trait<&Self> for Simd + where + Self: $trait, + T: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: &Self) -> Self::Output { + self.$call(*rhs) + } + })* + } +} + +deref_ops! 
{ + // Arithmetic + impl Add<&Self> for Simd { + fn add(rhs: &Self) + } + + impl Mul<&Self> for Simd { + fn mul(rhs: &Self) + } + + impl Sub<&Self> for Simd { + fn sub(rhs: &Self) + } + + impl Div<&Self> for Simd { + fn div(rhs: &Self) + } + + impl Rem<&Self> for Simd { + fn rem(rhs: &Self) + } + + // Bitops + impl BitAnd<&Self> for Simd { + fn bitand(rhs: &Self) + } + + impl BitOr<&Self> for Simd { + fn bitor(rhs: &Self) + } + + impl BitXor<&Self> for Simd { + fn bitxor(rhs: &Self) + } + + impl Shl<&Self> for Simd { + fn shl(rhs: &Self) + } + + impl Shr<&Self> for Simd { + fn shr(rhs: &Self) + } +} From 51ff9259256f8db9b5491777f0f6cce92b11bde9 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Nov 2021 16:43:02 -0800 Subject: [PATCH 007/161] impl assign.rs for Simd Instead of implementing {Op}Assign traits for individual scalar type args to Simd<_, _>, use parametric impls that reassert the bounds of the binary op. --- crates/core_simd/src/ops.rs | 166 +++-------------------------- crates/core_simd/src/ops/assign.rs | 124 +++++++++++++++++++++ 2 files changed, 136 insertions(+), 154 deletions(-) create mode 100644 crates/core_simd/src/ops/assign.rs diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index f5683ebb2c0..aee5a111a82 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -5,6 +5,7 @@ use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; use core::ops::{Shl, Shr}; +mod assign; mod deref; impl core::ops::Index for Simd @@ -65,25 +66,6 @@ macro_rules! 
impl_ref_ops { } }; - // binary assignment op - { - impl core::ops::$trait:ident<$rhs:ty> for $type:ty - where - LaneCount<$lanes2:ident>: SupportedLaneCount, - { - $(#[$attrs:meta])* - fn $fn:ident(&mut $self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) $body:tt - } - } => { - impl core::ops::$trait<$rhs> for $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - $(#[$attrs])* - fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body - } - }; - // unary op { impl core::ops::$trait:ident for $type:ty @@ -107,34 +89,34 @@ macro_rules! impl_ref_ops { /// Automatically implements operators over vectors and scalars for a particular vector. macro_rules! impl_op { { impl Add for $scalar:ty } => { - impl_op! { @binary $scalar, Add::add, AddAssign::add_assign, simd_add } + impl_op! { @binary $scalar, Add::add, simd_add } }; { impl Sub for $scalar:ty } => { - impl_op! { @binary $scalar, Sub::sub, SubAssign::sub_assign, simd_sub } + impl_op! { @binary $scalar, Sub::sub, simd_sub } }; { impl Mul for $scalar:ty } => { - impl_op! { @binary $scalar, Mul::mul, MulAssign::mul_assign, simd_mul } + impl_op! { @binary $scalar, Mul::mul, simd_mul } }; { impl Div for $scalar:ty } => { - impl_op! { @binary $scalar, Div::div, DivAssign::div_assign, simd_div } + impl_op! { @binary $scalar, Div::div, simd_div } }; { impl Rem for $scalar:ty } => { - impl_op! { @binary $scalar, Rem::rem, RemAssign::rem_assign, simd_rem } + impl_op! { @binary $scalar, Rem::rem, simd_rem } }; { impl Shl for $scalar:ty } => { - impl_op! { @binary $scalar, Shl::shl, ShlAssign::shl_assign, simd_shl } + impl_op! { @binary $scalar, Shl::shl, simd_shl } }; { impl Shr for $scalar:ty } => { - impl_op! { @binary $scalar, Shr::shr, ShrAssign::shr_assign, simd_shr } + impl_op! { @binary $scalar, Shr::shr, simd_shr } }; { impl BitAnd for $scalar:ty } => { - impl_op! { @binary $scalar, BitAnd::bitand, BitAndAssign::bitand_assign, simd_and } + impl_op! 
{ @binary $scalar, BitAnd::bitand, simd_and } }; { impl BitOr for $scalar:ty } => { - impl_op! { @binary $scalar, BitOr::bitor, BitOrAssign::bitor_assign, simd_or } + impl_op! { @binary $scalar, BitOr::bitor, simd_or } }; { impl BitXor for $scalar:ty } => { - impl_op! { @binary $scalar, BitXor::bitxor, BitXorAssign::bitxor_assign, simd_xor } + impl_op! { @binary $scalar, BitXor::bitxor, simd_xor } }; { impl Not for $scalar:ty } => { @@ -166,7 +148,7 @@ macro_rules! impl_op { }; // generic binary op with assignment when output is `Self` - { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $assign_trait:ident :: $assign_trait_fn:ident, $intrinsic:ident } => { + { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => { impl_ref_ops! { impl core::ops::$trait for Simd<$scalar, LANES> where @@ -210,32 +192,6 @@ macro_rules! impl_op { } } } - - impl_ref_ops! { - impl core::ops::$assign_trait for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn $assign_trait_fn(&mut self, rhs: Self) { - unsafe { - *self = intrinsics::$intrinsic(*self, rhs); - } - } - } - } - - impl_ref_ops! { - impl core::ops::$assign_trait<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn $assign_trait_fn(&mut self, rhs: $scalar) { - core::ops::$assign_trait::$assign_trait_fn(self, Self::splat(rhs)); - } - } - } }; } @@ -331,30 +287,6 @@ macro_rules! impl_unsigned_int_ops { } } - impl_ref_ops! { - impl core::ops::DivAssign for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn div_assign(&mut self, rhs: Self) { - *self = *self / rhs; - } - } - } - - impl_ref_ops! { - impl core::ops::DivAssign<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn div_assign(&mut self, rhs: $scalar) { - *self = *self / rhs; - } - } - } - // remainder panics on zero divisor impl_ref_ops! 
{ impl core::ops::Rem for Simd<$scalar, LANES> @@ -421,30 +353,6 @@ macro_rules! impl_unsigned_int_ops { } } - impl_ref_ops! { - impl core::ops::RemAssign for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn rem_assign(&mut self, rhs: Self) { - *self = *self % rhs; - } - } - } - - impl_ref_ops! { - impl core::ops::RemAssign<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn rem_assign(&mut self, rhs: $scalar) { - *self = *self % rhs; - } - } - } - // shifts panic on overflow impl_ref_ops! { impl core::ops::Shl for Simd<$scalar, LANES> @@ -486,31 +394,6 @@ macro_rules! impl_unsigned_int_ops { } } - - impl_ref_ops! { - impl core::ops::ShlAssign for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn shl_assign(&mut self, rhs: Self) { - *self = *self << rhs; - } - } - } - - impl_ref_ops! { - impl core::ops::ShlAssign<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn shl_assign(&mut self, rhs: $scalar) { - *self = *self << rhs; - } - } - } - impl_ref_ops! { impl core::ops::Shr for Simd<$scalar, LANES> where @@ -550,31 +433,6 @@ macro_rules! impl_unsigned_int_ops { } } } - - - impl_ref_ops! { - impl core::ops::ShrAssign for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn shr_assign(&mut self, rhs: Self) { - *self = *self >> rhs; - } - } - } - - impl_ref_ops! { - impl core::ops::ShrAssign<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn shr_assign(&mut self, rhs: $scalar) { - *self = *self >> rhs; - } - } - } )* }; } diff --git a/crates/core_simd/src/ops/assign.rs b/crates/core_simd/src/ops/assign.rs new file mode 100644 index 00000000000..d2b48614fc9 --- /dev/null +++ b/crates/core_simd/src/ops/assign.rs @@ -0,0 +1,124 @@ +//! 
Assignment operators +use super::*; +use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment +use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment +use core::ops::{DivAssign, RemAssign, SubAssign}; // non-commutative binary op-assignment +use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignment + +// Arithmetic + +macro_rules! assign_ops { + ($(impl $assignTrait:ident for Simd + where + Self: $trait:ident, + { + fn $assign_call:ident(rhs: U) { + $call:ident + } + })*) => { + $(impl $assignTrait for Simd + where + Self: $trait, + T: SimdElement, + LaneCount: SupportedLaneCount, + { + #[inline] + fn $assign_call(&mut self, rhs: U) { + *self = self.$call(rhs); + } + })* + } +} + +assign_ops! { + // Arithmetic + impl AddAssign for Simd + where + Self: Add, + { + fn add_assign(rhs: U) { + add + } + } + + impl MulAssign for Simd + where + Self: Mul, + { + fn mul_assign(rhs: U) { + mul + } + } + + impl SubAssign for Simd + where + Self: Sub, + { + fn sub_assign(rhs: U) { + sub + } + } + + impl DivAssign for Simd + where + Self: Div, + { + fn div_assign(rhs: U) { + div + } + } + impl RemAssign for Simd + where + Self: Rem, + { + fn rem_assign(rhs: U) { + rem + } + } + + // Bitops + impl BitAndAssign for Simd + where + Self: BitAnd, + { + fn bitand_assign(rhs: U) { + bitand + } + } + + impl BitOrAssign for Simd + where + Self: BitOr, + { + fn bitor_assign(rhs: U) { + bitor + } + } + + impl BitXorAssign for Simd + where + Self: BitXor, + { + fn bitxor_assign(rhs: U) { + bitxor + } + } + + impl ShlAssign for Simd + where + Self: Shl, + { + fn shl_assign(rhs: U) { + shl + } + } + + impl ShrAssign for Simd + where + Self: Shr, + { + fn shr_assign(rhs: U) { + shr + } + } +} From ae612100d28e3e806c6aa39e52792b3ae98907e7 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 21 Nov 2021 19:08:51 -0800 Subject: [PATCH 008/161] Generically implement horizontal_{and,or,xor} --- 
crates/core_simd/src/reduction.rs | 66 ++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index db0640aae79..e79a185816b 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -2,7 +2,8 @@ use crate::simd::intrinsics::{ simd_reduce_add_ordered, simd_reduce_and, simd_reduce_max, simd_reduce_min, simd_reduce_mul_ordered, simd_reduce_or, simd_reduce_xor, }; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::ops::{BitAnd, BitOr, BitXor}; macro_rules! impl_integer_reductions { { $scalar:ty } => { @@ -22,27 +23,6 @@ macro_rules! impl_integer_reductions { unsafe { simd_reduce_mul_ordered(self, 1) } } - /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of - /// the vector. - #[inline] - pub fn horizontal_and(self) -> $scalar { - unsafe { simd_reduce_and(self) } - } - - /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of - /// the vector. - #[inline] - pub fn horizontal_or(self) -> $scalar { - unsafe { simd_reduce_or(self) } - } - - /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of - /// the vector. - #[inline] - pub fn horizontal_xor(self) -> $scalar { - unsafe { simd_reduce_xor(self) } - } - /// Horizontal maximum. Returns the maximum lane in the vector. #[inline] pub fn horizontal_max(self) -> $scalar { @@ -121,3 +101,45 @@ macro_rules! impl_float_reductions { impl_float_reductions! { f32 } impl_float_reductions! { f64 } + +impl Simd +where + Self: BitAnd, + T: SimdElement + BitAnd, + LaneCount: SupportedLaneCount, +{ + /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of + /// the vector. 
+ #[inline] + pub fn horizontal_and(self) -> T { + unsafe { simd_reduce_and(self) } + } +} + +impl Simd +where + Self: BitOr, + T: SimdElement + BitOr, + LaneCount: SupportedLaneCount, +{ + /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of + /// the vector. + #[inline] + pub fn horizontal_or(self) -> T { + unsafe { simd_reduce_or(self) } + } +} + +impl Simd +where + Self: BitXor, + T: SimdElement + BitXor, + LaneCount: SupportedLaneCount, +{ + /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of + /// the vector. + #[inline] + pub fn horizontal_xor(self) -> T { + unsafe { simd_reduce_xor(self) } + } +} From b2dac7124b2aa3951c7f564015d66f0fff6488aa Mon Sep 17 00:00:00 2001 From: Alexander Ronald Altman Date: Thu, 25 Nov 2021 00:45:28 -0800 Subject: [PATCH 009/161] Uncomment AVX512 byte vector conversions Resolves my comment in #197, at least for now; #187 is pending but since these are already here, just commented, it seemed to make sense to me to re-enable them anyway. --- crates/core_simd/src/vendor/x86.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vendor/x86.rs b/crates/core_simd/src/vendor/x86.rs index d3c19ccc539..0dd47015ed2 100644 --- a/crates/core_simd/src/vendor/x86.rs +++ b/crates/core_simd/src/vendor/x86.rs @@ -8,10 +8,10 @@ use core::arch::x86_64::*; from_transmute! { unsafe u8x16 => __m128i } from_transmute! { unsafe u8x32 => __m256i } -//from_transmute! { unsafe u8x64 => __m512i } +from_transmute! { unsafe u8x64 => __m512i } from_transmute! { unsafe i8x16 => __m128i } from_transmute! { unsafe i8x32 => __m256i } -//from_transmute! { unsafe i8x64 => __m512i } +from_transmute! { unsafe i8x64 => __m512i } from_transmute! { unsafe u16x8 => __m128i } from_transmute! 
{ unsafe u16x16 => __m256i } From 861a6e85e106850583a98f2254def0c9510de091 Mon Sep 17 00:00:00 2001 From: Dean Li Date: Sun, 28 Nov 2021 15:19:01 +0800 Subject: [PATCH 010/161] Add spectral_norm example from packed_simd --- crates/core_simd/examples/spectral_norm.rs | 77 ++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 crates/core_simd/examples/spectral_norm.rs diff --git a/crates/core_simd/examples/spectral_norm.rs b/crates/core_simd/examples/spectral_norm.rs new file mode 100644 index 00000000000..c515dad4dea --- /dev/null +++ b/crates/core_simd/examples/spectral_norm.rs @@ -0,0 +1,77 @@ +#![feature(portable_simd)] + +use core_simd::simd::*; + +fn a(i: usize, j: usize) -> f64 { + ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +} + +fn mult_av(v: &[f64], out: &mut [f64]) { + assert!(v.len() == out.len()); + assert!(v.len() % 2 == 0); + + for (i, out) in out.iter_mut().enumerate() { + let mut sum = f64x2::splat(0.0); + + let mut j = 0; + while j < v.len() { + let b = f64x2::from_slice(&v[j..]); + let a = f64x2::from_array([a(i, j), a(i, j + 1)]); + sum += b / a; + j += 2 + } + *out = sum.horizontal_sum(); + } +} + +fn mult_atv(v: &[f64], out: &mut [f64]) { + assert!(v.len() == out.len()); + assert!(v.len() % 2 == 0); + + for (i, out) in out.iter_mut().enumerate() { + let mut sum = f64x2::splat(0.0); + + let mut j = 0; + while j < v.len() { + let b = f64x2::from_slice(&v[j..]); + let a = f64x2::from_array([a(j, i), a(j + 1, i)]); + sum += b / a; + j += 2 + } + *out = sum.horizontal_sum(); + } +} + +fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { + mult_av(v, tmp); + mult_atv(tmp, out); +} + +pub fn spectral_norm(n: usize) -> f64 { + assert!(n % 2 == 0, "only even lengths are accepted"); + + let mut u = vec![1.0; n]; + let mut v = u.clone(); + let mut tmp = u.clone(); + + for _ in 0..10 { + mult_atav(&u, &mut v, &mut tmp); + mult_atav(&v, &mut u, &mut tmp); + } + (dot(&u, &v) / dot(&v, &v)).sqrt() +} + +fn dot(x: &[f64], y: &[f64]) 
-> f64 { + // This is auto-vectorized: + x.iter().zip(y).map(|(&x, &y)| x * y).sum() +} + +#[cfg(test)] +#[test] +fn test() { + assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991"); +} + +fn main() { + // Empty main to make cargo happy +} From 6094f22ceb6a697bfcfc3e972170f33badc8f6ee Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Nov 2021 17:36:54 -0800 Subject: [PATCH 011/161] impl unary.rs for Simd<{i,u}{8,16,32,64,size}, _> In order to assure type soundness, these "base" impls need to go directly on Simd for every scalar type argument. A bit of cleanup of ops.rs is still warranted. --- crates/core_simd/src/ops.rs | 53 +-------------------- crates/core_simd/src/ops/unary.rs | 77 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 52 deletions(-) create mode 100644 crates/core_simd/src/ops/unary.rs diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index aee5a111a82..b7da4f341d1 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -7,6 +7,7 @@ use core::ops::{Shl, Shr}; mod assign; mod deref; +mod unary; impl core::ops::Index for Simd where @@ -65,25 +66,6 @@ macro_rules! impl_ref_ops { fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body } }; - - // unary op - { - impl core::ops::$trait:ident for $type:ty - where - LaneCount<$lanes2:ident>: SupportedLaneCount, - { - type Output = $output:ty; - fn $fn:ident($self_tok:ident) -> Self::Output $body:tt - } - } => { - impl core::ops::$trait for $type - where - LaneCount<$lanes2>: SupportedLaneCount, - { - type Output = $output; - fn $fn($self_tok) -> Self::Output $body - } - } } /// Automatically implements operators over vectors and scalars for a particular vector. @@ -119,34 +101,6 @@ macro_rules! impl_op { impl_op! { @binary $scalar, BitXor::bitxor, simd_xor } }; - { impl Not for $scalar:ty } => { - impl_ref_ops! 
{ - impl core::ops::Not for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - fn not(self) -> Self::Output { - self ^ Self::splat(!<$scalar>::default()) - } - } - } - }; - - { impl Neg for $scalar:ty } => { - impl_ref_ops! { - impl core::ops::Neg for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - fn neg(self) -> Self::Output { - unsafe { intrinsics::simd_neg(self) } - } - } - } - }; - // generic binary op with assignment when output is `Self` { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => { impl_ref_ops! { @@ -204,7 +158,6 @@ macro_rules! impl_float_ops { impl_op! { impl Mul for $scalar } impl_op! { impl Div for $scalar } impl_op! { impl Rem for $scalar } - impl_op! { impl Neg for $scalar } )* }; } @@ -219,7 +172,6 @@ macro_rules! impl_unsigned_int_ops { impl_op! { impl BitAnd for $scalar } impl_op! { impl BitOr for $scalar } impl_op! { impl BitXor for $scalar } - impl_op! { impl Not for $scalar } // Integers panic on divide by 0 impl_ref_ops! { @@ -441,9 +393,6 @@ macro_rules! impl_unsigned_int_ops { macro_rules! impl_signed_int_ops { { $($scalar:ty),* } => { impl_unsigned_int_ops! { $($scalar),* } - $( // scalar - impl_op! { impl Neg for $scalar } - )* }; } diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs new file mode 100644 index 00000000000..4ebea560fc6 --- /dev/null +++ b/crates/core_simd/src/ops/unary.rs @@ -0,0 +1,77 @@ +use crate::simd::intrinsics; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::ops::{Neg, Not}; // unary ops + +macro_rules! 
neg { + ($(impl Neg for Simd<$scalar:ty, LANES>)*) => { + $(impl Neg for Simd<$scalar, LANES> + where + $scalar: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the input"] + fn neg(self) -> Self::Output { + unsafe { intrinsics::simd_neg(self) } + } + })* + } +} + +neg! { + impl Neg for Simd + + impl Neg for Simd + + impl Neg for Simd + + impl Neg for Simd + + impl Neg for Simd + + impl Neg for Simd + + impl Neg for Simd +} + +macro_rules! not { + ($(impl Not for Simd<$scalar:ty, LANES>)*) => { + $(impl Not for Simd<$scalar, LANES> + where + $scalar: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the input"] + fn not(self) -> Self::Output { + self ^ (Simd::splat(!(0 as $scalar))) + } + })* + } +} + +not! { + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd + + impl Not for Simd +} From 257fa7aa6d03157476f0d6acd9a0b4c28a3877ec Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Nov 2021 17:55:14 -0800 Subject: [PATCH 012/161] Drop splats for Simd Unfortunately, splatting impls currently break several crates. Rust needs more time to review possible mitigations, so drop the impls for the `impl Add for Simd` pattern, for now. 
--- crates/core_simd/examples/nbody.rs | 10 +- crates/core_simd/src/math.rs | 8 +- crates/core_simd/src/ops.rs | 138 --------------------------- crates/core_simd/src/vector/ptr.rs | 4 +- crates/core_simd/tests/ops_macros.rs | 48 ---------- 5 files changed, 11 insertions(+), 197 deletions(-) diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 779575985ed..43280feebbd 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -97,7 +97,7 @@ mod nbody { let sun = &mut sun[0]; for body in rest { let m_ratio = body.mass / SOLAR_MASS; - sun.v -= body.v * m_ratio; + sun.v -= body.v * Simd::splat(m_ratio); } } @@ -143,14 +143,14 @@ mod nbody { let mut i = 0; for j in 0..N_BODIES { for k in j + 1..N_BODIES { - let f = r[i] * mag[i]; - bodies[j].v -= f * bodies[k].mass; - bodies[k].v += f * bodies[j].mass; + let f = r[i] * Simd::splat(mag[i]); + bodies[j].v -= f * Simd::splat(bodies[k].mass); + bodies[k].v += f * Simd::splat(bodies[j].mass); i += 1 } } for body in bodies { - body.x += dt * body.v + body.x += Simd::splat(dt) * body.v } } diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs index 2bae414ebfb..7435b6df918 100644 --- a/crates/core_simd/src/math.rs +++ b/crates/core_simd/src/math.rs @@ -17,7 +17,7 @@ macro_rules! impl_uint_arith { /// let max = Simd::splat(MAX); /// let unsat = x + max; /// let sat = x.saturating_add(max); - /// assert_eq!(x - 1, unsat); + /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1])); /// assert_eq!(sat, max); /// ``` #[inline] @@ -37,7 +37,7 @@ macro_rules! impl_uint_arith { /// let max = Simd::splat(MAX); /// let unsat = x - max; /// let sat = x.saturating_sub(max); - /// assert_eq!(unsat, x + 1); + /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0])); /// assert_eq!(sat, Simd::splat(0)); #[inline] pub fn saturating_sub(self, second: Self) -> Self { @@ -105,7 +105,7 @@ macro_rules! 
impl_int_arith { #[inline] pub fn abs(self) -> Self { const SHR: $ty = <$ty>::BITS as $ty - 1; - let m = self >> SHR; + let m = self >> Simd::splat(SHR); (self^m) - m } @@ -128,7 +128,7 @@ macro_rules! impl_int_arith { pub fn saturating_abs(self) -> Self { // arith shift for -1 or 0 mask based on sign bit, giving 2s complement const SHR: $ty = <$ty>::BITS as $ty - 1; - let m = self >> SHR; + let m = self >> Simd::splat(SHR); (self^m).saturating_sub(m) } diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index b7da4f341d1..3582c57870b 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -118,34 +118,6 @@ macro_rules! impl_op { } } } - - impl_ref_ops! { - impl core::ops::$trait<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn $trait_fn(self, rhs: $scalar) -> Self::Output { - core::ops::$trait::$trait_fn(self, Self::splat(rhs)) - } - } - } - - impl_ref_ops! { - impl core::ops::$trait> for $scalar - where - LaneCount: SupportedLaneCount, - { - type Output = Simd<$scalar, LANES>; - - #[inline] - fn $trait_fn(self, rhs: Simd<$scalar, LANES>) -> Self::Output { - core::ops::$trait::$trait_fn(Simd::splat(self), rhs) - } - } - } }; } @@ -202,43 +174,6 @@ macro_rules! impl_unsigned_int_ops { } } - impl_ref_ops! { - impl core::ops::Div<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn div(self, rhs: $scalar) -> Self::Output { - if rhs == 0 { - panic!("attempt to divide by zero"); - } - if <$scalar>::MIN != 0 && - self.as_array().iter().any(|x| *x == <$scalar>::MIN) && - rhs == -1 as _ { - panic!("attempt to divide with overflow"); - } - let rhs = Self::splat(rhs); - unsafe { intrinsics::simd_div(self, rhs) } - } - } - } - - impl_ref_ops! 
{ - impl core::ops::Div> for $scalar - where - LaneCount: SupportedLaneCount, - { - type Output = Simd<$scalar, LANES>; - - #[inline] - fn div(self, rhs: Simd<$scalar, LANES>) -> Self::Output { - Simd::splat(self) / rhs - } - } - } - // remainder panics on zero divisor impl_ref_ops! { impl core::ops::Rem for Simd<$scalar, LANES> @@ -268,43 +203,6 @@ macro_rules! impl_unsigned_int_ops { } } - impl_ref_ops! { - impl core::ops::Rem<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn rem(self, rhs: $scalar) -> Self::Output { - if rhs == 0 { - panic!("attempt to calculate the remainder with a divisor of zero"); - } - if <$scalar>::MIN != 0 && - self.as_array().iter().any(|x| *x == <$scalar>::MIN) && - rhs == -1 as _ { - panic!("attempt to calculate the remainder with overflow"); - } - let rhs = Self::splat(rhs); - unsafe { intrinsics::simd_rem(self, rhs) } - } - } - } - - impl_ref_ops! { - impl core::ops::Rem> for $scalar - where - LaneCount: SupportedLaneCount, - { - type Output = Simd<$scalar, LANES>; - - #[inline] - fn rem(self, rhs: Simd<$scalar, LANES>) -> Self::Output { - Simd::splat(self) % rhs - } - } - } - // shifts panic on overflow impl_ref_ops! { impl core::ops::Shl for Simd<$scalar, LANES> @@ -328,24 +226,6 @@ macro_rules! impl_unsigned_int_ops { } } - impl_ref_ops! { - impl core::ops::Shl<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn shl(self, rhs: $scalar) -> Self::Output { - if invalid_shift_rhs(rhs) { - panic!("attempt to shift left with overflow"); - } - let rhs = Self::splat(rhs); - unsafe { intrinsics::simd_shl(self, rhs) } - } - } - } - impl_ref_ops! { impl core::ops::Shr for Simd<$scalar, LANES> where @@ -367,24 +247,6 @@ macro_rules! impl_unsigned_int_ops { } } } - - impl_ref_ops! 
{ - impl core::ops::Shr<$scalar> for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn shr(self, rhs: $scalar) -> Self::Output { - if invalid_shift_rhs(rhs) { - panic!("attempt to shift with overflow"); - } - let rhs = Self::splat(rhs); - unsafe { intrinsics::simd_shr(self, rhs) } - } - } - } )* }; } diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs index ac9b98ca031..c668d9a6eae 100644 --- a/crates/core_simd/src/vector/ptr.rs +++ b/crates/core_simd/src/vector/ptr.rs @@ -23,7 +23,7 @@ where pub fn wrapping_add(self, addend: Simd) -> Self { unsafe { let x: Simd = mem::transmute_copy(&self); - mem::transmute_copy(&{ x + (addend * mem::size_of::()) }) + mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) } } } @@ -49,7 +49,7 @@ where pub fn wrapping_add(self, addend: Simd) -> Self { unsafe { let x: Simd = mem::transmute_copy(&self); - mem::transmute_copy(&{ x + (addend * mem::size_of::()) }) + mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 31b7ee20695..43ddde4c55e 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -38,22 +38,6 @@ macro_rules! impl_binary_op_test { ); } - fn scalar_rhs() { - test_helpers::test_binary_scalar_rhs_elementwise( - & as core::ops::$trait<$scalar>>::$fn, - &$scalar_fn, - &|_, _| true, - ); - } - - fn scalar_lhs() { - test_helpers::test_binary_scalar_lhs_elementwise( - &<$scalar as core::ops::$trait>>::$fn, - &$scalar_fn, - &|_, _| true, - ); - } - fn assign() { test_helpers::test_binary_elementwise( &|mut a, b| { as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, @@ -61,14 +45,6 @@ macro_rules! 
impl_binary_op_test { &|_, _| true, ); } - - fn assign_scalar_rhs() { - test_helpers::test_binary_scalar_rhs_elementwise( - &|mut a, b| { as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a }, - &$scalar_fn, - &|_, _| true, - ); - } } } }; @@ -99,22 +75,6 @@ macro_rules! impl_binary_checked_op_test { ); } - fn scalar_rhs() { - test_helpers::test_binary_scalar_rhs_elementwise( - & as core::ops::$trait<$scalar>>::$fn, - &$scalar_fn, - &|x, y| x.iter().all(|x| $check_fn(*x, y)), - ); - } - - fn scalar_lhs() { - test_helpers::test_binary_scalar_lhs_elementwise( - &<$scalar as core::ops::$trait>>::$fn, - &$scalar_fn, - &|x, y| y.iter().all(|y| $check_fn(x, *y)), - ); - } - fn assign() { test_helpers::test_binary_elementwise( &|mut a, b| { as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, @@ -122,14 +82,6 @@ macro_rules! impl_binary_checked_op_test { &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)), ) } - - fn assign_scalar_rhs() { - test_helpers::test_binary_scalar_rhs_elementwise( - &|mut a, b| { as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a }, - &$scalar_fn, - &|x, y| x.iter().all(|x| $check_fn(*x, y)), - ) - } } } }; From 8003b043233213c6f984837d7618f92a6181a875 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 1 Dec 2021 15:02:03 -0800 Subject: [PATCH 013/161] impl Op<&'_ RHS> for &'_ LHS --- crates/core_simd/src/ops/deref.rs | 114 ++++++++++++++++++++-------- crates/core_simd/tests/autoderef.rs | 22 ++++++ 2 files changed, 106 insertions(+), 30 deletions(-) create mode 100644 crates/core_simd/tests/autoderef.rs diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs index 1138b9494f6..9883a74c92d 100644 --- a/crates/core_simd/src/ops/deref.rs +++ b/crates/core_simd/src/ops/deref.rs @@ -1,70 +1,124 @@ //! This module hacks in "implicit deref" for Simd's operators. //! Ideally, Rust would take care of this itself, //! and method calls usually handle the LHS implicitly. -//! 
So, we'll manually deref the RHS. +//! But this is not the case with arithmetic ops. use super::*; -macro_rules! deref_ops { - ($(impl $trait:ident<&Self> for Simd { - fn $call:ident(rhs: &Self) - })*) => { - $(impl $trait<&Self> for Simd +macro_rules! deref_lhs { + (impl $trait:ident for $simd:ty { + fn $call:ident + }) => { + impl $trait<$simd> for &$simd where - Self: $trait, T: SimdElement, + $simd: $trait<$simd, Output = $simd>, LaneCount: SupportedLaneCount, { - type Output = Self; + type Output = Simd; #[inline] #[must_use = "operator returns a new vector without mutating the inputs"] - fn $call(self, rhs: &Self) -> Self::Output { + fn $call(self, rhs: $simd) -> Self::Output { + (*self).$call(rhs) + } + } + }; +} + +macro_rules! deref_rhs { + (impl $trait:ident for $simd:ty { + fn $call:ident + }) => { + impl $trait<&$simd> for $simd + where + T: SimdElement, + $simd: $trait<$simd, Output = $simd>, + LaneCount: SupportedLaneCount, + { + type Output = Simd; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: &$simd) -> Self::Output { self.$call(*rhs) } - })* + } + }; +} + +macro_rules! deref_ops { + ($(impl $trait:ident for $simd:ty { + fn $call:ident + })*) => { + $( + deref_rhs! { + impl $trait for $simd { + fn $call + } + } + deref_lhs! { + impl $trait for $simd { + fn $call + } + } + impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd + where + T: SimdElement, + $simd: $trait<$simd, Output = $simd>, + LaneCount: SupportedLaneCount, + { + type Output = $simd; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: &$simd) -> Self::Output { + (*self).$call(*rhs) + } + } + )* } } deref_ops! 
{ // Arithmetic - impl Add<&Self> for Simd { - fn add(rhs: &Self) + impl Add for Simd { + fn add } - impl Mul<&Self> for Simd { - fn mul(rhs: &Self) + impl Mul for Simd { + fn mul } - impl Sub<&Self> for Simd { - fn sub(rhs: &Self) + impl Sub for Simd { + fn sub } - impl Div<&Self> for Simd { - fn div(rhs: &Self) + impl Div for Simd { + fn div } - impl Rem<&Self> for Simd { - fn rem(rhs: &Self) + impl Rem for Simd { + fn rem } // Bitops - impl BitAnd<&Self> for Simd { - fn bitand(rhs: &Self) + impl BitAnd for Simd { + fn bitand } - impl BitOr<&Self> for Simd { - fn bitor(rhs: &Self) + impl BitOr for Simd { + fn bitor } - impl BitXor<&Self> for Simd { - fn bitxor(rhs: &Self) + impl BitXor for Simd { + fn bitxor } - impl Shl<&Self> for Simd { - fn shl(rhs: &Self) + impl Shl for Simd { + fn shl } - impl Shr<&Self> for Simd { - fn shr(rhs: &Self) + impl Shr for Simd { + fn shr } } diff --git a/crates/core_simd/tests/autoderef.rs b/crates/core_simd/tests/autoderef.rs new file mode 100644 index 00000000000..9359da16ee5 --- /dev/null +++ b/crates/core_simd/tests/autoderef.rs @@ -0,0 +1,22 @@ +// Test that we handle all our "auto-deref" cases correctly. 
+#![feature(portable_simd)] +use core_simd::f32x4; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn deref() { + let x = f32x4::splat(1.0); + let y = f32x4::splat(2.0); + let a = &x; + let b = &y; + assert_eq!(f32x4::splat(3.0), x + y); + assert_eq!(f32x4::splat(3.0), x + b); + assert_eq!(f32x4::splat(3.0), a + y); + assert_eq!(f32x4::splat(3.0), a + b); +} From d9f82f9c4d7f4627721794c3fd9cf4598f60f688 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 4 Dec 2021 05:54:15 +0000 Subject: [PATCH 014/161] Remove Select trait --- crates/core_simd/src/mod.rs | 1 - crates/core_simd/src/select.rs | 74 ++++++++++------------------------ 2 files changed, 21 insertions(+), 54 deletions(-) diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index ec874a22389..85026265956 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -27,7 +27,6 @@ pub mod simd { pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; - pub use crate::core_simd::select::Select; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index 5d696ebf76e..8d521057fbd 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,54 +1,6 @@ use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; -mod sealed { - pub trait Sealed { - fn select(mask: Mask, true_values: Self, false_values: Self) -> Self; - } -} -use sealed::Sealed; - -/// Supporting trait for vector `select` function -pub trait Select: Sealed {} - -impl Sealed> for Simd -where - T: SimdElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new vector and does not mutate 
the original inputs"] - fn select(mask: Mask, true_values: Self, false_values: Self) -> Self { - unsafe { intrinsics::simd_select(mask.to_int(), true_values, false_values) } - } -} - -impl Select> for Simd -where - T: SimdElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Sealed for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new vector and does not mutate the original inputs"] - fn select(mask: Self, true_values: Self, false_values: Self) -> Self { - mask & true_values | !mask & false_values - } -} - -impl Select for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - impl Mask where T: MaskElement, @@ -69,8 +21,24 @@ where /// let c = mask.select(a, b); /// assert_eq!(c.to_array(), [0, 5, 6, 3]); /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn select( + self, + true_values: Simd, + false_values: Simd, + ) -> Simd + where + U: SimdElement, + { + unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } + } + + /// Choose lanes from two masks. + /// + /// For each lane in the mask, choose the corresponding lane from `true_values` if + /// that lane mask is true, and `false_values` if that lane mask is false. 
/// - /// `select` can also be used on masks: /// ``` /// # #![feature(portable_simd)] /// # #[cfg(feature = "std")] use core_simd::Mask; @@ -78,12 +46,12 @@ where /// let a = Mask::::from_array([true, true, false, false]); /// let b = Mask::::from_array([false, false, true, true]); /// let mask = Mask::::from_array([true, false, false, true]); - /// let c = mask.select(a, b); + /// let c = mask.select_mask(a, b); /// assert_eq!(c.to_array(), [true, false, true, false]); /// ``` #[inline] - #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn select>(self, true_values: S, false_values: S) -> S { - S::select(self, true_values, false_values) + #[must_use = "method returns a new mask and does not mutate the original inputs"] + pub fn select_mask(self, true_values: Self, false_values: Self) -> Self { + self & true_values | !self & false_values } } From b6d0eec3de0ac75ce81784251b4648a1cef7f628 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 8 Dec 2021 16:44:21 -0800 Subject: [PATCH 015/161] Wrap bitshifts in ops.rs For all other operators, we use wrapping logic where applicable. This is another case it applies. Per rust-lang/rust#91237, we may wish to specify this as the natural behavior of `simd_{shl,shr}`. --- crates/core_simd/src/ops.rs | 168 +++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 59 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 3582c57870b..2ebcef3d829 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -32,14 +32,115 @@ where } } -/// Checks if the right-hand side argument of a left- or right-shift would cause overflow. 
-fn invalid_shift_rhs(rhs: T) -> bool -where - T: Default + PartialOrd + core::convert::TryFrom, - >::Error: core::fmt::Debug, -{ - let bits_in_type = T::try_from(8 * core::mem::size_of::()).unwrap(); - rhs < T::default() || rhs >= bits_in_type +/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic. +/// It handles performing a bitand in addition to calling the shift operator, so that the result +/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if rhs >= ::BITS +/// At worst, this will maybe add another instruction and cycle, +/// at best, it may open up more optimization opportunities, +/// or simply be elided entirely, especially for SIMD ISAs which default to this. +/// +// FIXME: Consider implementing this in cg_llvm instead? +// cg_clif defaults to this, and scalar MIR shifts also default to wrapping +macro_rules! wrap_bitshift_inner { + (impl $op:ident for Simd<$int:ty, LANES> { + fn $call:ident(self, rhs: Self) -> Self::Output { + unsafe { $simd_call:ident } + } + }) => { + impl $op for Simd<$int, LANES> + where + $int: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: Self) -> Self::Output { + unsafe { + $crate::intrinsics::$simd_call(self, rhs.bitand(Simd::splat(<$int>::BITS as $int - 1))) + } + } + } + }; +} + +macro_rules! wrap_bitshifts { + ($(impl ShiftOps for Simd<$int:ty, LANES> { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + })*) => { + $( + wrap_bitshift_inner! { + impl Shl for Simd<$int, LANES> { + fn shl(self, rhs: Self) -> Self::Output { + unsafe { simd_shl } + } + } + } + wrap_bitshift_inner! 
{ + impl Shr for Simd<$int, LANES> { + fn shr(self, rhs: Self) -> Self::Output { + // This automatically monomorphizes to lshr or ashr, depending, + // so it's fine to use it for both UInts and SInts. + unsafe { simd_shr } + } + } + } + )* + }; +} + +wrap_bitshifts! { + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } + + impl ShiftOps for Simd { + fn shl(self, rhs: Self) -> Self::Output; + fn shr(self, rhs: Self) -> Self::Output; + } } /// Automatically implements operators over references in addition to the provided operator. @@ -85,12 +186,6 @@ macro_rules! impl_op { { impl Rem for $scalar:ty } => { impl_op! { @binary $scalar, Rem::rem, simd_rem } }; - { impl Shl for $scalar:ty } => { - impl_op! { @binary $scalar, Shl::shl, simd_shl } - }; - { impl Shr for $scalar:ty } => { - impl_op! { @binary $scalar, Shr::shr, simd_shr } - }; { impl BitAnd for $scalar:ty } => { impl_op! { @binary $scalar, BitAnd::bitand, simd_and } }; @@ -202,51 +297,6 @@ macro_rules! 
impl_unsigned_int_ops { } } } - - // shifts panic on overflow - impl_ref_ops! { - impl core::ops::Shl for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn shl(self, rhs: Self) -> Self::Output { - // TODO there is probably a better way of doing this - if rhs.as_array() - .iter() - .copied() - .any(invalid_shift_rhs) - { - panic!("attempt to shift left with overflow"); - } - unsafe { intrinsics::simd_shl(self, rhs) } - } - } - } - - impl_ref_ops! { - impl core::ops::Shr for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn shr(self, rhs: Self) -> Self::Output { - // TODO there is probably a better way of doing this - if rhs.as_array() - .iter() - .copied() - .any(invalid_shift_rhs) - { - panic!("attempt to shift with overflow"); - } - unsafe { intrinsics::simd_shr(self, rhs) } - } - } - } )* }; } From 8aef340b8b0658e34b54fdea59e5ffc5ec581106 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 8 Dec 2021 17:23:54 -0800 Subject: [PATCH 016/161] Refactor bitops with `#[must_use]` --- crates/core_simd/src/ops.rs | 131 +++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 33 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 2ebcef3d829..5e775d6ca13 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -32,6 +32,29 @@ where } } +macro_rules! 
unsafe_base_op { + ($(impl $op:ident for Simd<$scalar:ty, LANES> { + fn $call:ident(self, rhs: Self) -> Self::Output { + unsafe{ $simd_call:ident } + } + })*) => { + $(impl $op for Simd<$scalar, LANES> + where + $scalar: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = Self; + + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: Self) -> Self::Output { + unsafe { $crate::intrinsics::$simd_call(self, rhs) } + } + } + )* + } +} + /// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic. /// It handles performing a bitand in addition to calling the shift operator, so that the result /// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if rhs >= ::BITS @@ -41,13 +64,13 @@ where /// // FIXME: Consider implementing this in cg_llvm instead? // cg_clif defaults to this, and scalar MIR shifts also default to wrapping -macro_rules! wrap_bitshift_inner { - (impl $op:ident for Simd<$int:ty, LANES> { +macro_rules! wrap_bitshift { + ($(impl $op:ident for Simd<$int:ty, LANES> { fn $call:ident(self, rhs: Self) -> Self::Output { unsafe { $simd_call:ident } } - }) => { - impl $op for Simd<$int, LANES> + })*) => { + $(impl $op for Simd<$int, LANES> where $int: SimdElement, LaneCount: SupportedLaneCount, @@ -61,24 +84,45 @@ macro_rules! wrap_bitshift_inner { $crate::intrinsics::$simd_call(self, rhs.bitand(Simd::splat(<$int>::BITS as $int - 1))) } } - } + })* }; } -macro_rules! wrap_bitshifts { - ($(impl ShiftOps for Simd<$int:ty, LANES> { +macro_rules! bitops { + ($(impl BitOps for Simd<$int:ty, LANES> { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; })*) => { $( - wrap_bitshift_inner! 
{ + unsafe_base_op!{ + impl BitAnd for Simd<$int, LANES> { + fn bitand(self, rhs: Self) -> Self::Output { + unsafe { simd_and } + } + } + + impl BitOr for Simd<$int, LANES> { + fn bitor(self, rhs: Self) -> Self::Output { + unsafe { simd_or } + } + } + + impl BitXor for Simd<$int, LANES> { + fn bitxor(self, rhs: Self) -> Self::Output { + unsafe { simd_xor } + } + } + } + wrap_bitshift! { impl Shl for Simd<$int, LANES> { fn shl(self, rhs: Self) -> Self::Output { unsafe { simd_shl } } } - } - wrap_bitshift_inner! { + impl Shr for Simd<$int, LANES> { fn shr(self, rhs: Self) -> Self::Output { // This automatically monomorphizes to lshr or ashr, depending, @@ -91,53 +135,86 @@ macro_rules! wrap_bitshifts { }; } -wrap_bitshifts! { - impl ShiftOps for Simd { +// Integers can always accept bitand, bitor, and bitxor. +// The only question is how to handle shifts >= ::BITS? +// Our current solution uses wrapping logic. +bitops! { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - 
impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } - impl ShiftOps for Simd { + impl BitOps for Simd { + fn bitand(self, rhs: Self) -> Self::Output; + fn bitor(self, rhs: Self) -> Self::Output; + fn bitxor(self, rhs: Self) -> Self::Output; fn shl(self, rhs: Self) -> Self::Output; fn shr(self, rhs: Self) -> Self::Output; } @@ -186,15 +263,6 @@ macro_rules! impl_op { { impl Rem for $scalar:ty } => { impl_op! { @binary $scalar, Rem::rem, simd_rem } }; - { impl BitAnd for $scalar:ty } => { - impl_op! { @binary $scalar, BitAnd::bitand, simd_and } - }; - { impl BitOr for $scalar:ty } => { - impl_op! 
{ @binary $scalar, BitOr::bitor, simd_or } - }; - { impl BitXor for $scalar:ty } => { - impl_op! { @binary $scalar, BitXor::bitxor, simd_xor } - }; // generic binary op with assignment when output is `Self` { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => { @@ -236,9 +304,6 @@ macro_rules! impl_unsigned_int_ops { impl_op! { impl Add for $scalar } impl_op! { impl Sub for $scalar } impl_op! { impl Mul for $scalar } - impl_op! { impl BitAnd for $scalar } - impl_op! { impl BitOr for $scalar } - impl_op! { impl BitXor for $scalar } // Integers panic on divide by 0 impl_ref_ops! { From 049e8ca7f7fc42501b98afcb9c32fd51080bd75a Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 8 Dec 2021 17:31:19 -0800 Subject: [PATCH 017/161] Refactor float arith with `#[must_use]` --- crates/core_simd/src/ops.rs | 78 ++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 5e775d6ca13..65b461d3981 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -220,6 +220,70 @@ bitops! { } } +macro_rules! 
float_arith { + ($(impl FloatArith for Simd<$float:ty, LANES> { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + })*) => { + $( + unsafe_base_op!{ + impl Add for Simd<$float, LANES> { + fn add(self, rhs: Self) -> Self::Output { + unsafe { simd_add } + } + } + + impl Mul for Simd<$float, LANES> { + fn mul(self, rhs: Self) -> Self::Output { + unsafe { simd_mul } + } + } + + impl Sub for Simd<$float, LANES> { + fn sub(self, rhs: Self) -> Self::Output { + unsafe { simd_sub } + } + } + + impl Div for Simd<$float, LANES> { + fn div(self, rhs: Self) -> Self::Output { + unsafe { simd_div } + } + } + + impl Rem for Simd<$float, LANES> { + fn rem(self, rhs: Self) -> Self::Output { + unsafe { simd_rem } + } + } + } + )* + }; +} + +// We don't need any special precautions here: +// Floats always accept arithmetic ops, but may become NaN. +float_arith! { + impl FloatArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl FloatArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } +} + /// Automatically implements operators over references in addition to the provided operator. macro_rules! impl_ref_ops { // binary op @@ -284,19 +348,6 @@ macro_rules! impl_op { }; } -/// Implements floating-point operators for the provided types. -macro_rules! impl_float_ops { - { $($scalar:ty),* } => { - $( - impl_op! { impl Add for $scalar } - impl_op! { impl Sub for $scalar } - impl_op! { impl Mul for $scalar } - impl_op! { impl Div for $scalar } - impl_op! 
{ impl Rem for $scalar } - )* - }; -} - /// Implements unsigned integer operators for the provided types. macro_rules! impl_unsigned_int_ops { { $($scalar:ty),* } => { @@ -375,4 +426,3 @@ macro_rules! impl_signed_int_ops { impl_unsigned_int_ops! { u8, u16, u32, u64, usize } impl_signed_int_ops! { i8, i16, i32, i64, isize } -impl_float_ops! { f32, f64 } From 5dcd397f47a17aec3b049af2d7541530b859e47b Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 8 Dec 2021 18:42:06 -0800 Subject: [PATCH 018/161] Finish refactoring ints in ops.rs This should perform a SIMD check for whether or not we can div/rem, so that we can panic several times faster! --- crates/core_simd/src/ops.rs | 281 +++++++++++++++++++----------------- 1 file changed, 152 insertions(+), 129 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 65b461d3981..e6d7e695391 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,5 +1,4 @@ -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; @@ -284,145 +283,169 @@ float_arith! { } } -/// Automatically implements operators over references in addition to the provided operator. -macro_rules! impl_ref_ops { - // binary op - { - impl core::ops::$trait:ident<$rhs:ty> for $type:ty - where - LaneCount<$lanes2:ident>: SupportedLaneCount, - { - type Output = $output:ty; - - $(#[$attrs:meta])* - fn $fn:ident($self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) -> Self::Output $body:tt +// Division by zero is poison, according to LLVM. +// So is dividing the MIN value of a signed integer by -1, +// since that would return MAX + 1. +// FIXME: Rust allows ::MIN / -1, +// so we should probably figure out how to make that safe. +macro_rules! 
int_divrem_guard { + ($(impl $op:ident for Simd<$sint:ty, LANES> { + const PANIC_ZERO: &'static str = $zero:literal; + const PANIC_OVERFLOW: &'static str = $overflow:literal; + fn $call:ident { + unsafe { $simd_call:ident } } - } => { - impl core::ops::$trait<$rhs> for $type + })*) => { + $(impl $op for Simd<$sint, LANES> where - LaneCount<$lanes2>: SupportedLaneCount, + $sint: SimdElement, + LaneCount: SupportedLaneCount, { - type Output = $output; - - $(#[$attrs])* - fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body - } + type Output = Self; + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: Self) -> Self::Output { + if rhs.lanes_eq(Simd::splat(0)).any() { + panic!("attempt to calculate the remainder with a divisor of zero"); + } else if <$sint>::MIN != 0 && self.lanes_eq(Simd::splat(<$sint>::MIN)) & rhs.lanes_eq(Simd::splat(-1 as _)) + != Mask::splat(false) + { + panic!("attempt to calculate the remainder with overflow"); + } else { + unsafe { $crate::intrinsics::$simd_call(self, rhs) } + } + } + })* }; } -/// Automatically implements operators over vectors and scalars for a particular vector. -macro_rules! impl_op { - { impl Add for $scalar:ty } => { - impl_op! { @binary $scalar, Add::add, simd_add } - }; - { impl Sub for $scalar:ty } => { - impl_op! { @binary $scalar, Sub::sub, simd_sub } - }; - { impl Mul for $scalar:ty } => { - impl_op! { @binary $scalar, Mul::mul, simd_mul } - }; - { impl Div for $scalar:ty } => { - impl_op! { @binary $scalar, Div::div, simd_div } - }; - { impl Rem for $scalar:ty } => { - impl_op! { @binary $scalar, Rem::rem, simd_rem } - }; - - // generic binary op with assignment when output is `Self` - { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => { - impl_ref_ops! 
{ - impl core::ops::$trait for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn $trait_fn(self, rhs: Self) -> Self::Output { - unsafe { - intrinsics::$intrinsic(self, rhs) - } - } - } - } - }; -} - -/// Implements unsigned integer operators for the provided types. -macro_rules! impl_unsigned_int_ops { - { $($scalar:ty),* } => { +macro_rules! int_arith { + ($(impl IntArith for Simd<$sint:ty, LANES> { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + })*) => { $( - impl_op! { impl Add for $scalar } - impl_op! { impl Sub for $scalar } - impl_op! { impl Mul for $scalar } - - // Integers panic on divide by 0 - impl_ref_ops! { - impl core::ops::Div for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn div(self, rhs: Self) -> Self::Output { - if rhs.as_array() - .iter() - .any(|x| *x == 0) - { - panic!("attempt to divide by zero"); - } - - // Guards for div(MIN, -1), - // this check only applies to signed ints - if <$scalar>::MIN != 0 && self.as_array().iter() - .zip(rhs.as_array().iter()) - .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) { - panic!("attempt to divide with overflow"); - } - unsafe { intrinsics::simd_div(self, rhs) } - } + unsafe_base_op!{ + impl Add for Simd<$sint, LANES> { + fn add(self, rhs: Self) -> Self::Output { + unsafe { simd_add } } } - // remainder panics on zero divisor - impl_ref_ops! 
{ - impl core::ops::Rem for Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - fn rem(self, rhs: Self) -> Self::Output { - if rhs.as_array() - .iter() - .any(|x| *x == 0) - { - panic!("attempt to calculate the remainder with a divisor of zero"); - } - - // Guards for rem(MIN, -1) - // this branch applies the check only to signed ints - if <$scalar>::MIN != 0 && self.as_array().iter() - .zip(rhs.as_array().iter()) - .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) { - panic!("attempt to calculate the remainder with overflow"); - } - unsafe { intrinsics::simd_rem(self, rhs) } - } + impl Mul for Simd<$sint, LANES> { + fn mul(self, rhs: Self) -> Self::Output { + unsafe { simd_mul } } } - )* - }; + + impl Sub for Simd<$sint, LANES> { + fn sub(self, rhs: Self) -> Self::Output { + unsafe { simd_sub } + } + } + } + + int_divrem_guard!{ + impl Div for Simd<$sint, LANES> { + const PANIC_ZERO: &'static str = "attempt to divide by zero"; + const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow"; + fn div { + unsafe { simd_div } + } + } + + impl Rem for Simd<$sint, LANES> { + const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; + const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow"; + fn rem { + unsafe { simd_rem } + } + } + })* + } } -/// Implements unsigned integer operators for the provided types. -macro_rules! impl_signed_int_ops { - { $($scalar:ty),* } => { - impl_unsigned_int_ops! { $($scalar),* } - }; -} +int_arith! { + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } -impl_unsigned_int_ops! { u8, u16, u32, u64, usize } -impl_signed_int_ops! 
{ i8, i16, i32, i64, isize } + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> 
Self::Output; + } + + impl IntArith for Simd { + fn add(self, rhs: Self) -> Self::Output; + fn mul(self, rhs: Self) -> Self::Output; + fn sub(self, rhs: Self) -> Self::Output; + fn div(self, rhs: Self) -> Self::Output; + fn rem(self, rhs: Self) -> Self::Output; + } +} From 533f0fc81ab9ba097779fcd27c8f9ea12261fef5 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Fri, 10 Dec 2021 20:11:54 -0800 Subject: [PATCH 019/161] Use relative intrinsics paths for bitmasks --- crates/core_simd/src/masks/bitmask.rs | 8 ++------ crates/core_simd/src/masks/full_masks.rs | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 4c964cb52e1..b4217dc87ba 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -105,18 +105,14 @@ where #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { unsafe { - crate::intrinsics::simd_select_bitmask( - self.0, - Simd::splat(T::TRUE), - Simd::splat(T::FALSE), - ) + intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE)) } } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { - unsafe { Self(crate::intrinsics::simd_bitmask(value), PhantomData) } + unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } #[cfg(feature = "generic_const_exprs")] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 5421ccbe3d8..e5bb784bb91 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -115,7 +115,7 @@ where pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { unsafe { let mut bitmask: [u8; LaneCount::::BITMASK_LEN] = - crate::intrinsics::simd_bitmask(self.0); + intrinsics::simd_bitmask(self.0); // There is a bug where LLVM appears to 
implement this operation with the wrong // bit order. @@ -144,7 +144,7 @@ where } } - Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask( + Self::from_int_unchecked(intrinsics::simd_select_bitmask( bitmask, Self::splat(true).to_int(), Self::splat(false).to_int(), From bc326a2bbccdccb321328e7a1cde3ad3734a5953 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 21 Dec 2021 18:28:57 -0800 Subject: [PATCH 020/161] Refactor ops.rs with a recursive macro This approaches reducing macro nesting in a slightly different way. Instead of just flattening details, make one macro apply another. This allows specifying all details up-front in the first macro invocation, making it easier to audit and refactor in the future. --- crates/core_simd/src/ops.rs | 518 +++++++++++------------------------- 1 file changed, 152 insertions(+), 366 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index e6d7e695391..6cfc8f80b53 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -31,27 +31,10 @@ where } } -macro_rules! unsafe_base_op { - ($(impl $op:ident for Simd<$scalar:ty, LANES> { - fn $call:ident(self, rhs: Self) -> Self::Output { - unsafe{ $simd_call:ident } - } - })*) => { - $(impl $op for Simd<$scalar, LANES> - where - $scalar: SimdElement, - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - #[must_use = "operator returns a new vector without mutating the inputs"] - fn $call(self, rhs: Self) -> Self::Output { - unsafe { $crate::intrinsics::$simd_call(self, rhs) } - } - } - )* - } +macro_rules! unsafe_base { + ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { + unsafe { $crate::intrinsics::$simd_call($lhs, $rhs) } + }; } /// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic. @@ -64,388 +47,191 @@ macro_rules! unsafe_base_op { // FIXME: Consider implementing this in cg_llvm instead? 
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping macro_rules! wrap_bitshift { - ($(impl $op:ident for Simd<$int:ty, LANES> { - fn $call:ident(self, rhs: Self) -> Self::Output { - unsafe { $simd_call:ident } + ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => { + unsafe { + $crate::intrinsics::$simd_call($lhs, $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1))) } - })*) => { - $(impl $op for Simd<$int, LANES> - where - $int: SimdElement, - LaneCount: SupportedLaneCount, - { - type Output = Self; - - #[inline] - #[must_use = "operator returns a new vector without mutating the inputs"] - fn $call(self, rhs: Self) -> Self::Output { - unsafe { - $crate::intrinsics::$simd_call(self, rhs.bitand(Simd::splat(<$int>::BITS as $int - 1))) - } - } - })* }; } -macro_rules! bitops { - ($(impl BitOps for Simd<$int:ty, LANES> { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - })*) => { - $( - unsafe_base_op!{ - impl BitAnd for Simd<$int, LANES> { - fn bitand(self, rhs: Self) -> Self::Output { - unsafe { simd_and } - } - } - - impl BitOr for Simd<$int, LANES> { - fn bitor(self, rhs: Self) -> Self::Output { - unsafe { simd_or } - } - } - - impl BitXor for Simd<$int, LANES> { - fn bitxor(self, rhs: Self) -> Self::Output { - unsafe { simd_xor } - } - } - } - wrap_bitshift! { - impl Shl for Simd<$int, LANES> { - fn shl(self, rhs: Self) -> Self::Output { - unsafe { simd_shl } - } - } - - impl Shr for Simd<$int, LANES> { - fn shr(self, rhs: Self) -> Self::Output { - // This automatically monomorphizes to lshr or ashr, depending, - // so it's fine to use it for both UInts and SInts. - unsafe { simd_shr } - } - } - } - )* - }; -} - -// Integers can always accept bitand, bitor, and bitxor. -// The only question is how to handle shifts >= ::BITS? 
-// Our current solution uses wrapping logic. -bitops! { - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - 
fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } - - impl BitOps for Simd { - fn bitand(self, rhs: Self) -> Self::Output; - fn bitor(self, rhs: Self) -> Self::Output; - fn bitxor(self, rhs: Self) -> Self::Output; - fn shl(self, rhs: Self) -> Self::Output; - fn shr(self, rhs: Self) -> Self::Output; - } -} - -macro_rules! float_arith { - ($(impl FloatArith for Simd<$float:ty, LANES> { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; - })*) => { - $( - unsafe_base_op!{ - impl Add for Simd<$float, LANES> { - fn add(self, rhs: Self) -> Self::Output { - unsafe { simd_add } - } - } - - impl Mul for Simd<$float, LANES> { - fn mul(self, rhs: Self) -> Self::Output { - unsafe { simd_mul } - } - } - - impl Sub for Simd<$float, LANES> { - fn sub(self, rhs: Self) -> Self::Output { - unsafe { simd_sub } - } - } - - impl Div for Simd<$float, LANES> { - fn div(self, rhs: Self) -> Self::Output { - unsafe { simd_div } - } - } - - impl Rem for Simd<$float, LANES> { - fn rem(self, rhs: Self) -> Self::Output { - unsafe { simd_rem } - } - } - } - )* - }; -} - -// We don't need any special precautions here: -// Floats always accept arithmetic ops, but may become NaN. -float_arith! 
{ - impl FloatArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; - } - - impl FloatArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; - } -} - // Division by zero is poison, according to LLVM. // So is dividing the MIN value of a signed integer by -1, // since that would return MAX + 1. // FIXME: Rust allows ::MIN / -1, // so we should probably figure out how to make that safe. macro_rules! int_divrem_guard { - ($(impl $op:ident for Simd<$sint:ty, LANES> { - const PANIC_ZERO: &'static str = $zero:literal; - const PANIC_OVERFLOW: &'static str = $overflow:literal; - fn $call:ident { - unsafe { $simd_call:ident } - } - })*) => { - $(impl $op for Simd<$sint, LANES> - where - $sint: SimdElement, - LaneCount: SupportedLaneCount, + ( $lhs:ident, + $rhs:ident, + { const PANIC_ZERO: &'static str = $zero:literal; + const PANIC_OVERFLOW: &'static str = $overflow:literal; + $simd_call:ident + }, + $int:ident ) => { + if $rhs.lanes_eq(Simd::splat(0)).any() { + panic!($zero); + } else if <$int>::MIN != 0 + && $lhs.lanes_eq(Simd::splat(<$int>::MIN)) & $rhs.lanes_eq(Simd::splat(-1 as _)) + != Mask::splat(false) { - type Output = Self; - #[inline] - #[must_use = "operator returns a new vector without mutating the inputs"] - fn $call(self, rhs: Self) -> Self::Output { - if rhs.lanes_eq(Simd::splat(0)).any() { - panic!("attempt to calculate the remainder with a divisor of zero"); - } else if <$sint>::MIN != 0 && self.lanes_eq(Simd::splat(<$sint>::MIN)) & rhs.lanes_eq(Simd::splat(-1 as _)) - != Mask::splat(false) - { - panic!("attempt to calculate the remainder with overflow"); - } else { - unsafe { 
$crate::intrinsics::$simd_call(self, rhs) } - } - } - })* + panic!($overflow); + } else { + unsafe { $crate::intrinsics::$simd_call($lhs, $rhs) } + } }; } -macro_rules! int_arith { - ($(impl IntArith for Simd<$sint:ty, LANES> { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; - })*) => { - $( - unsafe_base_op!{ - impl Add for Simd<$sint, LANES> { - fn add(self, rhs: Self) -> Self::Output { - unsafe { simd_add } - } - } +macro_rules! for_base_types { + ( T = ($($scalar:ident),*); + type Lhs = Simd; + type Rhs = Simd; + type Output = $out:ty; - impl Mul for Simd<$sint, LANES> { - fn mul(self, rhs: Self) -> Self::Output { - unsafe { simd_mul } - } - } + impl $op:ident::$call:ident { + $macro_impl:ident $inner:tt + }) => { + $( + impl $op for Simd<$scalar, N> + where + $scalar: SimdElement, + LaneCount: SupportedLaneCount, + { + type Output = $out; - impl Sub for Simd<$sint, LANES> { - fn sub(self, rhs: Self) -> Self::Output { - unsafe { simd_sub } - } - } + #[inline] + #[must_use = "operator returns a new vector without mutating the inputs"] + fn $call(self, rhs: Self) -> Self::Output { + $macro_impl!(self, rhs, $inner, $scalar) + } + })* + } +} + +// A "TokenTree muncher": takes a set of scalar types `T = {};` +// type parameters for the ops it implements, `Op::fn` names, +// and a macro that expands into an expr, substituting in an intrinsic. +// It passes that to for_base_types, which expands an impl for the types, +// using the expanded expr in the function, and recurses with itself. +// +// tl;dr impls a set of ops::{Traits} for a set of types +macro_rules! for_base_ops { + ( + T = $types:tt; + type Lhs = Simd; + type Rhs = Simd; + type Output = $out:ident; + impl $op:ident::$call:ident + $inner:tt + $($rest:tt)* + ) => { + for_base_types! 
{ + T = $types; + type Lhs = Simd; + type Rhs = Simd; + type Output = $out; + impl $op::$call + $inner } - - int_divrem_guard!{ - impl Div for Simd<$sint, LANES> { - const PANIC_ZERO: &'static str = "attempt to divide by zero"; - const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow"; - fn div { - unsafe { simd_div } - } - } - - impl Rem for Simd<$sint, LANES> { - const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; - const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow"; - fn rem { - unsafe { simd_rem } - } - } - })* + for_base_ops! { + T = $types; + type Lhs = Simd; + type Rhs = Simd; + type Output = $out; + $($rest)* + } + }; + ($($done:tt)*) => { + // Done. } } -int_arith! { - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; +// Integers can always accept add, mul, sub, bitand, bitor, and bitxor. +// For all of these operations, simd_* intrinsics apply wrapping logic. +for_base_ops! 
{ + T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize); + type Lhs = Simd; + type Rhs = Simd; + type Output = Self; + + impl Add::add { + unsafe_base { simd_add } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl Mul::mul { + unsafe_base { simd_mul } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl Sub::sub { + unsafe_base { simd_sub } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl BitAnd::bitand { + unsafe_base { simd_and } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl BitOr::bitor { + unsafe_base { simd_or } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl BitXor::bitxor { + unsafe_base { simd_xor } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl Div::div { + int_divrem_guard { + const PANIC_ZERO: &'static str = "attempt to divide by zero"; + const PANIC_OVERFLOW: 
&'static str = "attempt to divide with overflow"; + simd_div + } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl Rem::rem { + int_divrem_guard { + const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; + const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow"; + simd_rem + } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + // The only question is how to handle shifts >= ::BITS? + // Our current solution uses wrapping logic. + impl Shl::shl { + wrap_bitshift { simd_shl } } - impl IntArith for Simd { - fn add(self, rhs: Self) -> Self::Output; - fn mul(self, rhs: Self) -> Self::Output; - fn sub(self, rhs: Self) -> Self::Output; - fn div(self, rhs: Self) -> Self::Output; - fn rem(self, rhs: Self) -> Self::Output; + impl Shr::shr { + wrap_bitshift { + // This automatically monomorphizes to lshr or ashr, depending, + // so it's fine to use it for both UInts and SInts. + simd_shr + } + } +} + +// We don't need any special precautions here: +// Floats always accept arithmetic ops, but may become NaN. +for_base_ops! 
{ + T = (f32, f64); + type Lhs = Simd; + type Rhs = Simd; + type Output = Self; + + impl Add::add { + unsafe_base { simd_add } + } + + impl Mul::mul { + unsafe_base { simd_mul } + } + + impl Sub::sub { + unsafe_base { simd_sub } + } + + impl Div::div { + unsafe_base { simd_div } + } + + impl Rem::rem { + unsafe_base { simd_rem } } } From a42420583bdb6ea788c2f7ec0a0360d99934f2a7 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 23 Dec 2021 23:14:13 -0800 Subject: [PATCH 021/161] Use Mask::any in div check --- crates/core_simd/src/ops.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 6cfc8f80b53..82b007aa696 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; @@ -70,8 +70,7 @@ macro_rules! int_divrem_guard { if $rhs.lanes_eq(Simd::splat(0)).any() { panic!($zero); } else if <$int>::MIN != 0 - && $lhs.lanes_eq(Simd::splat(<$int>::MIN)) & $rhs.lanes_eq(Simd::splat(-1 as _)) - != Mask::splat(false) + && ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) & $rhs.lanes_eq(Simd::splat(-1 as _))).any() { panic!($overflow); } else { From ecc00efee0f4bf950f6fa8ee00d88fefa73a8c0b Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 21 Dec 2021 15:00:35 -0800 Subject: [PATCH 022/161] impl std::simd::StdFloat While consulting with Simulacrum on how to make available the float functions that currently require runtime support for `Simd<f32, N>` and `Simd<f64, N>`, we realized breaking coherence with the classic approach of lang items was, since `{core,std}::simd::Simd` is a `ty::Adt`, likely to be quite a bit nasty.
The project group has a long-term plan for how to get around this kind of issue and move the associated functions into libcore, but that will likely take time as well. Since all routes forward are temporally costly, we probably will skip the lang item approach entirely and go the "proper" route, but in the interests of having something this year for people to play around with, this extension trait was whipped up. For now, while it involves a lot of fairly internal details most users shouldn't have to care about, I went ahead and fully documented the situation for any passerby to read on the trait, as the situation is quite unusual and puzzling to begin with. --- Cargo.toml | 1 + crates/std_float/Cargo.toml | 13 +++ crates/std_float/src/lib.rs | 165 ++++++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 crates/std_float/Cargo.toml create mode 100644 crates/std_float/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 3f1abd73519..9802386e456 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,5 +2,6 @@ members = [ "crates/core_simd", + "crates/std_float", "crates/test_helpers", ] diff --git a/crates/std_float/Cargo.toml b/crates/std_float/Cargo.toml new file mode 100644 index 00000000000..82f66b8dcb7 --- /dev/null +++ b/crates/std_float/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "std_float" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +core_simd = { path = "../core_simd" } + +[features] +default = ["as_crate"] +as_crate = [] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs new file mode 100644 index 00000000000..4bd4d4c05e3 --- /dev/null +++ b/crates/std_float/src/lib.rs @@ -0,0 +1,165 @@ +#![cfg_attr(feature = "as_crate", no_std)] // We are std! 
+#![cfg_attr( + feature = "as_crate", + feature(platform_intrinsics), + feature(portable_simd) +)] +#[cfg(not(feature = "as_crate"))] +use core::simd; +#[cfg(feature = "as_crate")] +use core_simd::simd; + +use simd::{LaneCount, Simd, SupportedLaneCount}; + +#[cfg(feature = "as_crate")] +mod experimental { + pub trait Sealed {} +} + +#[cfg(feature = "as_crate")] +use experimental as sealed; + +use crate::sealed::Sealed; + +// "platform intrinsics" are essentially "codegen intrinsics" +// each of these may be scalarized and lowered to a libm call +extern "platform-intrinsic" { + // ceil + fn simd_ceil(x: T) -> T; + + // floor + fn simd_floor(x: T) -> T; + + // round + fn simd_round(x: T) -> T; + + // trunc + fn simd_trunc(x: T) -> T; + + // fsqrt + fn simd_fsqrt(x: T) -> T; + + // fma + fn simd_fma(x: T, y: T, z: T) -> T; +} + +/// This trait provides a possibly-temporary implementation of float functions +/// that may, in the absence of hardware support, canonicalize to calling an +/// operating system's `math.h` dynamically-loaded library (also known as a +/// shared object). As these conditionally require runtime support, they +/// should only appear in binaries built assuming OS support: `std`. +/// +/// However, there is no reason SIMD types, in general, need OS support, +/// as for many architectures an embedded binary may simply configure that +/// support itself. This means these types must be visible in `core` +/// but have these functions available in `std`. +/// +/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but +/// due to compiler limitations, it is harder to implement this approach for +/// abstract data types like [`Simd`]. From that need, this trait is born. +/// +/// It is possible this trait will be replaced in some manner in the future, +/// when either the compiler or its supporting runtime functions are improved. 
+/// For now this trait is available to permit experimentation with SIMD float +/// operations that may lack hardware support, such as `mul_add`. +pub trait StdFloat: Sealed + Sized { + /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, + /// yielding a more accurate result than an unfused multiply-add. + /// + /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target + /// architecture has a dedicated `fma` CPU instruction. However, this is not always + /// true, and will be heavily dependent on designing algorithms with specific target + /// hardware in mind. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn mul_add(self, a: Self, b: Self) -> Self { + unsafe { simd_fma(self, a, b) } + } + + /// Produces a vector where every lane has the square root value + /// of the equivalently-indexed lane in `self` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn sqrt(self) -> Self { + unsafe { simd_fsqrt(self) } + } + + /// Returns the smallest integer greater than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn ceil(self) -> Self { + unsafe { simd_ceil(self) } + } + + /// Returns the largest integer value less than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn floor(self) -> Self { + unsafe { simd_floor(self) } + } + + /// Rounds to the nearest integer value. Ties round toward zero. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn round(self) -> Self { + unsafe { simd_round(self) } + } + + /// Returns the floating point's integer value, with its fractional part removed. 
+ #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn trunc(self) -> Self { + unsafe { simd_trunc(self) } + } + + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn fract(self) -> Self; +} + +impl Sealed for Simd where LaneCount: SupportedLaneCount {} +impl Sealed for Simd where LaneCount: SupportedLaneCount {} + +// We can safely just use all the defaults. +impl StdFloat for Simd +where + LaneCount: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +impl StdFloat for Simd +where + LaneCount: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use simd::*; + + #[test] + fn everything_works() { + let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]); + let x2 = x + x; + let _xc = x.ceil(); + let _xf = x.floor(); + let _xr = x.round(); + let _xt = x.trunc(); + let _xfma = x.mul_add(x, x); + let _xsqrt = x.sqrt(); + let _ = x2.abs() * x2; + } +} From af26e3b9fd5c21492eb603ec57bc72aee8e7f84b Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 21 Dec 2021 15:29:29 -0800 Subject: [PATCH 023/161] Tear down and rewrite support for float testing --- crates/core_simd/Cargo.toml | 3 ++ crates/core_simd/examples/nbody.rs | 10 +++---- crates/core_simd/src/intrinsics.rs | 26 ------------------ crates/core_simd/src/round.rs | 41 ---------------------------- crates/core_simd/src/vector/float.rs | 23 ---------------- crates/core_simd/tests/ops_macros.rs | 2 
++ crates/core_simd/tests/round.rs | 2 ++ 7 files changed, 12 insertions(+), 95 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index a103ef115a5..d2ff5f3b1b1 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -26,3 +26,6 @@ features = ["alloc"] [dev-dependencies.test_helpers] path = "../test_helpers" + +[dev-dependencies] +std_float = { path = "../std_float/", features = ["as_crate"] } diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 43280feebbd..7b1e6840f64 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -1,11 +1,13 @@ -#![cfg_attr(feature = "std", feature(portable_simd))] +#![feature(portable_simd)] +extern crate std_float; /// Benchmarks game nbody code /// Taken from the `packed_simd` crate /// Run this benchmark with `cargo test --example nbody` -#[cfg(feature = "std")] mod nbody { - use core_simd::*; + use core_simd::simd::*; + #[allow(unused)] // False positive? + use std_float::StdFloat; use std::f64::consts::PI; const SOLAR_MASS: f64 = 4.0 * PI * PI; @@ -167,7 +169,6 @@ mod nbody { } } -#[cfg(feature = "std")] #[cfg(test)] mod tests { // Good enough for demonstration purposes, not going for strictness here. 
@@ -184,7 +185,6 @@ mod tests { } fn main() { - #[cfg(feature = "std")] { let (energy_before, energy_after) = nbody::run(1000); println!("Energy before: {}", energy_before); diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 6a6d26d10a7..0bc241af1f1 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -87,29 +87,3 @@ extern "platform-intrinsic" { #[allow(unused)] pub(crate) fn simd_select_bitmask(m: M, a: T, b: T) -> T; } - -#[cfg(feature = "std")] -mod std { - extern "platform-intrinsic" { - // ceil - pub(crate) fn simd_ceil(x: T) -> T; - - // floor - pub(crate) fn simd_floor(x: T) -> T; - - // round - pub(crate) fn simd_round(x: T) -> T; - - // trunc - pub(crate) fn simd_trunc(x: T) -> T; - - // fsqrt - pub(crate) fn simd_fsqrt(x: T) -> T; - - // fma - pub(crate) fn simd_fma(x: T, y: T, z: T) -> T; - } -} - -#[cfg(feature = "std")] -pub(crate) use crate::simd::intrinsics::std::*; diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 09789e11492..06ccab3ec49 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -5,47 +5,6 @@ macro_rules! implement { { $type:ty, $int_type:ty } => { - #[cfg(feature = "std")] - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Returns the smallest integer greater than or equal to each lane. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn ceil(self) -> Self { - unsafe { intrinsics::simd_ceil(self) } - } - - /// Returns the largest integer value less than or equal to each lane. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn floor(self) -> Self { - unsafe { intrinsics::simd_floor(self) } - } - - /// Rounds to the nearest integer value. Ties round toward zero. 
- #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn round(self) -> Self { - unsafe { intrinsics::simd_round(self) } - } - - /// Returns the floating point's integer value, with its fractional part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn trunc(self) -> Self { - unsafe { intrinsics::simd_trunc(self) } - } - - /// Returns the floating point's fractional value, with its integer part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn fract(self) -> Self { - self - self.trunc() - } - } - impl Simd<$type, LANES> where LaneCount: SupportedLaneCount, diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 4a4b23238c4..3528a420351 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -38,29 +38,6 @@ macro_rules! impl_float_vector { unsafe { intrinsics::simd_fabs(self) } } - /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, - /// yielding a more accurate result than an unfused multiply-add. - /// - /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target - /// architecture has a dedicated `fma` CPU instruction. However, this is not always - /// true, and will be heavily dependent on designing algorithms with specific target - /// hardware in mind. 
- #[cfg(feature = "std")] - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn mul_add(self, a: Self, b: Self) -> Self { - unsafe { intrinsics::simd_fma(self, a, b) } - } - - /// Produces a vector where every lane has the square root value - /// of the equivalently-indexed lane in `self` - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - #[cfg(feature = "std")] - pub fn sqrt(self) -> Self { - unsafe { intrinsics::simd_fsqrt(self) } - } - /// Takes the reciprocal (inverse) of each lane, `1/x`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 43ddde4c55e..4fb9de198ee 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -546,6 +546,8 @@ macro_rules! impl_float_tests { #[cfg(feature = "std")] mod std { + use std_float::StdFloat; + use super::*; test_helpers::test_lanes! { fn sqrt() { diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 11d617a6c2c..1a1bc9ebca7 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -3,6 +3,8 @@ macro_rules! 
float_rounding_test { { $scalar:tt, $int_scalar:tt } => { mod $scalar { + use std_float::StdFloat; + type Vector = core_simd::Simd<$scalar, LANES>; type Scalar = $scalar; type IntScalar = $int_scalar; From 65cb2c90a0688c110d983a2dbb9932900cd6b5d9 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 9 Jan 2022 13:12:22 -0500 Subject: [PATCH 024/161] Fix mask alias --- crates/core_simd/src/masks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 191e9690313..c1ffcaf9116 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -516,7 +516,7 @@ pub type mask16x8 = Mask; pub type mask16x16 = Mask; /// Vector of 32 16-bit masks -pub type mask16x32 = Mask; +pub type mask16x32 = Mask; /// Vector of two 32-bit masks pub type mask32x2 = Mask; From 138b9cf4bf8f483c60e4454f0a7e64973474ca07 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 13 Jan 2022 17:59:55 -0500 Subject: [PATCH 025/161] Use intrinsic for min/max --- crates/core_simd/src/intrinsics.rs | 4 ++++ crates/core_simd/src/vector/float.rs | 12 ++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 0bc241af1f1..70f1d47c08b 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -46,6 +46,10 @@ extern "platform-intrinsic" { /// fabs pub(crate) fn simd_fabs(x: T) -> T; + // minnum/maxnum + pub(crate) fn simd_fmin(x: T, y: T) -> T; + pub(crate) fn simd_fmax(x: T, y: T) -> T; + pub(crate) fn simd_eq(x: T, y: T) -> U; pub(crate) fn simd_ne(x: T, y: T) -> U; pub(crate) fn simd_lt(x: T, y: T) -> U; diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 3528a420351..0e179d6fa76 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -141,11 +141,7 @@ macro_rules! 
impl_float_vector { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] pub fn min(self, other: Self) -> Self { - // TODO consider using an intrinsic - self.is_nan().select( - other, - self.lanes_ge(other).select(other, self) - ) + unsafe { intrinsics::simd_fmin(self, other) } } /// Returns the maximum of each lane. @@ -154,11 +150,7 @@ macro_rules! impl_float_vector { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] pub fn max(self, other: Self) -> Self { - // TODO consider using an intrinsic - self.is_nan().select( - other, - self.lanes_le(other).select(other, self) - ) + unsafe { intrinsics::simd_fmax(self, other) } } /// Restrict each lane to a certain interval unless it is NaN. From a4f5f01b8aa92780e695d471e72e699ef10abe30 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 13 Nov 2021 15:06:48 -0800 Subject: [PATCH 026/161] Use intrinsics for Mask::{to,from}_array This significantly simplifies codegen and should improve mask perf. Co-authored-by: Jacob Lifshay --- crates/core_simd/src/masks.rs | 40 +++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index c1ffcaf9116..ae1fef53da8 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,9 +12,10 @@ )] mod mask_impl; +use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; -use core::fmt; +use core::{fmt, mem}; mod sealed { use super::*; @@ -105,22 +106,39 @@ where Self(mask_impl::Mask::splat(value)) } - /// Converts an array to a SIMD vector. + /// Converts an array of bools to a SIMD mask. 
pub fn from_array(array: [bool; LANES]) -> Self { - let mut vector = Self::splat(false); - for (i, v) in array.iter().enumerate() { - vector.set(i, *v); + // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of + // true: 0b_0000_0001 + // false: 0b_0000_0000 + // Thus, an array of bools is also a valid array of bytes: [u8; N] + // This would be hypothetically valid as an "in-place" transmute, + // but these are "dependently-sized" types, so copy elision it is! + unsafe { + let bytes: [u8; LANES] = mem::transmute_copy(&array); + let bools: Simd = + intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); + Mask::from_int_unchecked(intrinsics::simd_cast(bools)) } - vector } - /// Converts a SIMD vector to an array. + /// Converts a SIMD mask to an array of bools. pub fn to_array(self) -> [bool; LANES] { - let mut array = [false; LANES]; - for (i, v) in array.iter_mut().enumerate() { - *v = self.test(i); + // This follows mostly the same logic as from_array. + // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of + // true: 0b_0000_0001 + // false: 0b_0000_0000 + // Thus, an array of bools is also a valid array of bytes: [u8; N] + // Since our masks are equal to integers where all bits are set, + // we can simply convert them to i8s, and then bitand them by the + // bitpattern for Rust's "true" bool. + // This would be hypothetically valid as an "in-place" transmute, + // but these are "dependently-sized" types, so copy elision it is! + unsafe { + let mut bytes: Simd = intrinsics::simd_cast(self.to_int()); + bytes &= Simd::splat(1i8); + mem::transmute_copy(&bytes) } - array } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From 56566d816deda17b2ddaf3e3e603f2af16e26653 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 20 Jan 2022 15:48:46 -0800 Subject: [PATCH 027/161] Annotate signed type in int_divrem_guard The way the macro expands, it may sometimes infer "this is a uint, but doesn't impl Neg???" 
Also, I made the "wrong path for intrinsics" error. These fixes allow integration into libcore. --- crates/core_simd/src/ops.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 82b007aa696..b65038933bf 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -33,7 +33,7 @@ where macro_rules! unsafe_base { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { - unsafe { $crate::intrinsics::$simd_call($lhs, $rhs) } + unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } }; } @@ -49,7 +49,10 @@ macro_rules! unsafe_base { macro_rules! wrap_bitshift { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => { unsafe { - $crate::intrinsics::$simd_call($lhs, $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1))) + $crate::simd::intrinsics::$simd_call( + $lhs, + $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)), + ) } }; } @@ -70,11 +73,13 @@ macro_rules! int_divrem_guard { if $rhs.lanes_eq(Simd::splat(0)).any() { panic!($zero); } else if <$int>::MIN != 0 - && ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) & $rhs.lanes_eq(Simd::splat(-1 as _))).any() + && ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) + // type inference can break here, so cut an SInt to size + & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any() { panic!($overflow); } else { - unsafe { $crate::intrinsics::$simd_call($lhs, $rhs) } + unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } } }; } From 4fc62c20829487a87fe161d8f0ea4789df6c1a01 Mon Sep 17 00:00:00 2001 From: Alec Goncharow Date: Sun, 23 Jan 2022 16:42:57 -0500 Subject: [PATCH 028/161] fix documentation typo Remove redundant "neither" in the documentation comment. 
--- crates/core_simd/src/vector/float.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 0e179d6fa76..ae900ff32a0 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -105,7 +105,7 @@ macro_rules! impl_float_vector { self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0)) } - /// Returns true for each lane if its value is neither neither zero, infinite, + /// Returns true for each lane if its value is neither zero, infinite, /// subnormal, or `NaN`. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] From 36cca22f16e1e67076ac4490cddd6002b3ddea2b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 24 Jan 2022 20:11:17 -0500 Subject: [PATCH 029/161] Update crates/core_simd/src/vector/float.rs Co-authored-by: Alexander Ronald Altman --- crates/core_simd/src/vector/float.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index ae900ff32a0..fcc7f6d8d1c 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -106,7 +106,7 @@ macro_rules! impl_float_vector { } /// Returns true for each lane if its value is neither zero, infinite, - /// subnormal, or `NaN`. + /// subnormal, nor `NaN`. 
#[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_normal(self) -> Mask<$mask_ty, LANES> { From a991d48e95911c0e94f47bda10cbb50200852ec2 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jan 2022 16:58:38 -0800 Subject: [PATCH 030/161] Add Simd::cast --- crates/core_simd/src/intrinsics.rs | 3 +++ crates/core_simd/src/vector.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 70f1d47c08b..2291400537c 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -39,6 +39,9 @@ extern "platform-intrinsic" { /// fptoui/fptosi/uitofp/sitofp pub(crate) fn simd_cast(x: T) -> U; + /// follows Rust's `T as U` semantics, including saturating float casts + /// which amounts to the same as `simd_cast` for many cases + pub(crate) fn simd_as(x: T) -> U; /// neg/fneg pub(crate) fn simd_neg(x: T) -> T; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 7c5ec2bc314..a9e99a18c2d 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -75,6 +75,35 @@ where Self(array) } + /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. + /// This follows the semantics of Rust's `as` conversion for casting + /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`), + /// and from floats to integers (truncating, or saturating at the limits) for each lane, + /// or vice versa. 
+ /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "std")] use core_simd::Simd; + /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// let floats: Simd = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); + /// let ints = floats.cast::(); + /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); + /// + /// // Formally equivalent, but `Simd::cast` can optimize better. + /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32))); + /// + /// // The float conversion does not round-trip. + /// let floats_again = ints.cast(); + /// assert_ne!(floats, floats_again); + /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0])); + /// ``` + #[must_use] + #[inline] + pub fn cast(self) -> Simd { + unsafe { intrinsics::simd_as(self) } + } + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. /// If an index is out-of-bounds, the lane is instead selected from the `or` vector. /// From 0031b02cee0c7679120c7a942c378cd13bfb5021 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jan 2022 20:54:05 -0800 Subject: [PATCH 031/161] Add core_simd/tests/cast.rs --- crates/core_simd/tests/cast.rs | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 crates/core_simd/tests/cast.rs diff --git a/crates/core_simd/tests/cast.rs b/crates/core_simd/tests/cast.rs new file mode 100644 index 00000000000..ab5650f0713 --- /dev/null +++ b/crates/core_simd/tests/cast.rs @@ -0,0 +1,37 @@ +#![feature(portable_simd)] +macro_rules! cast_types { + ($start:ident, $($target:ident),*) => { + mod $start { + use core_simd::simd::Simd; + type Vector = Simd<$start, N>; + $( + mod $target { + use super::*; + test_helpers::test_lanes! 
{ + fn cast_as() { + test_helpers::test_unary_elementwise( + &Vector::::cast::<$target>, + &|x| x as $target, + &|_| true, + ) + } + } + } + )* + } + }; +} + +// The hypothesis is that widening conversions aren't terribly interesting. +cast_types!(f32, f64, i8, u8, usize, isize); +cast_types!(f64, f32, i8, u8, usize, isize); +cast_types!(i8, u8, f32); +cast_types!(u8, i8, f32); +cast_types!(i16, u16, i8, u8, f32); +cast_types!(u16, i16, i8, u8, f32); +cast_types!(i32, u32, i8, u8, f32, f64); +cast_types!(u32, i32, i8, u8, f32, f64); +cast_types!(i64, u64, i8, u8, isize, usize, f32, f64); +cast_types!(u64, i64, i8, u8, isize, usize, f32, f64); +cast_types!(isize, usize, i8, u8, f32, f64); +cast_types!(usize, isize, i8, u8, f32, f64); From 03f6fbb21e6050da2a05b3ce8f480c020b384916 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 27 Jan 2022 09:07:15 -0800 Subject: [PATCH 032/161] Omit Simd::cast during bootstrap --- crates/core_simd/src/intrinsics.rs | 1 + crates/core_simd/src/vector.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 2291400537c..233657202f7 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -41,6 +41,7 @@ extern "platform-intrinsic" { pub(crate) fn simd_cast(x: T) -> U; /// follows Rust's `T as U` semantics, including saturating float casts /// which amounts to the same as `simd_cast` for many cases + #[cfg(not(bootstrap))] pub(crate) fn simd_as(x: T) -> U; /// neg/fneg diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index a9e99a18c2d..b7ef7a56c73 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -100,6 +100,7 @@ where /// ``` #[must_use] #[inline] + #[cfg(not(bootstrap))] pub fn cast(self) -> Simd { unsafe { intrinsics::simd_as(self) } } From ebf65de2ce331f428a174f4ade13f60c8654472f Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 2 Feb 2022 18:14:16 
-0800 Subject: [PATCH 033/161] Delete outmoded fn round_from_int --- crates/core_simd/src/round.rs | 7 ------- crates/core_simd/tests/round.rs | 8 -------- 2 files changed, 15 deletions(-) diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 06ccab3ec49..08339593600 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -22,13 +22,6 @@ macro_rules! implement { pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> { unsafe { intrinsics::simd_cast(self) } } - - /// Creates a floating-point vector from an integer vector. Rounds values that are - /// not exactly representable. - #[inline] - pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self { - unsafe { intrinsics::simd_cast(value) } - } } } } diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 1a1bc9ebca7..90547d5782c 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -53,14 +53,6 @@ macro_rules! float_rounding_test { } test_helpers::test_lanes! { - fn from_int() { - test_helpers::test_unary_elementwise( - &Vector::::round_from_int, - &|x| x as Scalar, - &|_| true, - ) - } - fn to_int_unchecked() { // The maximum integer that can be represented by the equivalently sized float has // all of the mantissa digits set to 1, pushed up to the MSB. 
From 4910274686bcd144228a04d8d4d5dece4c7f5e3d Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 2 Feb 2022 18:21:12 -0800 Subject: [PATCH 034/161] Genericize to_int_unchecked --- crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/round.rs | 15 ++++++++++----- crates/core_simd/tests/round.rs | 6 +++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 960a6640083..41f64e972d9 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(not(feature = "std"), no_std)] #![feature( const_fn_trait_bound, + convert_float_to_int, decl_macro, platform_intrinsics, repr_simd, diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 08339593600..f1724cbc263 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -1,9 +1,10 @@ use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use core::convert::FloatToInt; macro_rules! implement { { - $type:ty, $int_type:ty + $type:ty } => { impl Simd<$type, LANES> where @@ -19,12 +20,16 @@ macro_rules! implement { /// * Not be infinite /// * Be representable in the return type, after truncating off its fractional part #[inline] - pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> { + pub unsafe fn to_int_unchecked(self) -> Simd + where + $type: FloatToInt, + I: SimdElement, + { unsafe { intrinsics::simd_cast(self) } } } } } -implement! { f32, i32 } -implement! { f64, i64 } +implement! { f32 } +implement! { f64 } diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 90547d5782c..53732329237 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -64,11 +64,11 @@ macro_rules! 
float_rounding_test { runner.run( &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE), |x| { - let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() }; + let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::().to_array() }; let result_2 = { - let mut result = [0; LANES]; + let mut result: [IntScalar; LANES] = [0; LANES]; for (i, o) in x.iter().zip(result.iter_mut()) { - *o = unsafe { i.to_int_unchecked() }; + *o = unsafe { i.to_int_unchecked::() }; } result }; From 672bfebfd89b7d7ebdac3dbcf714c6010430d5fc Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Mon, 7 Feb 2022 21:24:21 -0800 Subject: [PATCH 035/161] Remove overflow panic from divrem Includes some remarks in intrinsics.rs, generated while auditing the interface for remaining UB. --- crates/core_simd/src/intrinsics.rs | 9 +++++++ crates/core_simd/src/ops.rs | 39 +++++++++++++++++----------- crates/core_simd/tests/ops_macros.rs | 24 ++++++++--------- 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 233657202f7..b5d0df7548f 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -17,9 +17,15 @@ extern "platform-intrinsic" { pub(crate) fn simd_mul(x: T, y: T) -> T; /// udiv/sdiv/fdiv + /// ints and uints: {s,u}div incur UB if division by zero occurs. + /// ints: sdiv is UB for int::MIN / -1. + /// floats: fdiv is never UB, but may create NaNs or infinities. pub(crate) fn simd_div(x: T, y: T) -> T; /// urem/srem/frem + /// ints and uints: {s,u}rem incur UB if division by zero occurs. + /// ints: srem is UB for int::MIN / -1. + /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno. 
pub(crate) fn simd_rem(x: T, y: T) -> T; /// shl @@ -45,6 +51,9 @@ extern "platform-intrinsic" { pub(crate) fn simd_as(x: T) -> U; /// neg/fneg + /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`. + /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it. + /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`. pub(crate) fn simd_neg(x: T) -> T; /// fabs diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index b65038933bf..1b35b3e717a 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -57,29 +57,40 @@ macro_rules! wrap_bitshift { }; } -// Division by zero is poison, according to LLVM. -// So is dividing the MIN value of a signed integer by -1, -// since that would return MAX + 1. -// FIXME: Rust allows ::MIN / -1, -// so we should probably figure out how to make that safe. +/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic. +/// It guards against LLVM's UB conditions for integer div or rem using masks and selects, +/// thus guaranteeing a Rust value returns instead. +/// +/// | | LLVM | Rust +/// | :--------------: | :--- | :---------- +/// | N {/,%} 0 | UB | panic!() +/// | <$int>::MIN / -1 | UB | <$int>::MIN +/// | <$int>::MIN % -1 | UB | 0 +/// macro_rules! 
int_divrem_guard { ( $lhs:ident, $rhs:ident, { const PANIC_ZERO: &'static str = $zero:literal; - const PANIC_OVERFLOW: &'static str = $overflow:literal; $simd_call:ident }, $int:ident ) => { if $rhs.lanes_eq(Simd::splat(0)).any() { panic!($zero); - } else if <$int>::MIN != 0 - && ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) - // type inference can break here, so cut an SInt to size - & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any() - { - panic!($overflow); } else { - unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } + // Prevent otherwise-UB overflow on the MIN / -1 case. + let rhs = if <$int>::MIN != 0 { + // This should, at worst, optimize to a few branchless logical ops + // Ideally, this entire conditional should evaporate + // Fire LLVM and implement those manually if it doesn't get the hint + ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) + // type inference can break here, so cut an SInt to size + & $rhs.lanes_eq(Simd::splat(-1i64 as _))) + .select(Simd::splat(1), $rhs) + } else { + // Nice base case to make it easy to const-fold away the other branch. + $rhs + }; + unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) } } }; } @@ -183,7 +194,6 @@ for_base_ops! { impl Div::div { int_divrem_guard { const PANIC_ZERO: &'static str = "attempt to divide by zero"; - const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow"; simd_div } } @@ -191,7 +201,6 @@ for_base_ops! { impl Rem::rem { int_divrem_guard { const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; - const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow"; simd_rem } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 4fb9de198ee..9ba66fb8dd9 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -210,15 +210,21 @@ macro_rules! 
impl_signed_tests { ) } + fn div_min_may_overflow() { + let a = Vector::::splat(Scalar::MIN); + let b = Vector::::splat(-1); + assert_eq!(a / b, a / (b * b)); + } + + fn rem_min_may_overflow() { + let a = Vector::::splat(Scalar::MIN); + let b = Vector::::splat(-1); + assert_eq!(a % b, a % (b * b)); + } + } test_helpers::test_lanes_panic! { - fn div_min_overflow_panics() { - let a = Vector::::splat(Scalar::MIN); - let b = Vector::::splat(-1); - let _ = a / b; - } - fn div_by_all_zeros_panics() { let a = Vector::::splat(42); let b = Vector::::splat(0); @@ -232,12 +238,6 @@ macro_rules! impl_signed_tests { let _ = a / b; } - fn rem_min_overflow_panic() { - let a = Vector::::splat(Scalar::MIN); - let b = Vector::::splat(-1); - let _ = a % b; - } - fn rem_zero_panic() { let a = Vector::::splat(42); let b = Vector::::splat(0); From e628a2991c47a771340cc5f8f06826c918f79609 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 8 Feb 2022 14:15:45 -0800 Subject: [PATCH 036/161] Document Simd is Simd, N> and other quirks like panicking and the equivalence to zipping and mapping binary ops --- crates/core_simd/src/vector.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index b7ef7a56c73..0a2f681f66b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -12,7 +12,39 @@ pub(crate) mod ptr; use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; -/// A SIMD vector of `LANES` elements of type `T`. +/// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. +/// This type is commonly known by names like `f32x4` or `Vec4` in many programming languages. +/// +/// Two vectors of the same type and length will, by convention, support the binary operations (+, *, etc.) that `T` does. 
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the binary operation, +/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping +/// the two arrays together and mapping the operator over each lane. +/// +/// ```rust +/// # #![feature(array_zip, portable_simd)] +/// # use core::simd::{Simd}; +/// let a0: [i32; 4] = [-2, 0, 2, 4]; +/// let a1 = [10, 9, 8, 7]; +/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs); +/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs); +/// +/// // `Simd` implements `From<[T; N]> +/// let [v0, v1] = [a0, a1].map(|a| Simd::from(a)); +/// // Which means arrays implement `Into>`. +/// assert_eq!(v0 + v1, zm_add.into()); +/// assert_eq!(v0 * v1, zm_mul.into()); +/// ``` +/// +/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping`]. +/// Thus, `Simd` does not implement `wrapping_add`, because that is the behavior of the normal operation. +/// This means there is no warning on overflows, even in "debug" builds. +/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap, +/// and even "debug builds" are unlikely to tolerate the loss of performance. +/// You may want to consider using explicitly checked arithmetic if such is required. +/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable. 
+/// +/// [`Wrapping`]: core::num::Wrapping +/// #[repr(simd)] pub struct Simd([T; LANES]) where From 5d52455c65bf5a5eb258ed11591b8ebfa61ea5c7 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Tue, 8 Feb 2022 17:38:21 -0800 Subject: [PATCH 037/161] Review for clarity and concision Co-authored-by: Caleb Zulawski --- crates/core_simd/src/vector.rs | 9 ++++----- crates/core_simd/tests/ops_macros.rs | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 0a2f681f66b..5bd8ed69535 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -13,10 +13,9 @@ use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. -/// This type is commonly known by names like `f32x4` or `Vec4` in many programming languages. /// -/// Two vectors of the same type and length will, by convention, support the binary operations (+, *, etc.) that `T` does. -/// These take the lanes at each index on the left-hand side and right-hand side, perform the binary operation, +/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does. +/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation, /// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping /// the two arrays together and mapping the operator over each lane. 
/// @@ -29,14 +28,14 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; /// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs); /// /// // `Simd` implements `From<[T; N]> -/// let [v0, v1] = [a0, a1].map(|a| Simd::from(a)); +/// let (v0, v1) = (Simd::from(a0), Simd::from(a1)); /// // Which means arrays implement `Into>`. /// assert_eq!(v0 + v1, zm_add.into()); /// assert_eq!(v0 * v1, zm_mul.into()); /// ``` /// /// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping`]. -/// Thus, `Simd` does not implement `wrapping_add`, because that is the behavior of the normal operation. +/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior. /// This means there is no warning on overflows, even in "debug" builds. /// For most applications where `Simd` is appropriate, it is "not a bug" to wrap, /// and even "debug builds" are unlikely to tolerate the loss of performance. diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 9ba66fb8dd9..50f7a4ca170 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -213,13 +213,13 @@ macro_rules! 
impl_signed_tests { fn div_min_may_overflow() { let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(-1); - assert_eq!(a / b, a / (b * b)); + assert_eq!(a / b, a); } fn rem_min_may_overflow() { let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(-1); - assert_eq!(a % b, a % (b * b)); + assert_eq!(a % b, Vector::::splat(0)); } } From dddfffcfb3e49c752d6a28039735ada2552d0307 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 5 Nov 2021 01:42:29 +0000 Subject: [PATCH 038/161] Add some safety comments --- crates/core_simd/src/comparisons.rs | 12 ++++++++++++ crates/core_simd/src/masks.rs | 9 +++++++++ crates/core_simd/src/masks/bitmask.rs | 1 + crates/core_simd/src/masks/full_masks.rs | 6 ++++++ crates/core_simd/src/math.rs | 4 ++++ crates/core_simd/src/reduction.rs | 8 ++++++++ crates/core_simd/src/swizzle.rs | 2 ++ crates/core_simd/src/to_bytes.rs | 2 ++ crates/core_simd/src/vector.rs | 12 +++++++----- crates/core_simd/src/vector/ptr.rs | 4 ++++ crates/core_simd/src/vendor.rs | 2 ++ 11 files changed, 57 insertions(+), 5 deletions(-) diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs index edef5af3687..d024cf4ddbe 100644 --- a/crates/core_simd/src/comparisons.rs +++ b/crates/core_simd/src/comparisons.rs @@ -10,6 +10,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_eq(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } } @@ -17,6 +19,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_ne(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } } } @@ -30,6 +34,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_lt(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } } @@ -37,6 +43,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_gt(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } } @@ -44,6 +52,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_le(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } } @@ -51,6 +61,8 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn lanes_ge(self, other: Self) -> Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } } } diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ae1fef53da8..b1f98d9eb4e 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -42,6 +42,9 @@ mod sealed { use sealed::Sealed; /// Marker trait for types that may be used as SIMD mask elements. +/// +/// # Safety +/// Type must be a signed integer. pub unsafe trait MaskElement: SimdElement + Sealed {} macro_rules! 
impl_element { @@ -149,6 +152,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { + // Safety: the caller must confirm this invariant unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } } @@ -161,6 +165,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_int(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); + // Safety: the validity has been checked unsafe { Self::from_int_unchecked(value) } } @@ -179,6 +184,7 @@ where #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, lane: usize) -> bool { + // Safety: the caller must confirm this invariant unsafe { self.0.test_unchecked(lane) } } @@ -190,6 +196,7 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] pub fn test(&self, lane: usize) -> bool { assert!(lane < LANES, "lane index out of range"); + // Safety: the lane index has been checked unsafe { self.test_unchecked(lane) } } @@ -199,6 +206,7 @@ where /// `lane` must be less than `LANES`. 
#[inline] pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { + // Safety: the caller must confirm this invariant unsafe { self.0.set_unchecked(lane, value); } @@ -211,6 +219,7 @@ where #[inline] pub fn set(&mut self, lane: usize, value: bool) { assert!(lane < LANES, "lane index out of range"); + // Safety: the lane index has been checked unsafe { self.set_unchecked(lane, value); } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index b4217dc87ba..b7f8b2c236e 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -137,6 +137,7 @@ where where U: MaskElement, { + // Safety: bitmask layout does not depend on the element width unsafe { core::mem::transmute_copy(&self) } } diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index e5bb784bb91..02b5593c8f4 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -106,6 +106,7 @@ where where U: MaskElement, { + // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. 
unsafe { Mask(intrinsics::simd_cast(self.0)) } } @@ -155,12 +156,14 @@ where #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { + // Safety: use `self` as an integer vector unsafe { intrinsics::simd_reduce_any(self.to_int()) } } #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] pub fn all(self) -> bool { + // Safety: use `self` as an integer vector unsafe { intrinsics::simd_reduce_all(self.to_int()) } } } @@ -184,6 +187,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) } } } @@ -197,6 +201,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) } } } @@ -210,6 +215,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Self) -> Self { + // Safety: `self` is an integer vector unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) } } } diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs index 7435b6df918..0b4e40983af 100644 --- a/crates/core_simd/src/math.rs +++ b/crates/core_simd/src/math.rs @@ -22,6 +22,7 @@ macro_rules! impl_uint_arith { /// ``` #[inline] pub fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector unsafe { simd_saturating_add(self, second) } } @@ -41,6 +42,7 @@ macro_rules! impl_uint_arith { /// assert_eq!(sat, Simd::splat(0)); #[inline] pub fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector unsafe { simd_saturating_sub(self, second) } } })+ @@ -68,6 +70,7 @@ macro_rules! 
impl_int_arith { /// ``` #[inline] pub fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector unsafe { simd_saturating_add(self, second) } } @@ -87,6 +90,7 @@ macro_rules! impl_int_arith { /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0])); #[inline] pub fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector unsafe { simd_saturating_sub(self, second) } } diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e79a185816b..e1cd743e442 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions { /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. #[inline] pub fn horizontal_sum(self) -> $scalar { + // Safety: `self` is an integer vector unsafe { simd_reduce_add_ordered(self, 0) } } /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] pub fn horizontal_product(self) -> $scalar { + // Safety: `self` is an integer vector unsafe { simd_reduce_mul_ordered(self, 1) } } /// Horizontal maximum. Returns the maximum lane in the vector. #[inline] pub fn horizontal_max(self) -> $scalar { + // Safety: `self` is an integer vector unsafe { simd_reduce_max(self) } } /// Horizontal minimum. Returns the minimum lane in the vector. #[inline] pub fn horizontal_min(self) -> $scalar { + // Safety: `self` is an integer vector unsafe { simd_reduce_min(self) } } } @@ -63,6 +67,7 @@ macro_rules! impl_float_reductions { if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().sum() } else { + // Safety: `self` is a float vector unsafe { simd_reduce_add_ordered(self, 0.) } } } @@ -74,6 +79,7 @@ macro_rules! 
impl_float_reductions { if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().product() } else { + // Safety: `self` is a float vector unsafe { simd_reduce_mul_ordered(self, 1.) } } } @@ -84,6 +90,7 @@ macro_rules! impl_float_reductions { /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] pub fn horizontal_max(self) -> $scalar { + // Safety: `self` is a float vector unsafe { simd_reduce_max(self) } } @@ -93,6 +100,7 @@ macro_rules! impl_float_reductions { /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] pub fn horizontal_min(self) -> $scalar { + // Safety: `self` is a float vector unsafe { simd_reduce_min(self) } } } diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index bdc489774a5..08b2add1166 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -95,6 +95,7 @@ pub trait Swizzle { LaneCount: SupportedLaneCount, LaneCount: SupportedLaneCount, { + // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32. unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) } } } @@ -119,6 +120,7 @@ pub trait Swizzle2 { LaneCount: SupportedLaneCount, LaneCount: SupportedLaneCount, { + // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32. unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) } } } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 8d9b3e8ff85..b36b1a347b2 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -8,12 +8,14 @@ macro_rules! impl_to_bytes { /// Return the memory representation of this integer as a byte array in native byte /// order. 
pub fn to_ne_bytes(self) -> crate::simd::Simd { + // Safety: transmuting between vectors is safe unsafe { core::mem::transmute_copy(&self) } } /// Create a native endian integer value from its memory representation as a byte array /// in native endianness. pub fn from_ne_bytes(bytes: crate::simd::Simd) -> Self { + // Safety: transmuting between vectors is safe unsafe { core::mem::transmute_copy(&bytes) } } } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 5bd8ed69535..e452fa8bfc8 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -206,7 +206,7 @@ where or: Self, ) -> Self { let enable: Mask = enable & idxs.lanes_lt(Simd::splat(slice.len())); - // SAFETY: We have masked-off out-of-bounds lanes. + // Safety: We have masked-off out-of-bounds lanes. unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } } @@ -247,7 +247,7 @@ where let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr()); // Ferris forgive me, I have done pointer arithmetic here. let ptrs = base_ptr.wrapping_add(idxs); - // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah + // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) } } @@ -299,7 +299,7 @@ where idxs: Simd, ) { let enable: Mask = enable & idxs.lanes_lt(Simd::splat(slice.len())); - // SAFETY: We have masked-off out-of-bounds lanes. + // Safety: We have masked-off out-of-bounds lanes. unsafe { self.scatter_select_unchecked(slice, enable, idxs) } } @@ -338,7 +338,7 @@ where enable: Mask, idxs: Simd, ) { - // SAFETY: This block works with *mut T derived from &mut 'a [T], + // Safety: This block works with *mut T derived from &mut 'a [T], // which means it is delicate in Rust's borrowing model, circa 2021: // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts! 
// Even though this block is largely safe methods, it must be exactly this way @@ -518,7 +518,9 @@ mod sealed { use sealed::Sealed; /// Marker trait for types that may be used as SIMD vector elements. -/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize +/// +/// # Safety +/// This trait, when implemented, asserts the compiler can monomorphize /// `#[repr(simd)]` structs with the marked type as an element. /// Strictly, it is valid to impl if the vector will not be miscompiled. /// Practically, it is user-unfriendly to impl it if the vector won't compile, diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs index c668d9a6eae..417d255c28d 100644 --- a/crates/core_simd/src/vector/ptr.rs +++ b/crates/core_simd/src/vector/ptr.rs @@ -21,6 +21,8 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { + // Safety: converting pointers to usize and vice-versa is safe + // (even if using that pointer is not) unsafe { let x: Simd = mem::transmute_copy(&self); mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) @@ -47,6 +49,8 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { + // Safety: converting pointers to usize and vice-versa is safe + // (even if using that pointer is not) unsafe { let x: Simd = mem::transmute_copy(&self); mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs index e8ce7176b4f..9fb70218c95 100644 --- a/crates/core_simd/src/vendor.rs +++ b/crates/core_simd/src/vendor.rs @@ -9,6 +9,8 @@ macro_rules! 
from_transmute { impl core::convert::From<$from> for $to { #[inline] fn from(value: $from) -> $to { + // Safety: transmuting between vectors is safe, but the caller of this macro + // checks the invariants unsafe { core::mem::transmute(value) } } } From 78a18c3433c13bbd6259242185667c385eafe859 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Thu, 10 Feb 2022 09:32:44 -0800 Subject: [PATCH 039/161] rust-lang/portable-simd#245: Explain unsafe contracts of core::simd * Explain unsafe contracts of core::simd This permeates the module with remarks on safety for pub methods, layout of the Simd type, correct use of intrinsics, et cetera. This is mostly to help others curious about how core::simd works, including other Rust contributors, `unsafe` library authors, and eventually ourselves. --- crates/core_simd/src/intrinsics.rs | 59 ++++++++++++++++++++++++------ crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/round.rs | 5 +++ crates/core_simd/src/select.rs | 4 ++ crates/core_simd/src/vector.rs | 42 +++++++++++++++++++++ 5 files changed, 100 insertions(+), 11 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index b5d0df7548f..e150946c705 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -2,16 +2,31 @@ //! crate. //! //! The LLVM assembly language is documented here: +//! +//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef: +//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT +//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons, +//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either. +//! 
- undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note: +//! this means that division by poison or undef is like division by zero, which means it inflicts... +//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program: +//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end". +//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior, +//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract. +//! +//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory. +//! +//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths. /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are -/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction -/// is documented alongside each intrinsic. +/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner. +/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function. extern "platform-intrinsic" { /// add/fadd pub(crate) fn simd_add(x: T, y: T) -> T; /// sub/fsub - pub(crate) fn simd_sub(x: T, y: T) -> T; + pub(crate) fn simd_sub(lhs: T, rhs: T) -> T; /// mul/fmul pub(crate) fn simd_mul(x: T, y: T) -> T; @@ -20,19 +35,22 @@ extern "platform-intrinsic" { /// ints and uints: {s,u}div incur UB if division by zero occurs. /// ints: sdiv is UB for int::MIN / -1. /// floats: fdiv is never UB, but may create NaNs or infinities. 
- pub(crate) fn simd_div(x: T, y: T) -> T; + pub(crate) fn simd_div(lhs: T, rhs: T) -> T; /// urem/srem/frem /// ints and uints: {s,u}rem incur UB if division by zero occurs. /// ints: srem is UB for int::MIN / -1. /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno. - pub(crate) fn simd_rem(x: T, y: T) -> T; + pub(crate) fn simd_rem(lhs: T, rhs: T) -> T; /// shl - pub(crate) fn simd_shl(x: T, y: T) -> T; + /// for (u)ints. poison if rhs >= lhs::BITS + pub(crate) fn simd_shl(lhs: T, rhs: T) -> T; - /// lshr/ashr - pub(crate) fn simd_shr(x: T, y: T) -> T; + /// ints: ashr + /// uints: lshr + /// poison if rhs >= lhs::BITS + pub(crate) fn simd_shr(lhs: T, rhs: T) -> T; /// and pub(crate) fn simd_and(x: T, y: T) -> T; @@ -44,6 +62,9 @@ extern "platform-intrinsic" { pub(crate) fn simd_xor(x: T, y: T) -> T; /// fptoui/fptosi/uitofp/sitofp + /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5 + /// but the truncated value must fit in the target type or the result is poison. + /// use `simd_as` instead for a cast that performs a saturating conversion. 
pub(crate) fn simd_cast(x: T) -> U; /// follows Rust's `T as U` semantics, including saturating float casts /// which amounts to the same as `simd_cast` for many cases @@ -63,6 +84,7 @@ extern "platform-intrinsic" { pub(crate) fn simd_fmin(x: T, y: T) -> T; pub(crate) fn simd_fmax(x: T, y: T) -> T; + // these return Simd with the same BITS size as the inputs pub(crate) fn simd_eq(x: T, y: T) -> U; pub(crate) fn simd_ne(x: T, y: T) -> U; pub(crate) fn simd_lt(x: T, y: T) -> U; @@ -71,19 +93,31 @@ extern "platform-intrinsic" { pub(crate) fn simd_ge(x: T, y: T) -> U; // shufflevector + // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s pub(crate) fn simd_shuffle(x: T, y: T, idx: U) -> V; + /// llvm.masked.gather + /// like a loop of pointer reads + /// val: vector of values to select if a lane is masked + /// ptr: vector of pointers to read from + /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val) + /// note, the LLVM intrinsic accepts a mask vector of `<N x i1>` + /// FIXME: review this if/when we fix up our mask story in general?
pub(crate) fn simd_gather(val: T, ptr: U, mask: V) -> T; + /// llvm.masked.scatter + /// like gather, but more spicy, as it writes instead of reads pub(crate) fn simd_scatter(val: T, ptr: U, mask: V); // {s,u}add.sat pub(crate) fn simd_saturating_add(x: T, y: T) -> T; // {s,u}sub.sat - pub(crate) fn simd_saturating_sub(x: T, y: T) -> T; + pub(crate) fn simd_saturating_sub(lhs: T, rhs: T) -> T; // reductions + // llvm.vector.reduce.{add,fadd} pub(crate) fn simd_reduce_add_ordered(x: T, y: U) -> U; + // llvm.vector.reduce.{mul,fmul} pub(crate) fn simd_reduce_mul_ordered(x: T, y: U) -> U; #[allow(unused)] pub(crate) fn simd_reduce_all(x: T) -> bool; @@ -100,7 +134,10 @@ extern "platform-intrinsic" { pub(crate) fn simd_bitmask(x: T) -> U; // select - pub(crate) fn simd_select(m: M, a: T, b: T) -> T; + // first argument is a vector of integers, -1 (all bits 1) is "true" + // logically equivalent to (yes & m) | (no & (m^-1)), + // but you can use it on floats. + pub(crate) fn simd_select(m: M, yes: T, no: T) -> T; #[allow(unused)] - pub(crate) fn simd_select_bitmask(m: M, a: T, b: T) -> T; + pub(crate) fn simd_select_bitmask(m: M, yes: T, no: T) -> T; } diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 41f64e972d9..91ae34c05e0 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -3,6 +3,7 @@ const_fn_trait_bound, convert_float_to_int, decl_macro, + intra_doc_pointers, platform_intrinsics, repr_simd, simd_ffi, diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index f1724cbc263..556bc2cc1fe 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -19,6 +19,11 @@ macro_rules! implement { /// * Not be NaN /// * Not be infinite /// * Be representable in the return type, after truncating off its fractional part + /// + /// If these requirements are infeasible or costly, consider using the safe function [cast], + /// which saturates on conversion.
+ /// + /// [cast]: Simd::cast #[inline] pub unsafe fn to_int_unchecked(self) -> Simd where diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index 8d521057fbd..3acf07260e1 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -11,6 +11,7 @@ where /// For each lane in the mask, choose the corresponding lane from `true_values` if /// that lane mask is true, and `false_values` if that lane mask is false. /// + /// # Examples /// ``` /// # #![feature(portable_simd)] /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; @@ -31,6 +32,8 @@ where where U: SimdElement, { + // Safety: The mask has been cast to a vector of integers, + // and the operands to select between are vectors of the same type and length. unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } } @@ -39,6 +42,7 @@ where /// For each lane in the mask, choose the corresponding lane from `true_values` if /// that lane mask is true, and `false_values` if that lane mask is false. /// + /// # Examples /// ``` /// # #![feature(portable_simd)] /// # #[cfg(feature = "std")] use core_simd::Mask; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index e452fa8bfc8..ff1b2c756ad 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -44,6 +44,47 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; /// /// [`Wrapping`]: core::num::Wrapping /// +/// # Layout +/// `Simd` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment. +/// `[T; N]` is aligned to `T`, but `Simd` will have an alignment based on both `T` and `N`. +/// It is thus sound to [`transmute`] `Simd` to `[T; N]`, and will typically optimize to zero cost, +/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide. 
+/// +/// # ABI "Features" +/// Due to Rust's safety guarantees, `Simd` is currently passed to and from functions via memory, not SIMD registers, +/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd` or return it. +/// The need for this may be corrected in the future. +/// +/// # Safe SIMD with Unsafe Rust +/// +/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code. +/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from. +/// In particular, the layout of `Simd` may be similar to `[T; N]`, and may allow some transmutations, +/// but references to `[T; N]` are not interchangeable with those to `Simd`. +/// Thus, when using `unsafe` Rust to read and write `Simd` through [raw pointers], it is a good idea to first try with +/// [`read_unaligned`] and [`write_unaligned`]. This is because: +/// - [`read`] and [`write`] require full alignment (in this case, `Simd`'s alignment) +/// - the likely source for reading or destination for writing `Simd` is [`[T]`](slice) and similar types, aligned to `T` +/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior** +/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization +/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime +/// +/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound, +/// and may be just as fast as stricter alternatives. +/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd]`, +/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned +/// to the `Simd` you wish to use before using `unsafe` Rust to read or write. +/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first, +/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`]. +/// +/// [`transmute`]: core::mem::transmute +/// [raw pointers]: pointer +/// [`read_unaligned`]: pointer::read_unaligned +/// [`write_unaligned`]: pointer::write_unaligned +/// [`read`]: pointer::read +/// [`write`]: pointer::write +/// [as_simd]: slice::as_simd #[repr(simd)] pub struct Simd([T; LANES]) where @@ -133,6 +174,7 @@ where #[inline] #[cfg(not(bootstrap))] pub fn cast(self) -> Simd { + // Safety: The input argument is a vector of a known SIMD type. unsafe { intrinsics::simd_as(self) } } From 842ac87747c4a6f8002ada6bab04d97320d206fc Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 13 Jan 2022 21:20:17 -0500 Subject: [PATCH 040/161] Use bitmask trait --- crates/core_simd/src/masks.rs | 22 ++----- crates/core_simd/src/masks/bitmask.rs | 12 +--- crates/core_simd/src/masks/full_masks.rs | 35 ++--------- crates/core_simd/src/masks/to_bitmask.rs | 78 ++++++++++++++++++++++++ crates/core_simd/tests/masks.rs | 6 +- 5 files changed, 93 insertions(+), 60 deletions(-) create mode 100644 crates/core_simd/src/masks/to_bitmask.rs diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ae1fef53da8..22514728ffa 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,8 +12,10 @@ )] mod mask_impl; -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +mod to_bitmask; +pub use to_bitmask::ToBitMask; + +use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -216,22 +218,6 @@ where } } - /// Convert this mask to a 
bitmask, with one bit set per lane. - #[cfg(feature = "generic_const_exprs")] - #[inline] - #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { - self.0.to_bitmask() - } - - /// Convert a bitmask to a mask. - #[cfg(feature = "generic_const_exprs")] - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask(bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { - Self(mask_impl::Mask::from_bitmask(bitmask)) - } - /// Returns true if any lane is set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index b4217dc87ba..f20f83ecb38 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -115,20 +115,14 @@ where unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } - #[cfg(feature = "generic_const_exprs")] #[inline] - #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { - // Safety: these are the same type and we are laundering the generic + pub unsafe fn to_bitmask_intrinsic(self) -> U { unsafe { core::mem::transmute_copy(&self.0) } } - #[cfg(feature = "generic_const_exprs")] #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask(bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { - // Safety: these are the same type and we are laundering the generic - Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) + pub unsafe fn from_bitmask_intrinsic(bitmask: U) -> Self { + unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } } #[inline] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 
e5bb784bb91..b20b0a4b708 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -109,41 +109,16 @@ where unsafe { Mask(intrinsics::simd_cast(self.0)) } } - #[cfg(feature = "generic_const_exprs")] #[inline] - #[must_use = "method returns a new array and does not mutate the original value"] - pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { - unsafe { - let mut bitmask: [u8; LaneCount::::BITMASK_LEN] = - intrinsics::simd_bitmask(self.0); - - // There is a bug where LLVM appears to implement this operation with the wrong - // bit order. - // TODO fix this in a better way - if cfg!(target_endian = "big") { - for x in bitmask.as_mut() { - *x = x.reverse_bits(); - } - } - - bitmask - } + pub unsafe fn to_bitmask_intrinsic(self) -> U { + // Safety: caller must only return bitmask types + unsafe { intrinsics::simd_bitmask(self.0) } } - #[cfg(feature = "generic_const_exprs")] #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask(mut bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { + pub unsafe fn from_bitmask_intrinsic(bitmask: U) -> Self { + // Safety: caller must only pass bitmask types unsafe { - // There is a bug where LLVM appears to implement this operation with the wrong - // bit order. - // TODO fix this in a better way - if cfg!(target_endian = "big") { - for x in bitmask.as_mut() { - *x = x.reverse_bits(); - } - } - Self::from_int_unchecked(intrinsics::simd_select_bitmask( bitmask, Self::splat(true).to_int(), diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs new file mode 100644 index 00000000000..3a9f89f19eb --- /dev/null +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -0,0 +1,78 @@ +use super::{mask_impl, Mask, MaskElement}; + +/// Converts masks to and from bitmasks. +/// +/// In a bitmask, each bit represents if the corresponding lane in the mask is set. 
+pub trait ToBitMask { + /// Converts a mask to a bitmask. + fn to_bitmask(self) -> BitMask; + + /// Converts a bitmask to a mask. + fn from_bitmask(bitmask: BitMask) -> Self; +} + +macro_rules! impl_integer_intrinsic { + { $(unsafe impl ToBitMask<$int:ty> for Mask<_, $lanes:literal>)* } => { + $( + impl ToBitMask<$int> for Mask { + fn to_bitmask(self) -> $int { + unsafe { self.0.to_bitmask_intrinsic() } + } + + fn from_bitmask(bitmask: $int) -> Self { + unsafe { Self(mask_impl::Mask::from_bitmask_intrinsic(bitmask)) } + } + } + )* + } +} + +impl_integer_intrinsic! { + unsafe impl ToBitMask for Mask<_, 8> + unsafe impl ToBitMask for Mask<_, 16> + unsafe impl ToBitMask for Mask<_, 32> + unsafe impl ToBitMask for Mask<_, 64> +} + +macro_rules! impl_integer_via { + { $(impl ToBitMask<$int:ty, via $via:ty> for Mask<_, $lanes:literal>)* } => { + $( + impl ToBitMask<$int> for Mask { + fn to_bitmask(self) -> $int { + let bitmask: $via = self.to_bitmask(); + bitmask as _ + } + + fn from_bitmask(bitmask: $int) -> Self { + Self::from_bitmask(bitmask as $via) + } + } + )* + } +} + +impl_integer_via! { + impl ToBitMask for Mask<_, 8> + impl ToBitMask for Mask<_, 8> + impl ToBitMask for Mask<_, 8> + + impl ToBitMask for Mask<_, 16> + impl ToBitMask for Mask<_, 16> + + impl ToBitMask for Mask<_, 32> +} + +#[cfg(target_pointer_width = "32")] +impl_integer_via! { + impl ToBitMask for Mask<_, 8> + impl ToBitMask for Mask<_, 16> + impl ToBitMask for Mask<_, 32> +} + +#[cfg(target_pointer_width = "64")] +impl_integer_via! { + impl ToBitMask for Mask<_, 8> + impl ToBitMask for Mask<_, 16> + impl ToBitMask for Mask<_, 32> + impl ToBitMask for Mask<_, 64> +} diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 6a8ecd33a73..965c0fa2635 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -68,16 +68,16 @@ macro_rules! 
test_mask_api { assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask); } - #[cfg(feature = "generic_const_exprs")] #[test] fn roundtrip_bitmask_conversion() { + use core_simd::ToBitMask; let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, ]; let mask = core_simd::Mask::<$type, 16>::from_array(values); - let bitmask = mask.to_bitmask(); - assert_eq!(bitmask, [0b01001001, 0b10000011]); + let bitmask: u16 = mask.to_bitmask(); + assert_eq!(bitmask, 0b1000001101001001); assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask); } } From 11c3eefa3594055192612d0d6f844e764dcbda15 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 6 Feb 2022 03:25:27 +0000 Subject: [PATCH 041/161] Manually implement for supported lanes --- crates/core_simd/src/masks/bitmask.rs | 5 +- crates/core_simd/src/masks/full_masks.rs | 6 +- crates/core_simd/src/masks/to_bitmask.rs | 88 +++++++++--------------- crates/core_simd/tests/masks.rs | 2 +- 4 files changed, 40 insertions(+), 61 deletions(-) diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index f20f83ecb38..e27b2689606 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -116,12 +116,13 @@ where } #[inline] - pub unsafe fn to_bitmask_intrinsic(self) -> U { + pub unsafe fn to_bitmask_integer(self) -> U { unsafe { core::mem::transmute_copy(&self.0) } } + // Safety: U must be the integer with the exact number of bits required to hold the bitmask for #[inline] - pub unsafe fn from_bitmask_intrinsic(bitmask: U) -> Self { + pub unsafe fn from_bitmask_integer(bitmask: U) -> Self { unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } } diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index b20b0a4b708..90af486a887 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs 
@@ -110,13 +110,15 @@ where } #[inline] - pub unsafe fn to_bitmask_intrinsic(self) -> U { + pub unsafe fn to_bitmask_integer(self) -> U { // Safety: caller must only return bitmask types unsafe { intrinsics::simd_bitmask(self.0) } } + // Safety: U must be the integer with the exact number of bits required to hold the bitmask for + // this mask #[inline] - pub unsafe fn from_bitmask_intrinsic(bitmask: U) -> Self { + pub unsafe fn from_bitmask_integer(bitmask: U) -> Self { // Safety: caller must only pass bitmask types unsafe { Self::from_int_unchecked(intrinsics::simd_select_bitmask( diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 3a9f89f19eb..86143f2331f 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -1,26 +1,45 @@ use super::{mask_impl, Mask, MaskElement}; -/// Converts masks to and from bitmasks. +/// Converts masks to and from integer bitmasks. /// -/// In a bitmask, each bit represents if the corresponding lane in the mask is set. -pub trait ToBitMask { +/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB. +pub trait ToBitMask { + /// The integer bitmask type. + type BitMask; + /// Converts a mask to a bitmask. - fn to_bitmask(self) -> BitMask; + fn to_bitmask(self) -> Self::BitMask; /// Converts a bitmask to a mask. - fn from_bitmask(bitmask: BitMask) -> Self; + fn from_bitmask(bitmask: Self::BitMask) -> Self; +} + +/// Converts masks to and from byte array bitmasks. +/// +/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. +pub trait ToBitMaskArray { + /// The length of the bitmask array. + const BYTES: usize; + + /// Converts a mask to a bitmask. + fn to_bitmask_array(self) -> [u8; Self::BYTES]; + + /// Converts a bitmask to a mask. + fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self; } macro_rules! 
impl_integer_intrinsic { - { $(unsafe impl ToBitMask<$int:ty> for Mask<_, $lanes:literal>)* } => { + { $(unsafe impl ToBitMask for Mask<_, $lanes:literal>)* } => { $( - impl ToBitMask<$int> for Mask { + impl ToBitMask for Mask { + type BitMask = $int; + fn to_bitmask(self) -> $int { - unsafe { self.0.to_bitmask_intrinsic() } + unsafe { self.0.to_bitmask_integer() } } fn from_bitmask(bitmask: $int) -> Self { - unsafe { Self(mask_impl::Mask::from_bitmask_intrinsic(bitmask)) } + unsafe { Self(mask_impl::Mask::from_bitmask_integer(bitmask)) } } } )* @@ -28,51 +47,8 @@ macro_rules! impl_integer_intrinsic { } impl_integer_intrinsic! { - unsafe impl ToBitMask for Mask<_, 8> - unsafe impl ToBitMask for Mask<_, 16> - unsafe impl ToBitMask for Mask<_, 32> - unsafe impl ToBitMask for Mask<_, 64> -} - -macro_rules! impl_integer_via { - { $(impl ToBitMask<$int:ty, via $via:ty> for Mask<_, $lanes:literal>)* } => { - $( - impl ToBitMask<$int> for Mask { - fn to_bitmask(self) -> $int { - let bitmask: $via = self.to_bitmask(); - bitmask as _ - } - - fn from_bitmask(bitmask: $int) -> Self { - Self::from_bitmask(bitmask as $via) - } - } - )* - } -} - -impl_integer_via! { - impl ToBitMask for Mask<_, 8> - impl ToBitMask for Mask<_, 8> - impl ToBitMask for Mask<_, 8> - - impl ToBitMask for Mask<_, 16> - impl ToBitMask for Mask<_, 16> - - impl ToBitMask for Mask<_, 32> -} - -#[cfg(target_pointer_width = "32")] -impl_integer_via! { - impl ToBitMask for Mask<_, 8> - impl ToBitMask for Mask<_, 16> - impl ToBitMask for Mask<_, 32> -} - -#[cfg(target_pointer_width = "64")] -impl_integer_via! 
{ - impl ToBitMask for Mask<_, 8> - impl ToBitMask for Mask<_, 16> - impl ToBitMask for Mask<_, 32> - impl ToBitMask for Mask<_, 64> + unsafe impl ToBitMask for Mask<_, 8> + unsafe impl ToBitMask for Mask<_, 16> + unsafe impl ToBitMask for Mask<_, 32> + unsafe impl ToBitMask for Mask<_, 64> } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 965c0fa2635..3aec36ca7b7 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -76,7 +76,7 @@ macro_rules! test_mask_api { true, true, false, false, false, false, false, true, ]; let mask = core_simd::Mask::<$type, 16>::from_array(values); - let bitmask: u16 = mask.to_bitmask(); + let bitmask = mask.to_bitmask(); assert_eq!(bitmask, 0b1000001101001001); assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask); } From 20fa4b76235afb6a2ad543781a10a14e8013b143 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 9 Feb 2022 04:54:05 +0000 Subject: [PATCH 042/161] Make internal mask implementation safe --- crates/core_simd/src/masks/bitmask.rs | 15 +++++-- crates/core_simd/src/masks/full_masks.rs | 51 ++++++++++++++++++++---- crates/core_simd/src/masks/to_bitmask.rs | 39 +++++++++--------- 3 files changed, 75 insertions(+), 30 deletions(-) diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index e27b2689606..7bf2add2036 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; use core::marker::PhantomData; /// A mask where each lane is represented by a single bit. 
@@ -116,13 +116,20 @@ where } #[inline] - pub unsafe fn to_bitmask_integer(self) -> U { + pub fn to_bitmask_integer(self) -> U + where + super::Mask: ToBitMask, + { + // Safety: these are the same types unsafe { core::mem::transmute_copy(&self.0) } } - // Safety: U must be the integer with the exact number of bits required to hold the bitmask for #[inline] - pub unsafe fn from_bitmask_integer(bitmask: U) -> Self { + pub fn from_bitmask_integer(bitmask: U) -> Self + where + super::Mask: ToBitMask, + { + // Safety: these are the same types unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) } } diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 90af486a887..848997a0792 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -2,7 +2,7 @@ use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; #[repr(transparent)] pub struct Mask(Simd) @@ -66,6 +66,23 @@ where } } +// Used for bitmask bit order workaround +pub(crate) trait ReverseBits { + fn reverse_bits(self) -> Self; +} + +macro_rules! impl_reverse_bits { + { $($int:ty),* } => { + $( + impl ReverseBits for $int { + fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) } + } + )* + } +} + +impl_reverse_bits! 
{ u8, u16, u32, u64 } + impl Mask where T: MaskElement, @@ -110,16 +127,34 @@ where } #[inline] - pub unsafe fn to_bitmask_integer(self) -> U { - // Safety: caller must only return bitmask types - unsafe { intrinsics::simd_bitmask(self.0) } + pub(crate) fn to_bitmask_integer(self) -> U + where + super::Mask: ToBitMask, + { + // Safety: U is required to be the appropriate bitmask type + let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) }; + + // LLVM assumes bit order should match endianness + if cfg!(target_endian = "big") { + bitmask.reverse_bits() + } else { + bitmask + } } - // Safety: U must be the integer with the exact number of bits required to hold the bitmask for - // this mask #[inline] - pub unsafe fn from_bitmask_integer(bitmask: U) -> Self { - // Safety: caller must only pass bitmask types + pub(crate) fn from_bitmask_integer(bitmask: U) -> Self + where + super::Mask: ToBitMask, + { + // LLVM assumes bit order should match endianness + let bitmask = if cfg!(target_endian = "big") { + bitmask.reverse_bits() + } else { + bitmask + }; + + // Safety: U is required to be the appropriate bitmask type unsafe { Self::from_int_unchecked(intrinsics::simd_select_bitmask( bitmask, diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 86143f2331f..1c2037764c1 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -1,9 +1,26 @@ use super::{mask_impl, Mask, MaskElement}; +use crate::simd::{LaneCount, SupportedLaneCount}; + +mod sealed { + pub trait Sealed {} +} +pub use sealed::Sealed; + +impl Sealed for Mask +where + T: MaskElement, + LaneCount: SupportedLaneCount, +{ +} /// Converts masks to and from integer bitmasks. /// /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB. -pub trait ToBitMask { +/// +/// # Safety +/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in +/// the mask. 
+pub unsafe trait ToBitMask: Sealed { /// The integer bitmask type. type BitMask; @@ -14,32 +31,18 @@ pub trait ToBitMask { fn from_bitmask(bitmask: Self::BitMask) -> Self; } -/// Converts masks to and from byte array bitmasks. -/// -/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. -pub trait ToBitMaskArray { - /// The length of the bitmask array. - const BYTES: usize; - - /// Converts a mask to a bitmask. - fn to_bitmask_array(self) -> [u8; Self::BYTES]; - - /// Converts a bitmask to a mask. - fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self; -} - macro_rules! impl_integer_intrinsic { { $(unsafe impl ToBitMask for Mask<_, $lanes:literal>)* } => { $( - impl ToBitMask for Mask { + unsafe impl ToBitMask for Mask { type BitMask = $int; fn to_bitmask(self) -> $int { - unsafe { self.0.to_bitmask_integer() } + self.0.to_bitmask_integer() } fn from_bitmask(bitmask: $int) -> Self { - unsafe { Self(mask_impl::Mask::from_bitmask_integer(bitmask)) } + Self(mask_impl::Mask::from_bitmask_integer(bitmask)) } } )* From a5789d17bfc8bb794ea6e3f1497b8026d07edd60 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Mon, 28 Feb 2022 19:56:31 -0800 Subject: [PATCH 043/161] rust-lang/portable-simd#248: Remove default features Now that we are thoroughly embedded in libcore, we don't need these on by default. Indeed, their presence may provide confusing results during integration attempts. 
--- crates/core_simd/Cargo.toml | 2 +- crates/core_simd/tests/round.rs | 1 - crates/std_float/Cargo.toml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index d2ff5f3b1b1..8877c6df66e 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -9,7 +9,7 @@ categories = ["hardware-support", "no-std"] license = "MIT OR Apache-2.0" [features] -default = ["std", "generic_const_exprs"] +default = [] std = [] generic_const_exprs = [] diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 53732329237..7feb0320a16 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -9,7 +9,6 @@ macro_rules! float_rounding_test { type Scalar = $scalar; type IntScalar = $int_scalar; - #[cfg(feature = "std")] test_helpers::test_lanes! { fn ceil() { test_helpers::test_unary_elementwise( diff --git a/crates/std_float/Cargo.toml b/crates/std_float/Cargo.toml index 82f66b8dcb7..84c69774cbd 100644 --- a/crates/std_float/Cargo.toml +++ b/crates/std_float/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -core_simd = { path = "../core_simd" } +core_simd = { path = "../core_simd", default-features = false } [features] default = ["as_crate"] From 30975615b7c206ee96eddbf84fc0f11ee896f849 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Tue, 1 Mar 2022 16:10:49 -0800 Subject: [PATCH 044/161] rust-lang/portable-simd#250: Add bitmask i{N <8} -> u8 impls ...and copy the notes for why they're legal. 
--- crates/core_simd/src/intrinsics.rs | 8 ++++++++ crates/core_simd/src/masks/to_bitmask.rs | 3 +++ 2 files changed, 11 insertions(+) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index e150946c705..47edff4a66a 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -130,6 +130,14 @@ extern "platform-intrinsic" { pub(crate) fn simd_reduce_xor(x: T) -> U; // truncate integer vector to bitmask + // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and + // returns either an unsigned integer or array of `u8`. + // Every element in the vector becomes a single bit in the returned bitmask. + // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. + // The bit order of the result depends on the byte endianness. LSB-first for little + // endian and MSB-first for big endian. + // + // UB if called on a vector with values other than 0 and -1. #[allow(unused)] pub(crate) fn simd_bitmask(x: T) -> U; diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 1c2037764c1..c263f6a4eec 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -50,6 +50,9 @@ macro_rules! impl_integer_intrinsic { } impl_integer_intrinsic! 
{ + unsafe impl ToBitMask for Mask<_, 1> + unsafe impl ToBitMask for Mask<_, 2> + unsafe impl ToBitMask for Mask<_, 4> unsafe impl ToBitMask for Mask<_, 8> unsafe impl ToBitMask for Mask<_, 16> unsafe impl ToBitMask for Mask<_, 32> From 4ddcc006a9d8b4db65d7809881d058bf9b6cd8b0 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 4 Mar 2022 14:14:49 -0500 Subject: [PATCH 045/161] rust-lang/portable-simd#252: extern blocks don't have doc comments --- crates/core_simd/src/intrinsics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 47edff4a66a..ee7408b62de 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -18,9 +18,9 @@ //! //! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths. -/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are -/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner. -/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function. +// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are +// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner. +// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function. 
extern "platform-intrinsic" { /// add/fadd pub(crate) fn simd_add(x: T, y: T) -> T; From 2e5e0ec3807972cb699a97cd15dee5d6dbc936a5 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 8 Mar 2022 10:18:06 -0800 Subject: [PATCH 046/161] Remove #![feature(const_fn_trait_bound)] --- crates/core_simd/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 91ae34c05e0..42212aef132 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,5 @@ #![cfg_attr(not(feature = "std"), no_std)] #![feature( - const_fn_trait_bound, convert_float_to_int, decl_macro, intra_doc_pointers, From 2d13059ae9162997c5b88997bda44443615f12ab Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 8 Mar 2022 10:28:27 -0800 Subject: [PATCH 047/161] Clean up use of cargo feature "std" --- crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/math.rs | 21 +++++++-------------- crates/core_simd/src/select.rs | 6 ++---- crates/core_simd/src/swizzle.rs | 12 ++++-------- crates/core_simd/src/vector.rs | 24 ++++++++---------------- 5 files changed, 22 insertions(+), 43 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 42212aef132..2632073622e 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "std"), no_std)] +#![no_std] #![feature( convert_float_to_int, decl_macro, diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs index 0b4e40983af..606021e983e 100644 --- a/crates/core_simd/src/math.rs +++ b/crates/core_simd/src/math.rs @@ -10,8 +10,7 @@ macro_rules! 
impl_uint_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::MAX;")] /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); @@ -31,8 +30,7 @@ macro_rules! impl_uint_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::MAX;")] /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); @@ -58,8 +56,7 @@ macro_rules! impl_int_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] /// let x = Simd::from_array([MIN, 0, 1, MAX]); /// let max = Simd::splat(MAX); @@ -79,8 +76,7 @@ macro_rules! impl_int_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] /// let x = Simd::from_array([MIN, -2, -1, MAX]); /// let max = Simd::splat(MAX); @@ -100,8 +96,7 @@ macro_rules! impl_int_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); @@ -119,8 +114,7 @@ macro_rules! 
impl_int_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] /// let xs = Simd::from_array([MIN, -2, 0, 3]); /// let unsat = xs.abs(); @@ -142,8 +136,7 @@ macro_rules! impl_int_arith { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] /// let x = Simd::from_array([MIN, -2, 3, MAX]); /// let unsat = -x; diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index 3acf07260e1..065c5987d3f 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -14,8 +14,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; - /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask}; + /// # use core::simd::{Simd, Mask}; /// let a = Simd::from_array([0, 1, 2, 3]); /// let b = Simd::from_array([4, 5, 6, 7]); /// let mask = Mask::from_array([true, false, false, true]); @@ -45,8 +44,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Mask; - /// # #[cfg(not(feature = "std"))] use core::simd::Mask; + /// # use core::simd::Mask; /// let a = Mask::::from_array([true, true, false, false]); /// let b = Mask::::from_array([false, false, true, true]); /// let mask = Mask::::from_array([true, false, false, true]); diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 08b2add1166..ef47c4f3a4c 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -12,8 +12,7 @@ use crate::simd::{LaneCount, Simd, SimdElement, 
SupportedLaneCount}; /// ## One source vector /// ``` /// # #![feature(portable_simd)] -/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle}; -/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle}; +/// # use core::simd::{Simd, simd_swizzle}; /// let v = Simd::::from_array([0., 1., 2., 3.]); /// /// // Keeping the same size @@ -28,8 +27,7 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; /// ## Two source vectors /// ``` /// # #![feature(portable_simd)] -/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle, Which}; -/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle, Which}; +/// # use core::simd::{Simd, simd_swizzle, Which}; /// use Which::*; /// let a = Simd::::from_array([0., 1., 2., 3.]); /// let b = Simd::::from_array([4., 5., 6., 7.]); @@ -273,8 +271,7 @@ where /// /// ``` /// #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let a = Simd::from_array([0, 1, 2, 3]); /// let b = Simd::from_array([4, 5, 6, 7]); /// let (x, y) = a.interleave(b); @@ -337,8 +334,7 @@ where /// /// ``` /// #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let a = Simd::from_array([0, 4, 1, 5]); /// let b = Simd::from_array([2, 6, 3, 7]); /// let (x, y) = a.deinterleave(b); diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index ff1b2c756ad..2405c1429b3 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -156,8 +156,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let floats: Simd = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); 
/// let ints = floats.cast::(); /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); @@ -184,8 +183,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); /// let alt = Simd::from_array([-5, -4, -3, -2]); @@ -205,8 +203,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); /// @@ -229,8 +226,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; - /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask}; + /// # use core::simd::{Simd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); /// let alt = Simd::from_array([-5, -4, -3, -2]); @@ -264,8 +260,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; - /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask}; + /// # use core::simd::{Simd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); /// let alt = Simd::from_array([-5, -4, -3, -2]); @@ -300,8 +295,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; + /// # use core::simd::Simd; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 
82, -41, 124]); @@ -323,8 +317,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; - /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask}; + /// # use core::simd::{Simd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); @@ -358,8 +351,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask}; - /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask}; + /// # use core::simd::{Simd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); From adbd47973e7b5c1973d512c67ab8f373556202dc Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 5 Mar 2022 19:07:41 -0500 Subject: [PATCH 048/161] reduce number of tests being run under Miri --- crates/test_helpers/src/lib.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 7edd6096381..8bf7f5ed3d2 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -77,11 +77,21 @@ impl DefaultStrategy } } +#[cfg(not(miri))] +fn make_runner() -> proptest::test_runner::TestRunner { + Default::default() +} +#[cfg(miri)] +fn make_runner() -> proptest::test_runner::TestRunner { + // Only run a few tests on Miri + proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4)) +} + /// Test a function that takes a single value. 
pub fn test_1( f: &dyn Fn(A) -> proptest::test_runner::TestCaseResult, ) { - let mut runner = proptest::test_runner::TestRunner::default(); + let mut runner = make_runner(); runner.run(&A::default_strategy(), f).unwrap(); } @@ -89,7 +99,7 @@ pub fn test_1( pub fn test_2( f: &dyn Fn(A, B) -> proptest::test_runner::TestCaseResult, ) { - let mut runner = proptest::test_runner::TestRunner::default(); + let mut runner = make_runner(); runner .run(&(A::default_strategy(), B::default_strategy()), |(a, b)| { f(a, b) @@ -105,7 +115,7 @@ pub fn test_3< >( f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult, ) { - let mut runner = proptest::test_runner::TestRunner::default(); + let mut runner = make_runner(); runner .run( &( @@ -361,24 +371,28 @@ macro_rules! test_lanes { #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow fn lanes_8() { implementation::<8>(); } #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow fn lanes_16() { implementation::<16>(); } #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow fn lanes_32() { implementation::<32>(); } #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow fn lanes_64() { implementation::<64>(); } From 4023d77e39c3af4a735b8d0974414ec06d5391c7 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 9 Mar 2022 09:00:46 -0500 Subject: [PATCH 049/161] run Miri on CI (but allowed to fail) --- .github/workflows/ci.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
d50dfa1be4c..54d74764790 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,6 +58,23 @@ jobs: - name: Run Clippy run: cargo clippy --all-targets --target ${{ matrix.target }} + miri: + name: "miri" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install Miri + run: | + rustup toolchain install nightly --component miri + rustup override set nightly + cargo miri setup + - name: Test with Miri (failures allowed) + continue-on-error: true + run: | + cargo miri test --test i32_ops + cargo miri test --test f32_ops + cargo miri test --test cast + x86-tests: name: "${{ matrix.target_feature }} on ${{ matrix.target }}" runs-on: ${{ matrix.os }} From 86b9f69622badf863df71993e8b1e3468cf92eeb Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Fri, 11 Mar 2022 00:12:40 +0100 Subject: [PATCH 050/161] rust-lang/portable-simd#260: Add `.min` and `.max` for integers --- crates/core_simd/src/comparisons.rs | 34 ++++++++++++++++++++++++++++ crates/core_simd/tests/i16_ops.rs | 15 ++++++++++++ crates/core_simd/tests/ops_macros.rs | 17 ++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs index d024cf4ddbe..88270a9b7e9 100644 --- a/crates/core_simd/src/comparisons.rs +++ b/crates/core_simd/src/comparisons.rs @@ -66,3 +66,37 @@ where unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } } } + +macro_rules! 
impl_min_max_vector { + { $type:ty } => { + impl Simd<$type, LANES> + where + LaneCount: SupportedLaneCount, + { + /// Returns the lane-wise minimum with other + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + pub fn min(self, other: Self) -> Self { + self.lanes_gt(other).select(other, self) + } + + /// Returns the lane-wise maximum with other + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + pub fn max(self, other: Self) -> Self { + self.lanes_lt(other).select(other, self) + } + } + } +} + +impl_min_max_vector!(i8); +impl_min_max_vector!(i16); +impl_min_max_vector!(i32); +impl_min_max_vector!(i64); +impl_min_max_vector!(isize); +impl_min_max_vector!(u8); +impl_min_max_vector!(u16); +impl_min_max_vector!(u32); +impl_min_max_vector!(u64); +impl_min_max_vector!(usize); diff --git a/crates/core_simd/tests/i16_ops.rs b/crates/core_simd/tests/i16_ops.rs index f6c5d74fbbc..cd6cadc2d5e 100644 --- a/crates/core_simd/tests/i16_ops.rs +++ b/crates/core_simd/tests/i16_ops.rs @@ -1,5 +1,20 @@ #![feature(portable_simd)] +use core_simd::i16x2; #[macro_use] mod ops_macros; impl_signed_tests! { i16 } + +#[test] +fn max_is_not_lexicographic() { + let a = i16x2::splat(10); + let b = i16x2::from_array([-4, 12]); + assert_eq!(a.max(b), i16x2::from_array([10, 12])); +} + +#[test] +fn min_is_not_lexicographic() { + let a = i16x2::splat(10); + let b = i16x2::from_array([12, -4]); + assert_eq!(a.min(b), i16x2::from_array([10, -4])); +} diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 50f7a4ca170..96da8c1b8dc 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -222,6 +222,23 @@ macro_rules! 
impl_signed_tests { assert_eq!(a % b, Vector::::splat(0)); } + fn min() { + let a = Vector::::splat(Scalar::MIN); + let b = Vector::::splat(0); + assert_eq!(a.min(b), a); + let a = Vector::::splat(Scalar::MAX); + let b = Vector::::splat(0); + assert_eq!(a.min(b), b); + } + + fn max() { + let a = Vector::::splat(Scalar::MIN); + let b = Vector::::splat(0); + assert_eq!(a.max(b), b); + let a = Vector::::splat(Scalar::MAX); + let b = Vector::::splat(0); + assert_eq!(a.max(b), a); + } } test_helpers::test_lanes_panic! { From c196b8abaafaf360647513c64fac0c8568f9b747 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 11 Mar 2022 14:49:06 -0800 Subject: [PATCH 051/161] replace horizontal_* with reduce_* --- crates/core_simd/examples/matrix_inversion.rs | 2 +- crates/core_simd/examples/nbody.rs | 8 ++-- crates/core_simd/examples/spectral_norm.rs | 4 +- crates/core_simd/src/reduction.rs | 44 +++++++++---------- crates/core_simd/tests/ops_macros.rs | 44 +++++++++---------- 5 files changed, 51 insertions(+), 51 deletions(-) diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index c51a566deb5..39f530f68f5 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -233,7 +233,7 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option { let det = det.rotate_lanes_right::<2>() + det; let det = det.reverse().rotate_lanes_right::<2>() + det; - if det.horizontal_sum() == 0. { + if det.reduce_sum() == 0. 
{ return None; } // calculate the reciprocal diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 7b1e6840f64..664a0454bbd 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -107,10 +107,10 @@ mod nbody { let mut e = 0.; for i in 0..N_BODIES { let bi = &bodies[i]; - e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5; + e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5; for bj in bodies.iter().take(N_BODIES).skip(i + 1) { let dx = bi.x - bj.x; - e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt() + e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt() } } e @@ -134,8 +134,8 @@ mod nbody { let mut mag = [0.0; N]; for i in (0..N).step_by(2) { let d2s = f64x2::from_array([ - (r[i] * r[i]).horizontal_sum(), - (r[i + 1] * r[i + 1]).horizontal_sum(), + (r[i] * r[i]).reduce_sum(), + (r[i + 1] * r[i + 1]).reduce_sum(), ]); let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt()); mag[i] = dmags[0]; diff --git a/crates/core_simd/examples/spectral_norm.rs b/crates/core_simd/examples/spectral_norm.rs index c515dad4dea..012182e090b 100644 --- a/crates/core_simd/examples/spectral_norm.rs +++ b/crates/core_simd/examples/spectral_norm.rs @@ -20,7 +20,7 @@ fn mult_av(v: &[f64], out: &mut [f64]) { sum += b / a; j += 2 } - *out = sum.horizontal_sum(); + *out = sum.reduce_sum(); } } @@ -38,7 +38,7 @@ fn mult_atv(v: &[f64], out: &mut [f64]) { sum += b / a; j += 2 } - *out = sum.horizontal_sum(); + *out = sum.reduce_sum(); } } diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e1cd743e442..3177fd167fc 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -11,30 +11,30 @@ macro_rules! impl_integer_reductions { where LaneCount: SupportedLaneCount, { - /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. + /// Reducing wrapping add. 
Returns the sum of the lanes of the vector, with wrapping addition. #[inline] - pub fn horizontal_sum(self) -> $scalar { + pub fn reduce_sum(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_add_ordered(self, 0) } } - /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. + /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] - pub fn horizontal_product(self) -> $scalar { + pub fn reduce_product(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_mul_ordered(self, 1) } } - /// Horizontal maximum. Returns the maximum lane in the vector. + /// Reducing maximum. Returns the maximum lane in the vector. #[inline] - pub fn horizontal_max(self) -> $scalar { + pub fn reduce_max(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_max(self) } } - /// Horizontal minimum. Returns the minimum lane in the vector. + /// Reducing minimum. Returns the minimum lane in the vector. #[inline] - pub fn horizontal_min(self) -> $scalar { + pub fn reduce_min(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_min(self) } } @@ -60,9 +60,9 @@ macro_rules! impl_float_reductions { LaneCount: SupportedLaneCount, { - /// Horizontal add. Returns the sum of the lanes of the vector. + /// Reducing add. Returns the sum of the lanes of the vector. #[inline] - pub fn horizontal_sum(self) -> $scalar { + pub fn reduce_sum(self) -> $scalar { // LLVM sum is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().sum() @@ -72,9 +72,9 @@ macro_rules! impl_float_reductions { } } - /// Horizontal multiply. Returns the product of the lanes of the vector. + /// Reducing multiply. Returns the product of the lanes of the vector. 
#[inline] - pub fn horizontal_product(self) -> $scalar { + pub fn reduce_product(self) -> $scalar { // LLVM product is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().product() @@ -84,22 +84,22 @@ macro_rules! impl_float_reductions { } } - /// Horizontal maximum. Returns the maximum lane in the vector. + /// Reducing maximum. Returns the maximum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn horizontal_max(self) -> $scalar { + pub fn reduce_max(self) -> $scalar { // Safety: `self` is a float vector unsafe { simd_reduce_max(self) } } - /// Horizontal minimum. Returns the minimum lane in the vector. + /// Reducing minimum. Returns the minimum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn horizontal_min(self) -> $scalar { + pub fn reduce_min(self) -> $scalar { // Safety: `self` is a float vector unsafe { simd_reduce_min(self) } } @@ -116,10 +116,10 @@ where T: SimdElement + BitAnd, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of + /// Reducing bitwise "and". Returns the cumulative bitwise "and" across the lanes of /// the vector. #[inline] - pub fn horizontal_and(self) -> T { + pub fn reduce_and(self) -> T { unsafe { simd_reduce_and(self) } } } @@ -130,10 +130,10 @@ where T: SimdElement + BitOr, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of + /// Reducing bitwise "or". Returns the cumulative bitwise "or" across the lanes of /// the vector. 
#[inline] - pub fn horizontal_or(self) -> T { + pub fn reduce_or(self) -> T { unsafe { simd_reduce_or(self) } } } @@ -144,10 +144,10 @@ where T: SimdElement + BitXor, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of + /// Reducing bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of /// the vector. #[inline] - pub fn horizontal_xor(self) -> T { + pub fn reduce_xor(self) -> T { unsafe { simd_reduce_xor(self) } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 96da8c1b8dc..0b3f8979a37 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -94,70 +94,70 @@ macro_rules! impl_binary_checked_op_test { macro_rules! impl_common_integer_tests { { $vector:ident, $scalar:ident } => { test_helpers::test_lanes! { - fn horizontal_sum() { + fn reduce_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_sum(), + $vector::::from_array(x).reduce_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), ); Ok(()) }); } - fn horizontal_product() { + fn reduce_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_product(), + $vector::::from_array(x).reduce_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), ); Ok(()) }); } - fn horizontal_and() { + fn reduce_and() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_and(), + $vector::::from_array(x).reduce_and(), x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand), ); Ok(()) }); } - fn horizontal_or() { + fn reduce_or() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! 
( - $vector::::from_array(x).horizontal_or(), + $vector::::from_array(x).reduce_or(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor), ); Ok(()) }); } - fn horizontal_xor() { + fn reduce_xor() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_xor(), + $vector::::from_array(x).reduce_xor(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor), ); Ok(()) }); } - fn horizontal_max() { + fn reduce_max() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_max(), + $vector::::from_array(x).reduce_max(), x.iter().copied().max().unwrap(), ); Ok(()) }); } - fn horizontal_min() { + fn reduce_min() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_min(), + $vector::::from_array(x).reduce_min(), x.iter().copied().min().unwrap(), ); Ok(()) @@ -516,29 +516,29 @@ macro_rules! impl_float_tests { }) } - fn horizontal_sum() { + fn reduce_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).horizontal_sum(), + Vector::::from_array(x).reduce_sum(), x.iter().sum(), ); Ok(()) }); } - fn horizontal_product() { + fn reduce_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).horizontal_product(), + Vector::::from_array(x).reduce_product(), x.iter().product(), ); Ok(()) }); } - fn horizontal_max() { + fn reduce_max() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).horizontal_max(); + let vmax = Vector::::from_array(x).reduce_max(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { @@ -548,9 +548,9 @@ macro_rules! 
impl_float_tests { }); } - fn horizontal_min() { + fn reduce_min() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).horizontal_min(); + let vmax = Vector::::from_array(x).reduce_min(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { From b6e03f58864dde979dbe97b7d983d0ba29b16227 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 11 Mar 2022 14:54:36 -0800 Subject: [PATCH 052/161] Change beginner's guide to explain Reducing rather than Horizontal. --- beginners-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beginners-guide.md b/beginners-guide.md index dfd357c4592..75158e5aa85 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -33,7 +33,7 @@ SIMD has a few special vocabulary terms you should know: * **Vertical:** When an operation is "vertical", each lane processes individually without regard to the other lanes in the same vector. For example, a "vertical add" between two vectors would add lane 0 in `a` with lane 0 in `b`, with the total in lane 0 of `out`, and then the same thing for lanes 1, 2, etc. Most SIMD operations are vertical operations, so if your problem is a vertical problem then you can probably solve it with SIMD. -* **Horizontal:** When an operation is "horizontal", the lanes within a single vector interact in some way. A "horizontal add" might add up lane 0 of `a` with lane 1 of `a`, with the total in lane 0 of `out`. +* **Reducing/Reduce:** When an operation is "reducing" (functions named `reduce_*`), the lanes within a single vector are merged using some operation such as addition, returning the merged value as a scalar. For instance, a reducing add would return the sum of all the lanes' values. * **Target Feature:** Rust calls a CPU architecture extension a `target_feature`. Proper SIMD requires various CPU extensions to be enabled (details below). 
Don't confuse this with `feature`, which is a Cargo crate concept. @@ -83,4 +83,4 @@ Fortunately, most SIMD types have a fairly predictable size. `i32x4` is bit-equi However, this is not the same as alignment. Computer architectures generally prefer aligned accesses, especially when moving data between memory and vector registers, and while some support specialized operations that can bend the rules to help with this, unaligned access is still typically slow, or even undefined behavior. In addition, different architectures can require different alignments when interacting with their native SIMD types. For this reason, any `#[repr(simd)]` type has a non-portable alignment. If it is necessary to directly interact with the alignment of these types, it should be via [`mem::align_of`]. [`mem::transmute`]: https://doc.rust-lang.org/core/mem/fn.transmute.html -[`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html \ No newline at end of file +[`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html From b6ee5293f43a6e9ec1c0d977a4037462bd4d02ca Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 11 Mar 2022 21:14:52 -0500 Subject: [PATCH 053/161] rust-lang/portable-simd#259: remove Miri from CI --- .github/workflows/ci.yml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 54d74764790..d50dfa1be4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,23 +58,6 @@ jobs: - name: Run Clippy run: cargo clippy --all-targets --target ${{ matrix.target }} - miri: - name: "miri" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Install Miri - run: | - rustup toolchain install nightly --component miri - rustup override set nightly - cargo miri setup - - name: Test with Miri (failures allowed) - continue-on-error: true - run: | - cargo miri test --test i32_ops - cargo miri test --test f32_ops - cargo miri test --test cast - x86-tests: 
name: "${{ matrix.target_feature }} on ${{ matrix.target }}" runs-on: ${{ matrix.os }} From 49043f4434b02d0a8a68c87d7672cc2e944a3deb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 12 Mar 2022 18:32:28 -0500 Subject: [PATCH 054/161] rust-lang/portable-simd#262: also implement clamp for integer vectors * add test from issue rust-lang/portable-simd#253 --- crates/core_simd/src/comparisons.rs | 44 ++++++++++++++++++++-------- crates/core_simd/tests/i16_ops.rs | 12 ++++++++ crates/core_simd/tests/ops_macros.rs | 12 ++++++++ 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs index 88270a9b7e9..7b0d0a6864b 100644 --- a/crates/core_simd/src/comparisons.rs +++ b/crates/core_simd/src/comparisons.rs @@ -67,36 +67,54 @@ where } } -macro_rules! impl_min_max_vector { +macro_rules! impl_ord_methods_vector { { $type:ty } => { impl Simd<$type, LANES> where LaneCount: SupportedLaneCount, { - /// Returns the lane-wise minimum with other + /// Returns the lane-wise minimum with `other`. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn min(self, other: Self) -> Self { self.lanes_gt(other).select(other, self) } - /// Returns the lane-wise maximum with other + /// Returns the lane-wise maximum with `other`. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] pub fn max(self, other: Self) -> Self { self.lanes_lt(other).select(other, self) } + + /// Restrict each lane to a certain interval. + /// + /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is + /// less than `min`. Otherwise returns `self`. + /// + /// # Panics + /// + /// Panics if `min > max` on any lane. 
+ #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + pub fn clamp(self, min: Self, max: Self) -> Self { + assert!( + min.lanes_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.max(min).min(max) + } } } } -impl_min_max_vector!(i8); -impl_min_max_vector!(i16); -impl_min_max_vector!(i32); -impl_min_max_vector!(i64); -impl_min_max_vector!(isize); -impl_min_max_vector!(u8); -impl_min_max_vector!(u16); -impl_min_max_vector!(u32); -impl_min_max_vector!(u64); -impl_min_max_vector!(usize); +impl_ord_methods_vector!(i8); +impl_ord_methods_vector!(i16); +impl_ord_methods_vector!(i32); +impl_ord_methods_vector!(i64); +impl_ord_methods_vector!(isize); +impl_ord_methods_vector!(u8); +impl_ord_methods_vector!(u16); +impl_ord_methods_vector!(u32); +impl_ord_methods_vector!(u64); +impl_ord_methods_vector!(usize); diff --git a/crates/core_simd/tests/i16_ops.rs b/crates/core_simd/tests/i16_ops.rs index cd6cadc2d5e..171e5b472fa 100644 --- a/crates/core_simd/tests/i16_ops.rs +++ b/crates/core_simd/tests/i16_ops.rs @@ -18,3 +18,15 @@ fn min_is_not_lexicographic() { let b = i16x2::from_array([12, -4]); assert_eq!(a.min(b), i16x2::from_array([10, -4])); } + +#[test] +fn clamp_is_not_lexicographic() { + let a = i16x2::splat(10); + let lo = i16x2::from_array([-12, -4]); + let up = i16x2::from_array([-4, 12]); + assert_eq!(a.clamp(lo, up), i16x2::from_array([-4, 10])); + + let x = i16x2::from_array([1, 10]); + let y = x.clamp(i16x2::splat(0), i16x2::splat(9)); + assert_eq!(y, i16x2::from_array([1, 9])); +} diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 96da8c1b8dc..bea02750ef2 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -239,6 +239,18 @@ macro_rules! 
impl_signed_tests { let b = Vector::::splat(0); assert_eq!(a.max(b), a); } + + fn clamp() { + let min = Vector::::splat(Scalar::MIN); + let max = Vector::::splat(Scalar::MAX); + let zero = Vector::::splat(0); + let one = Vector::::splat(1); + let negone = Vector::::splat(-1); + assert_eq!(zero.clamp(min, max), zero); + assert_eq!(zero.clamp(min, one), zero); + assert_eq!(zero.clamp(one, max), one); + assert_eq!(zero.clamp(min, negone), negone); + } } test_helpers::test_lanes_panic! { From 80469c66d650735a0520ae471c5fe5ebaf8971b5 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 13 Mar 2022 19:07:36 +0000 Subject: [PATCH 055/161] Move comparisons to SimdPartialOrd and SimdOrd traits --- crates/core_simd/src/comparisons.rs | 120 --------------- crates/core_simd/src/eq.rs | 73 +++++++++ crates/core_simd/src/masks.rs | 4 +- crates/core_simd/src/mod.rs | 5 +- crates/core_simd/src/ops.rs | 10 +- crates/core_simd/src/ord.rs | 222 +++++++++++++++++++++++++++ crates/core_simd/src/vector.rs | 16 +- crates/core_simd/src/vector/float.rs | 26 ++-- crates/core_simd/src/vector/int.rs | 6 +- crates/core_simd/tests/i16_ops.rs | 27 ---- crates/core_simd/tests/ops_macros.rs | 45 +++--- 11 files changed, 354 insertions(+), 200 deletions(-) delete mode 100644 crates/core_simd/src/comparisons.rs create mode 100644 crates/core_simd/src/eq.rs create mode 100644 crates/core_simd/src/ord.rs diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs deleted file mode 100644 index 7b0d0a6864b..00000000000 --- a/crates/core_simd/src/comparisons.rs +++ /dev/null @@ -1,120 +0,0 @@ -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; - -impl Simd -where - T: SimdElement + PartialEq, - LaneCount: SupportedLaneCount, -{ - /// Test if each lane is equal to the corresponding lane in `other`. 
- #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_eq(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } - } - - /// Test if each lane is not equal to the corresponding lane in `other`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_ne(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } - } -} - -impl Simd -where - T: SimdElement + PartialOrd, - LaneCount: SupportedLaneCount, -{ - /// Test if each lane is less than the corresponding lane in `other`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_lt(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } - } - - /// Test if each lane is greater than the corresponding lane in `other`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_gt(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } - } - - /// Test if each lane is less than or equal to the corresponding lane in `other`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_le(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. 
- unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } - } - - /// Test if each lane is greater than or equal to the corresponding lane in `other`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn lanes_ge(self, other: Self) -> Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } - } -} - -macro_rules! impl_ord_methods_vector { - { $type:ty } => { - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Returns the lane-wise minimum with `other`. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn min(self, other: Self) -> Self { - self.lanes_gt(other).select(other, self) - } - - /// Returns the lane-wise maximum with `other`. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn max(self, other: Self) -> Self { - self.lanes_lt(other).select(other, self) - } - - /// Restrict each lane to a certain interval. - /// - /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is - /// less than `min`. Otherwise returns `self`. - /// - /// # Panics - /// - /// Panics if `min > max` on any lane. 
- #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn clamp(self, min: Self, max: Self) -> Self { - assert!( - min.lanes_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", - ); - self.max(min).min(max) - } - } - } -} - -impl_ord_methods_vector!(i8); -impl_ord_methods_vector!(i16); -impl_ord_methods_vector!(i32); -impl_ord_methods_vector!(i64); -impl_ord_methods_vector!(isize); -impl_ord_methods_vector!(u8); -impl_ord_methods_vector!(u16); -impl_ord_methods_vector!(u32); -impl_ord_methods_vector!(u64); -impl_ord_methods_vector!(usize); diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/eq.rs new file mode 100644 index 00000000000..c7111f720a8 --- /dev/null +++ b/crates/core_simd/src/eq.rs @@ -0,0 +1,73 @@ +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; + +/// Parallel `PartialEq`. +pub trait SimdPartialEq { + /// The mask type returned by each comparison. + type Mask; + + /// Test if each lane is equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_eq(self, other: Self) -> Self::Mask; + + /// Test if each lane is not equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_ne(self, other: Self) -> Self::Mask; +} + +macro_rules! impl_number { + { $($number:ty),* } => { + $( + impl SimdPartialEq for Simd<$number, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Mask<<$number as SimdElement>::Mask, LANES>; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + } + } + )* + } +} + +impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } + +macro_rules! impl_mask { + { $($integer:ty),* } => { + $( + impl SimdPartialEq for Mask<$integer, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Self; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) } + } + } + )* + } +} + +impl_mask! { i8, i16, i32, i64, isize } diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e1cd7930450..d4e57ed90bf 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -15,7 +15,7 @@ mod mask_impl; mod to_bitmask; pub use to_bitmask::ToBitMask; -use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -56,7 +56,7 @@ macro_rules! 
impl_element { where LaneCount: SupportedLaneCount, { - (value.lanes_eq(Simd::splat(0)) | value.lanes_eq(Simd::splat(-1))).all() + (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() } fn eq(self, other: Self) -> bool { self == other } diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 85026265956..42257f4e119 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -9,13 +9,14 @@ pub(crate) mod intrinsics; #[cfg(feature = "generic_const_exprs")] mod to_bytes; -mod comparisons; +mod eq; mod fmt; mod iter; mod lane_count; mod masks; mod math; mod ops; +mod ord; mod round; mod select; mod vector; @@ -25,8 +26,10 @@ mod vendor; pub mod simd { pub(crate) use crate::core_simd::intrinsics; + pub use crate::core_simd::eq::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; + pub use crate::core_simd::ord::*; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 1b35b3e717a..d39b4091df9 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; @@ -74,7 +74,7 @@ macro_rules! int_divrem_guard { $simd_call:ident }, $int:ident ) => { - if $rhs.lanes_eq(Simd::splat(0)).any() { + if $rhs.simd_eq(Simd::splat(0 as _)).any() { panic!($zero); } else { // Prevent otherwise-UB overflow on the MIN / -1 case. @@ -82,10 +82,10 @@ macro_rules! 
int_divrem_guard { // This should, at worst, optimize to a few branchless logical ops // Ideally, this entire conditional should evaporate // Fire LLVM and implement those manually if it doesn't get the hint - ($lhs.lanes_eq(Simd::splat(<$int>::MIN)) + ($lhs.simd_eq(Simd::splat(<$int>::MIN)) // type inference can break here, so cut an SInt to size - & $rhs.lanes_eq(Simd::splat(-1i64 as _))) - .select(Simd::splat(1), $rhs) + & $rhs.simd_eq(Simd::splat(-1i64 as _))) + .select(Simd::splat(1 as _), $rhs) } else { // Nice base case to make it easy to const-fold away the other branch. $rhs diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs new file mode 100644 index 00000000000..befa4594595 --- /dev/null +++ b/crates/core_simd/src/ord.rs @@ -0,0 +1,222 @@ +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; + +/// Parallel `PartialOrd`. +pub trait SimdPartialOrd { + /// The mask type returned by each comparison. + type Mask; + + /// Test if each lane is less than the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_lt(self, other: Self) -> Self::Mask; + + /// Test if each lane is less than or equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_le(self, other: Self) -> Self::Mask; + + /// Test if each lane is greater than the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_gt(self, other: Self) -> Self::Mask; + + /// Test if each lane is greater than or equal to the corresponding lane in `other`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn simd_ge(self, other: Self) -> Self::Mask; +} + +/// Parallel `Ord`. +pub trait SimdOrd: SimdPartialOrd { + /// Returns the lane-wise maximum with `other`. 
+ #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_max(self, other: Self) -> Self; + + /// Returns the lane-wise minimum with `other`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self; + + /// Restrict each lane to a certain interval. + /// + /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is + /// less than `min`. Otherwise returns `self`. + /// + /// # Panics + /// + /// Panics if `min > max` on any lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_clamp(self, min: Self, max: Self) -> Self; +} + +macro_rules! impl_integer { + { $($integer:ty),* } => { + $( + impl SimdPartialOrd for Simd<$integer, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Mask<<$integer as SimdElement>::Mask, LANES>; + + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } + } + + impl SimdOrd for Simd<$integer, LANES> + where + LaneCount: SupportedLaneCount, + { + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_lt(other).select(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_gt(other).select(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } + } + )* + } +} + +impl_integer! { u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } + +macro_rules! impl_float { + { $($float:ty),* } => { + $( + impl SimdPartialOrd for Simd<$float, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Mask<<$float as SimdElement>::Mask, LANES>; + + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } + } + )* + } +} + +impl_float! { f32, f64 } + +macro_rules! 
impl_mask { + { $($integer:ty),* } => { + $( + impl SimdPartialOrd for Mask<$integer, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Self; + + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) } + } + } + + impl SimdOrd for Mask<$integer, LANES> + where + LaneCount: SupportedLaneCount, + { + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_gt(other).select_mask(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_lt(other).select_mask(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } + } + )* + } +} + +impl_mask! 
{ i8, i16, i32, i64, isize } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 2405c1429b3..13e35ecfa49 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -10,7 +10,7 @@ pub use uint::*; pub(crate) mod ptr; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount}; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. /// @@ -243,7 +243,7 @@ where idxs: Simd, or: Self, ) -> Self { - let enable: Mask = enable & idxs.lanes_lt(Simd::splat(slice.len())); + let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); // Safety: We have masked-off out-of-bounds lanes. unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } } @@ -260,13 +260,13 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, Mask}; + /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 5]); /// let alt = Simd::from_array([-5, -4, -3, -2]); /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane. /// // If this mask was used to gather, it would be unsound. Let's fix that. - /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len())); + /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); /// /// // We have masked the OOB lane, so it's safe to gather now. 
/// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) }; @@ -317,7 +317,7 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, Mask}; + /// # use core_simd::simd::{Simd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); @@ -333,7 +333,7 @@ where enable: Mask, idxs: Simd, ) { - let enable: Mask = enable & idxs.lanes_lt(Simd::splat(slice.len())); + let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); // Safety: We have masked-off out-of-bounds lanes. unsafe { self.scatter_select_unchecked(slice, enable, idxs) } } @@ -351,13 +351,13 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, Mask}; + /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. /// // If this mask was used to scatter, it would be unsound. Let's fix that. - /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len())); + /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); /// /// // We have masked the OOB lane, so it's safe to scatter now. 
/// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); } diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index fcc7f6d8d1c..ebe4fbcb6fa 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -1,7 +1,7 @@ #![allow(non_camel_case_types)] use crate::simd::intrinsics; -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SimdPartialOrd, SupportedLaneCount}; /// Implements inherent methods for a float vector containing multiple /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary @@ -74,35 +74,35 @@ macro_rules! impl_float_vector { #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> { let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1); - sign_bits.lanes_gt(Simd::splat(0)) + sign_bits.simd_gt(Simd::splat(0)) } /// Returns true for each lane if its value is `NaN`. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_nan(self) -> Mask<$mask_ty, LANES> { - self.lanes_ne(self) + self.simd_ne(self) } /// Returns true for each lane if its value is positive infinity or negative infinity. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_infinite(self) -> Mask<$mask_ty, LANES> { - self.abs().lanes_eq(Self::splat(<$type>::INFINITY)) + self.abs().simd_eq(Self::splat(<$type>::INFINITY)) } /// Returns true for each lane if its value is neither infinite nor `NaN`. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_finite(self) -> Mask<$mask_ty, LANES> { - self.abs().lanes_lt(Self::splat(<$type>::INFINITY)) + self.abs().simd_lt(Self::splat(<$type>::INFINITY)) } /// Returns true for each lane if its value is subnormal. 
#[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> { - self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0)) + self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).simd_eq(Simd::splat(0)) } /// Returns true for each lane if its value is neither zero, infinite, @@ -110,7 +110,7 @@ macro_rules! impl_float_vector { #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn is_normal(self) -> Mask<$mask_ty, LANES> { - !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) + !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) } /// Replaces each lane with a number that represents its sign. @@ -140,7 +140,7 @@ macro_rules! impl_float_vector { /// If one of the values is `NAN`, then the other value is returned. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn min(self, other: Self) -> Self { + pub fn simd_min(self, other: Self) -> Self { unsafe { intrinsics::simd_fmin(self, other) } } @@ -149,7 +149,7 @@ macro_rules! impl_float_vector { /// If one of the values is `NAN`, then the other value is returned. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn max(self, other: Self) -> Self { + pub fn simd_max(self, other: Self) -> Self { unsafe { intrinsics::simd_fmax(self, other) } } @@ -160,14 +160,14 @@ macro_rules! impl_float_vector { /// than `min`. Otherwise returns the lane in `self`. 
#[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn clamp(self, min: Self, max: Self) -> Self { + pub fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( - min.lanes_le(max).all(), + min.simd_le(max).all(), "each lane in `min` must be less than or equal to the corresponding lane in `max`", ); let mut x = self; - x = x.lanes_lt(min).select(min, x); - x = x.lanes_gt(max).select(max, x); + x = x.simd_lt(min).select(min, x); + x = x.simd_gt(max).select(max, x); x } } diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs index 3eac02a2761..85fabdc4e00 100644 --- a/crates/core_simd/src/vector/int.rs +++ b/crates/core_simd/src/vector/int.rs @@ -1,6 +1,6 @@ #![allow(non_camel_case_types)] -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, Simd, SimdPartialOrd, SupportedLaneCount}; /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`. macro_rules! impl_integer_vector { @@ -12,13 +12,13 @@ macro_rules! impl_integer_vector { /// Returns true for each positive lane and false if it is zero or negative. #[inline] pub fn is_positive(self) -> Mask<$type, LANES> { - self.lanes_gt(Self::splat(0)) + self.simd_gt(Self::splat(0)) } /// Returns true for each negative lane and false if it is zero or positive. #[inline] pub fn is_negative(self) -> Mask<$type, LANES> { - self.lanes_lt(Self::splat(0)) + self.simd_lt(Self::splat(0)) } /// Returns numbers representing the sign of each lane. diff --git a/crates/core_simd/tests/i16_ops.rs b/crates/core_simd/tests/i16_ops.rs index 171e5b472fa..f6c5d74fbbc 100644 --- a/crates/core_simd/tests/i16_ops.rs +++ b/crates/core_simd/tests/i16_ops.rs @@ -1,32 +1,5 @@ #![feature(portable_simd)] -use core_simd::i16x2; #[macro_use] mod ops_macros; impl_signed_tests! 
{ i16 } - -#[test] -fn max_is_not_lexicographic() { - let a = i16x2::splat(10); - let b = i16x2::from_array([-4, 12]); - assert_eq!(a.max(b), i16x2::from_array([10, 12])); -} - -#[test] -fn min_is_not_lexicographic() { - let a = i16x2::splat(10); - let b = i16x2::from_array([12, -4]); - assert_eq!(a.min(b), i16x2::from_array([10, -4])); -} - -#[test] -fn clamp_is_not_lexicographic() { - let a = i16x2::splat(10); - let lo = i16x2::from_array([-12, -4]); - let up = i16x2::from_array([-4, 12]); - assert_eq!(a.clamp(lo, up), i16x2::from_array([-4, 10])); - - let x = i16x2::from_array([1, 10]); - let y = x.clamp(i16x2::splat(0), i16x2::splat(9)); - assert_eq!(y, i16x2::from_array([1, 9])); -} diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 7c9b17673ef..f8389c910c6 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -222,34 +222,37 @@ macro_rules! impl_signed_tests { assert_eq!(a % b, Vector::::splat(0)); } - fn min() { + fn simd_min() { + use core_simd::simd::SimdOrd; let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(0); - assert_eq!(a.min(b), a); + assert_eq!(a.simd_min(b), a); let a = Vector::::splat(Scalar::MAX); let b = Vector::::splat(0); - assert_eq!(a.min(b), b); + assert_eq!(a.simd_min(b), b); } - fn max() { + fn simd_max() { + use core_simd::simd::SimdOrd; let a = Vector::::splat(Scalar::MIN); let b = Vector::::splat(0); - assert_eq!(a.max(b), b); + assert_eq!(a.simd_max(b), b); let a = Vector::::splat(Scalar::MAX); let b = Vector::::splat(0); - assert_eq!(a.max(b), a); + assert_eq!(a.simd_max(b), a); } - fn clamp() { + fn simd_clamp() { + use core_simd::simd::SimdOrd; let min = Vector::::splat(Scalar::MIN); let max = Vector::::splat(Scalar::MAX); let zero = Vector::::splat(0); let one = Vector::::splat(1); let negone = Vector::::splat(-1); - assert_eq!(zero.clamp(min, max), zero); - assert_eq!(zero.clamp(min, one), zero); - assert_eq!(zero.clamp(one, max), 
one); - assert_eq!(zero.clamp(min, negone), negone); + assert_eq!(zero.simd_clamp(min, max), zero); + assert_eq!(zero.simd_clamp(min, one), zero); + assert_eq!(zero.simd_clamp(one, max), one); + assert_eq!(zero.simd_clamp(min, negone), negone); } } @@ -458,10 +461,10 @@ macro_rules! impl_float_tests { ) } - fn min() { + fn simd_min() { // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( - &Vector::::min, + &Vector::::simd_min, &Scalar::min, // Reject the case where both values are zero with different signs &|a, b| { @@ -477,14 +480,14 @@ macro_rules! impl_float_tests { // Special case where both values are zero let p_zero = Vector::::splat(0.); let n_zero = Vector::::splat(-0.); - assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.)); - assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.)); + assert!(p_zero.simd_min(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.simd_min(p_zero).to_array().iter().all(|x| *x == 0.)); } - fn max() { + fn simd_max() { // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( - &Vector::::max, + &Vector::::simd_max, &Scalar::max, // Reject the case where both values are zero with different signs &|a, b| { @@ -500,11 +503,11 @@ macro_rules! impl_float_tests { // Special case where both values are zero let p_zero = Vector::::splat(0.); let n_zero = Vector::::splat(-0.); - assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.)); - assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.)); + assert!(p_zero.simd_max(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.)); } - fn clamp() { + fn simd_clamp() { test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { @@ -522,7 +525,7 @@ macro_rules! 
impl_float_tests { for i in 0..LANES { result_scalar[i] = value[i].clamp(min[i], max[i]); } - let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array(); + let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array(); test_helpers::prop_assert_biteq!(result_scalar, result_vector); Ok(()) }) From 2a02c4d9cbee120915535f6fc8379849e10e8373 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 13 Mar 2022 19:57:06 +0000 Subject: [PATCH 056/161] Create SimdFloat trait --- crates/core_simd/src/vector/float.rs | 106 ++++++++++++++++++--------- crates/core_simd/tests/ops_macros.rs | 3 + 2 files changed, 73 insertions(+), 36 deletions(-) diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index ebe4fbcb6fa..f422d161178 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -134,42 +134,6 @@ macro_rules! impl_float_vector { let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); Self::from_bits(sign_bit | magnitude) } - - /// Returns the minimum of each lane. - /// - /// If one of the values is `NAN`, then the other value is returned. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn simd_min(self, other: Self) -> Self { - unsafe { intrinsics::simd_fmin(self, other) } - } - - /// Returns the maximum of each lane. - /// - /// If one of the values is `NAN`, then the other value is returned. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn simd_max(self, other: Self) -> Self { - unsafe { intrinsics::simd_fmax(self, other) } - } - - /// Restrict each lane to a certain interval unless it is NaN. - /// - /// For each lane in `self`, returns the corresponding lane in `max` if the lane is - /// greater than `max`, and the corresponding lane in `min` if the lane is less - /// than `min`. Otherwise returns the lane in `self`. 
- #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn simd_clamp(self, min: Self, max: Self) -> Self { - assert!( - min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", - ); - let mut x = self; - x = x.simd_lt(min).select(min, x); - x = x.simd_gt(max).select(max, x); - x - } } }; } @@ -197,3 +161,73 @@ pub type f64x4 = Simd; /// Vector of eight `f64` values pub type f64x8 = Simd; + +mod sealed { + pub trait Sealed {} +} +use sealed::Sealed; + +/// SIMD operations on vectors of floating point numbers. +pub trait SimdFloat: Sized + Sealed { + /// Returns the minimum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self; + + /// Returns the maximum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_max(self, other: Self) -> Self; + + /// Restrict each lane to a certain interval unless it is NaN. + /// + /// For each lane in `self`, returns the corresponding lane in `max` if the lane is + /// greater than `max`, and the corresponding lane in `min` if the lane is less + /// than `min`. Otherwise returns the lane in `self`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_clamp(self, min: Self, max: Self) -> Self; +} + +macro_rules! 
impl_simd_float { + { $($float:ty),* } => { + $( + impl Sealed for Simd<$float, LANES> + where + LaneCount: SupportedLaneCount, + { + } + + impl SimdFloat for Simd<$float, LANES> + where + LaneCount: SupportedLaneCount, + { + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self { + unsafe { intrinsics::simd_fmin(self, other) } + } + + #[inline] + fn simd_max(self, other: Self) -> Self { + unsafe { intrinsics::simd_fmax(self, other) } + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + let mut x = self; + x = x.simd_lt(min).select(min, x); + x = x.simd_gt(max).select(max, x); + x + } + } + )* + } +} + +impl_simd_float! { f32, f64 } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index f8389c910c6..47fe49b0982 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -462,6 +462,7 @@ macro_rules! impl_float_tests { } fn simd_min() { + use core_simd::simd::SimdFloat; // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( &Vector::::simd_min, @@ -485,6 +486,7 @@ macro_rules! impl_float_tests { } fn simd_max() { + use core_simd::simd::SimdFloat; // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( &Vector::::simd_max, @@ -508,6 +510,7 @@ macro_rules! 
impl_float_tests { } fn simd_clamp() { + use core_simd::simd::SimdFloat; test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { From 60486e08ed58698c7b6c2b5cd62a9fbd9080bc2f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 15 Mar 2022 00:17:14 +0000 Subject: [PATCH 057/161] SimdPartialOrd implies SimdPartialEq --- crates/core_simd/src/ord.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs index befa4594595..9a87bc2e344 100644 --- a/crates/core_simd/src/ord.rs +++ b/crates/core_simd/src/ord.rs @@ -1,10 +1,7 @@ -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Parallel `PartialOrd`. -pub trait SimdPartialOrd { - /// The mask type returned by each comparison. - type Mask; - +pub trait SimdPartialOrd: SimdPartialEq { /// Test if each lane is less than the corresponding lane in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_lt(self, other: Self) -> Self::Mask; @@ -51,8 +48,6 @@ macro_rules! impl_integer { where LaneCount: SupportedLaneCount, { - type Mask = Mask<<$integer as SimdElement>::Mask, LANES>; - #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison @@ -118,8 +113,6 @@ macro_rules! impl_float { where LaneCount: SupportedLaneCount, { - type Mask = Mask<<$float as SimdElement>::Mask, LANES>; - #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison @@ -161,8 +154,6 @@ macro_rules! 
impl_mask { where LaneCount: SupportedLaneCount, { - type Mask = Self; - #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison From 50fbfa4ebab8c8754d625163f4fba8a1ca0ab676 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 16 Mar 2022 20:27:32 -0400 Subject: [PATCH 058/161] add bitmask roundtrip test for vector length below 8 --- crates/core_simd/tests/masks.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 3aec36ca7b7..1c587630a36 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -80,6 +80,18 @@ macro_rules! test_mask_api { assert_eq!(bitmask, 0b1000001101001001); assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask); } + + #[test] + fn roundtrip_bitmask_conversion_short() { + use core_simd::ToBitMask; + let values = [ + false, false, false, true, + ]; + let mask = core_simd::Mask::<$type, 4>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b1000); + assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask); + } } } } From 60555b57f1a61962e0df48cd303fbefff2e61ec3 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 17 Mar 2022 10:42:39 -0400 Subject: [PATCH 059/161] fix big-endian bitmasks smaller than a byte --- crates/core_simd/src/masks/full_masks.rs | 20 ++++++++++++++++---- crates/core_simd/tests/masks.rs | 7 +++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 8bbdf637de8..efa688b128f 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -68,14 +68,26 @@ where // Used for bitmask bit order workaround pub(crate) trait ReverseBits { - fn reverse_bits(self) -> Self; + // Reverse the least significant `n` bits of `self`. + // (Remaining bits must be 0.) 
+ fn reverse_bits(self, n: usize) -> Self; } macro_rules! impl_reverse_bits { { $($int:ty),* } => { $( impl ReverseBits for $int { - fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) } + #[inline(always)] + fn reverse_bits(self, n: usize) -> Self { + let rev = <$int>::reverse_bits(self); + let bitsize = core::mem::size_of::<$int>() * 8; + if n < bitsize { + // Shift things back to the right + rev >> (bitsize - n) + } else { + rev + } + } } )* } @@ -137,7 +149,7 @@ where // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { - bitmask.reverse_bits() + bitmask.reverse_bits(LANES) } else { bitmask } @@ -150,7 +162,7 @@ where { // LLVM assumes bit order should match endianness let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits() + bitmask.reverse_bits(LANES) } else { bitmask }; diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 1c587630a36..d10c6610f50 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -84,6 +84,7 @@ macro_rules! test_mask_api { #[test] fn roundtrip_bitmask_conversion_short() { use core_simd::ToBitMask; + let values = [ false, false, false, true, ]; @@ -91,6 +92,12 @@ macro_rules! 
test_mask_api { let bitmask = mask.to_bitmask(); assert_eq!(bitmask, 0b1000); assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask); + + let values = [true, false]; + let mask = core_simd::Mask::<$type, 2>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b01); + assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask); } } } From 35e16a1e0652a0c2f351be8a23c18514bdd35f34 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 20 Mar 2022 19:17:33 -0400 Subject: [PATCH 060/161] rust-lang/portable-simd#266: reduce Miri test count in round.rs --- crates/core_simd/tests/round.rs | 2 +- crates/test_helpers/src/lib.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 7feb0320a16..484fd5bf47d 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -59,7 +59,7 @@ macro_rules! float_rounding_test { const MAX_REPRESENTABLE_VALUE: Scalar = (ALL_MANTISSA_BITS << (core::mem::size_of::() * 8 - ::MANTISSA_DIGITS as usize - 1)) as Scalar; - let mut runner = proptest::test_runner::TestRunner::default(); + let mut runner = test_helpers::make_runner(); runner.run( &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE), |x| { diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 8bf7f5ed3d2..141bee18a9a 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -78,11 +78,11 @@ impl DefaultStrategy } #[cfg(not(miri))] -fn make_runner() -> proptest::test_runner::TestRunner { +pub fn make_runner() -> proptest::test_runner::TestRunner { Default::default() } #[cfg(miri)] -fn make_runner() -> proptest::test_runner::TestRunner { +pub fn make_runner() -> proptest::test_runner::TestRunner { // Only run a few tests on Miri proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4)) } From 
4e14017f3d9e0379304dd8d34772786631310104 Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Sun, 3 Apr 2022 15:20:00 -0600 Subject: [PATCH 061/161] Standardize documentation for SIMD vector and mask types --- crates/core_simd/src/masks.rs | 36 ++++++++++++------------- crates/core_simd/src/vector/float.rs | 14 +++++----- crates/core_simd/src/vector/int.rs | 40 ++++++++++++++-------------- crates/core_simd/src/vector/uint.rs | 40 ++++++++++++++-------------- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e1cd7930450..a56a154b437 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -507,58 +507,58 @@ where } } -/// Vector of eight 8-bit masks +/// A 64-bit SIMD vector mask for eight elements of 8 bits. pub type mask8x8 = Mask; -/// Vector of 16 8-bit masks +/// A 128-bit SIMD vector mask for 16 elements of 8 bits. pub type mask8x16 = Mask; -/// Vector of 32 8-bit masks +/// A 256-bit SIMD vector mask for 32 elements of 8 bits. pub type mask8x32 = Mask; -/// Vector of 16 8-bit masks +/// A 512-bit SIMD vector mask for 64 elements of 8 bits. pub type mask8x64 = Mask; -/// Vector of four 16-bit masks +/// A 64-bit SIMD vector mask for four elements of 16 bits. pub type mask16x4 = Mask; -/// Vector of eight 16-bit masks +/// A 128-bit SIMD vector mask for eight elements of 16 bits. pub type mask16x8 = Mask; -/// Vector of 16 16-bit masks +/// A 256-bit SIMD vector mask for 16 elements of 16 bits. pub type mask16x16 = Mask; -/// Vector of 32 16-bit masks +/// A 512-bit SIMD vector mask for 32 elements of 16 bits. pub type mask16x32 = Mask; -/// Vector of two 32-bit masks +/// A 64-bit SIMD vector mask for two elements of 32 bits. pub type mask32x2 = Mask; -/// Vector of four 32-bit masks +/// A 128-bit SIMD vector mask for four elements of 32 bits. 
pub type mask32x4 = Mask; -/// Vector of eight 32-bit masks +/// A 256-bit SIMD vector mask for eight elements of 32 bits. pub type mask32x8 = Mask; -/// Vector of 16 32-bit masks +/// A 512-bit SIMD vector mask for 16 elements of 32 bits. pub type mask32x16 = Mask; -/// Vector of two 64-bit masks +/// A 128-bit SIMD vector mask for two elements of 64 bits. pub type mask64x2 = Mask; -/// Vector of four 64-bit masks +/// A 256-bit SIMD vector mask for four elements of 64 bits. pub type mask64x4 = Mask; -/// Vector of eight 64-bit masks +/// A 512-bit SIMD vector mask for eight elements of 64 bits. pub type mask64x8 = Mask; -/// Vector of two pointer-width masks +/// A SIMD vector mask for two elements of pointer width. pub type masksizex2 = Mask; -/// Vector of four pointer-width masks +/// A SIMD vector mask for four elements of pointer width. pub type masksizex4 = Mask; -/// Vector of eight pointer-width masks +/// A SIMD vector mask for eight elements of pointer width. pub type masksizex8 = Mask; macro_rules! impl_from { diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index fcc7f6d8d1c..f7985b64710 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -177,23 +177,23 @@ macro_rules! impl_float_vector { impl_float_vector! { f32, u32, i32 } impl_float_vector! { f64, u64, i64 } -/// Vector of two `f32` values +/// A 64-bit SIMD vector with two elements of type `f32`. pub type f32x2 = Simd; -/// Vector of four `f32` values +/// A 128-bit SIMD vector with four elements of type `f32`. pub type f32x4 = Simd; -/// Vector of eight `f32` values +/// A 256-bit SIMD vector with eight elements of type `f32`. pub type f32x8 = Simd; -/// Vector of 16 `f32` values +/// A 512-bit SIMD vector with 16 elements of type `f32`. pub type f32x16 = Simd; -/// Vector of two `f64` values +/// A 128-bit SIMD vector with two elements of type `f64`. 
pub type f64x2 = Simd; -/// Vector of four `f64` values +/// A 256-bit SIMD vector with four elements of type `f64`. pub type f64x4 = Simd; -/// Vector of eight `f64` values +/// A 512-bit SIMD vector with eight elements of type `f64`. pub type f64x8 = Simd; diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs index 3eac02a2761..eec483212b3 100644 --- a/crates/core_simd/src/vector/int.rs +++ b/crates/core_simd/src/vector/int.rs @@ -42,62 +42,62 @@ impl_integer_vector! { i32 } impl_integer_vector! { i64 } impl_integer_vector! { i8 } -/// Vector of two `isize` values +/// A SIMD vector with two elements of type `isize`. pub type isizex2 = Simd; -/// Vector of four `isize` values +/// A SIMD vector with four elements of type `isize`. pub type isizex4 = Simd; -/// Vector of eight `isize` values +/// A SIMD vector with eight elements of type `isize`. pub type isizex8 = Simd; -/// Vector of two `i16` values +/// A 32-bit SIMD vector with two elements of type `i16`. pub type i16x2 = Simd; -/// Vector of four `i16` values +/// A 64-bit SIMD vector with four elements of type `i16`. pub type i16x4 = Simd; -/// Vector of eight `i16` values +/// A 128-bit SIMD vector with eight elements of type `i16`. pub type i16x8 = Simd; -/// Vector of 16 `i16` values +/// A 256-bit SIMD vector with 16 elements of type `i16`. pub type i16x16 = Simd; -/// Vector of 32 `i16` values +/// A 512-bit SIMD vector with 32 elements of type `i16`. pub type i16x32 = Simd; -/// Vector of two `i32` values +/// A 64-bit SIMD vector with two elements of type `i32`. pub type i32x2 = Simd; -/// Vector of four `i32` values +/// A 128-bit SIMD vector with four elements of type `i32`. pub type i32x4 = Simd; -/// Vector of eight `i32` values +/// A 256-bit SIMD vector with eight elements of type `i32`. pub type i32x8 = Simd; -/// Vector of 16 `i32` values +/// A 512-bit SIMD vector with 16 elements of type `i32`. 
pub type i32x16 = Simd; -/// Vector of two `i64` values +/// A 128-bit SIMD vector with two elements of type `i64`. pub type i64x2 = Simd; -/// Vector of four `i64` values +/// A 256-bit SIMD vector with four elements of type `i64`. pub type i64x4 = Simd; -/// Vector of eight `i64` values +/// A 512-bit SIMD vector with eight elements of type `i64`. pub type i64x8 = Simd; -/// Vector of four `i8` values +/// A 32-bit SIMD vector with four elements of type `i8`. pub type i8x4 = Simd; -/// Vector of eight `i8` values +/// A 64-bit SIMD vector with eight elements of type `i8`. pub type i8x8 = Simd; -/// Vector of 16 `i8` values +/// A 128-bit SIMD vector with 16 elements of type `i8`. pub type i8x16 = Simd; -/// Vector of 32 `i8` values +/// A 256-bit SIMD vector with 32 elements of type `i8`. pub type i8x32 = Simd; -/// Vector of 64 `i8` values +/// A 512-bit SIMD vector with 64 elements of type `i8`. pub type i8x64 = Simd; diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs index ed91fc3640e..b4a69c44363 100644 --- a/crates/core_simd/src/vector/uint.rs +++ b/crates/core_simd/src/vector/uint.rs @@ -2,62 +2,62 @@ use crate::simd::Simd; -/// Vector of two `usize` values +/// A SIMD vector with two elements of type `usize`. pub type usizex2 = Simd; -/// Vector of four `usize` values +/// A SIMD vector with four elements of type `usize`. pub type usizex4 = Simd; -/// Vector of eight `usize` values +/// A SIMD vector with eight elements of type `usize`. pub type usizex8 = Simd; -/// Vector of two `u16` values +/// A 32-bit SIMD vector with two elements of type `u16`. pub type u16x2 = Simd; -/// Vector of four `u16` values +/// A 64-bit SIMD vector with four elements of type `u16`. pub type u16x4 = Simd; -/// Vector of eight `u16` values +/// A 128-bit SIMD vector with eight elements of type `u16`. pub type u16x8 = Simd; -/// Vector of 16 `u16` values +/// A 256-bit SIMD vector with 16 elements of type `u16`. 
pub type u16x16 = Simd; -/// Vector of 32 `u16` values +/// A 512-bit SIMD vector with 32 elements of type `u16`. pub type u16x32 = Simd; -/// Vector of two `u32` values +/// A 64-bit SIMD vector with two elements of type `u32`. pub type u32x2 = Simd; -/// Vector of four `u32` values +/// A 128-bit SIMD vector with four elements of type `u32`. pub type u32x4 = Simd; -/// Vector of eight `u32` values +/// A 256-bit SIMD vector with eight elements of type `u32`. pub type u32x8 = Simd; -/// Vector of 16 `u32` values +/// A 512-bit SIMD vector with 16 elements of type `u32`. pub type u32x16 = Simd; -/// Vector of two `u64` values +/// A 128-bit SIMD vector with two elements of type `u64`. pub type u64x2 = Simd; -/// Vector of four `u64` values +/// A 256-bit SIMD vector with four elements of type `u64`. pub type u64x4 = Simd; -/// Vector of eight `u64` values +/// A 512-bit SIMD vector with eight elements of type `u64`. pub type u64x8 = Simd; -/// Vector of four `u8` values +/// A 32-bit SIMD vector with four elements of type `u8`. pub type u8x4 = Simd; -/// Vector of eight `u8` values +/// A 64-bit SIMD vector with eight elements of type `u8`. pub type u8x8 = Simd; -/// Vector of 16 `u8` values +/// A 128-bit SIMD vector with 16 elements of type `u8`. pub type u8x16 = Simd; -/// Vector of 32 `u8` values +/// A 256-bit SIMD vector with 32 elements of type `u8`. pub type u8x32 = Simd; -/// Vector of 64 `u8` values +/// A 512-bit SIMD vector with 64 elements of type `u8`. 
pub type u8x64 = Simd; From c73f1fbdde8bd1c257f2f683a595a5c434f32a2d Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Sun, 3 Apr 2022 15:59:22 -0600 Subject: [PATCH 062/161] Update crates/core_simd/src/masks.rs Co-authored-by: Jacob Lifshay --- crates/core_simd/src/masks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index a56a154b437..ba4158b3cfc 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -507,7 +507,7 @@ where } } -/// A 64-bit SIMD vector mask for eight elements of 8 bits. +/// A SIMD vector mask of eight elements, for operating with SIMD vectors with 8 bit elements. pub type mask8x8 = Mask; /// A 128-bit SIMD vector mask for 16 elements of 8 bits. From 21b070ce4372c5141cb48ebd6fc6ca45030d703c Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Sun, 3 Apr 2022 16:15:27 -0600 Subject: [PATCH 063/161] Correct the Mask docs, and get them to fit in search results --- crates/core_simd/src/masks.rs | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ba4158b3cfc..d0021c91142 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -507,58 +507,58 @@ where } } -/// A SIMD vector mask of eight elements, for operating with SIMD vectors with 8 bit elements. +/// A mask for SIMD vectors with eight elements of 8 bits. pub type mask8x8 = Mask; -/// A 128-bit SIMD vector mask for 16 elements of 8 bits. +/// A mask for SIMD vectors with 16 elements of 8 bits. pub type mask8x16 = Mask; -/// A 256-bit SIMD vector mask for 32 elements of 8 bits. +/// A mask for SIMD vectors with 32 elements of 8 bits. pub type mask8x32 = Mask; -/// A 512-bit SIMD vector mask for 64 elements of 8 bits. +/// A mask for SIMD vectors with 64 elements of 8 bits. 
pub type mask8x64 = Mask<i8, 64>; -/// A 64-bit SIMD vector mask for four elements of 16 bits. +/// A mask for SIMD vectors with four elements of 16 bits. pub type mask16x4 = Mask<i16, 4>; -/// A 128-bit SIMD vector mask for eight elements of 16 bits. +/// A mask for SIMD vectors with eight elements of 16 bits. pub type mask16x8 = Mask<i16, 8>; -/// A 256-bit SIMD vector mask for 16 elements of 16 bits. +/// A mask for SIMD vectors with 16 elements of 16 bits. pub type mask16x16 = Mask<i16, 16>; -/// A 512-bit SIMD vector mask for 32 elements of 16 bits. +/// A mask for SIMD vectors with 32 elements of 16 bits. pub type mask16x32 = Mask<i16, 32>; -/// A 64-bit SIMD vector mask for two elements of 32 bits. +/// A mask for SIMD vectors with two elements of 32 bits. pub type mask32x2 = Mask<i32, 2>; -/// A 128-bit SIMD vector mask for four elements of 32 bits. +/// A mask for SIMD vectors with four elements of 32 bits. pub type mask32x4 = Mask<i32, 4>; -/// A 256-bit SIMD vector mask for eight elements of 32 bits. +/// A mask for SIMD vectors with eight elements of 32 bits. pub type mask32x8 = Mask<i32, 8>; -/// A 512-bit SIMD vector mask for 16 elements of 32 bits. +/// A mask for SIMD vectors with 16 elements of 32 bits. pub type mask32x16 = Mask<i32, 16>; -/// A 128-bit SIMD vector mask for two elements of 64 bits. +/// A mask for SIMD vectors with two elements of 64 bits. pub type mask64x2 = Mask<i64, 2>; -/// A 256-bit SIMD vector mask for four elements of 64 bits. +/// A mask for SIMD vectors with four elements of 64 bits. pub type mask64x4 = Mask<i64, 4>; -/// A 512-bit SIMD vector mask for eight elements of 64 bits. +/// A mask for SIMD vectors with eight elements of 64 bits. pub type mask64x8 = Mask<i64, 8>; -/// A SIMD vector mask for two elements of pointer width. +/// A mask for SIMD vectors with two elements of pointer width. pub type masksizex2 = Mask<isize, 2>; -/// A SIMD vector mask for four elements of pointer width. +/// A mask for SIMD vectors with four elements of pointer width.
pub type masksizex4 = Mask; -/// A SIMD vector mask for eight elements of pointer width. +/// A mask for SIMD vectors with eight elements of pointer width. pub type masksizex8 = Mask; macro_rules! impl_from { From 8cd9325e206bf362071b71a812849a3b463bcbcc Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Sun, 3 Apr 2022 16:22:31 -0600 Subject: [PATCH 064/161] Add a *small* blurb to Mask that is likely unobjectionable --- crates/core_simd/src/masks.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index d0021c91142..b97be97f7e6 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -77,6 +77,8 @@ impl_element! { isize } /// A SIMD vector mask for `LANES` elements of width specified by `Element`. /// +/// Masks represent boolean inclusion/exclusion on a per-lane basis. +/// /// The layout of this type is unspecified. #[repr(transparent)] pub struct Mask(mask_impl::Mask) From 7136841cbd22ba66dbd49331f276bdb16401ec11 Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Mon, 11 Apr 2022 00:05:14 -0600 Subject: [PATCH 065/161] rust-lang/portable-simd#274: Use SIMD equality for PartialEq on SIMD vectors --- crates/core_simd/src/vector.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 13e35ecfa49..d032f5459fd 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -429,8 +429,26 @@ where { #[inline] fn eq(&self, other: &Self) -> bool { - // TODO use SIMD equality - self.to_array() == other.to_array() + // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. + let mask = unsafe { + let tfvec: Simd<::Mask, LANES> = intrinsics::simd_eq(*self, *other); + Mask::from_int_unchecked(tfvec) + }; + + // Two vectors are equal if all lanes tested true for vertical equality. 
+ mask.all() + } + + #[inline] + fn ne(&self, other: &Self) -> bool { + // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. + let mask = unsafe { + let tfvec: Simd<::Mask, LANES> = intrinsics::simd_ne(*self, *other); + Mask::from_int_unchecked(tfvec) + }; + + // Two vectors are non-equal if any lane tested true for vertical non-equality. + mask.any() } } From fcc5ca0f93e751f2e748c7654c78a541c0392575 Mon Sep 17 00:00:00 2001 From: Sean Stangl Date: Mon, 11 Apr 2022 00:08:34 -0600 Subject: [PATCH 066/161] rust-lang/portable-simd#273: Documentation update for reduce functions, swizzle Working through giving example documentation to every Simd function. The major change in this patch is using doc macros to generate type-specific examples for each function, using a visually-apparent type constructor. This makes it feel nicer to have twelve separate documentation entries for reduce_product(), for example. --- crates/core_simd/src/lane_count.rs | 8 +- crates/core_simd/src/reduction.rs | 120 ++++++++++++++++++++++++++++- crates/core_simd/src/swizzle.rs | 52 +++++++------ crates/core_simd/src/vector.rs | 48 +++++++++++- 4 files changed, 195 insertions(+), 33 deletions(-) diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index 3b316f12b3e..63723e2ec13 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -3,7 +3,7 @@ mod sealed { } use sealed::Sealed; -/// A type representing a vector lane count. +/// Specifies the number of lanes in a SIMD vector as a type. pub struct LaneCount; impl LaneCount { @@ -11,7 +11,11 @@ impl LaneCount { pub const BITMASK_LEN: usize = (LANES + 7) / 8; } -/// Helper trait for vector lane counts. +/// Statically guarantees that a lane count is marked as supported. +/// +/// This trait is *sealed*: the list of implementors below is total. +/// Users do not have the ability to mark additional `LaneCount` values as supported. 
+/// Only SIMD vectors with supported lane counts are constructable. pub trait SupportedLaneCount: Sealed { #[doc(hidden)] type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 3177fd167fc..642ab319cdd 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -12,13 +12,41 @@ macro_rules! impl_integer_reductions { LaneCount: SupportedLaneCount, { /// Reducing wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] + /// assert_eq!(v.reduce_sum(), 10); + /// + /// // SIMD integer addition is always wrapping + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 1, 0, 0]);")] + #[doc = concat!("assert_eq!(v.reduce_sum(), ", stringify!($scalar), "::MIN);")] + /// ``` #[inline] pub fn reduce_sum(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_add_ordered(self, 0) } } - /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. + /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] + /// assert_eq!(v.reduce_product(), 24); + /// + /// // SIMD integer multiplication is always wrapping + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 2, 1, 1]);")] + #[doc = concat!("assert!(v.reduce_product() < ", stringify!($scalar), "::MAX);")] + /// ``` #[inline] pub fn reduce_product(self) -> $scalar { // Safety: `self` is an integer vector @@ -26,6 +54,16 @@ macro_rules! impl_integer_reductions { } /// Reducing maximum. Returns the maximum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] + /// assert_eq!(v.reduce_max(), 4); + /// ``` #[inline] pub fn reduce_max(self) -> $scalar { // Safety: `self` is an integer vector @@ -33,6 +71,16 @@ macro_rules! impl_integer_reductions { } /// Reducing minimum. Returns the minimum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] + #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] + /// assert_eq!(v.reduce_min(), 1); + /// ``` #[inline] pub fn reduce_min(self) -> $scalar { // Safety: `self` is an integer vector @@ -61,6 +109,16 @@ macro_rules! impl_float_reductions { { /// Reducing add. Returns the sum of the lanes of the vector. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")] + /// assert_eq!(v.reduce_sum(), 3.); + /// ``` #[inline] pub fn reduce_sum(self) -> $scalar { // LLVM sum is inaccurate on i586 @@ -73,6 +131,16 @@ macro_rules! impl_float_reductions { } /// Reducing multiply. Returns the product of the lanes of the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 4.]);")] + /// assert_eq!(v.reduce_product(), 12.); + /// ``` #[inline] pub fn reduce_product(self) -> $scalar { // LLVM product is inaccurate on i586 @@ -87,7 +155,30 @@ macro_rules! impl_float_reductions { /// Reducing maximum. Returns the maximum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may - /// return either. This function will not return `NaN` unless all lanes are `NaN`. + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")] + /// assert_eq!(v.reduce_max(), 2.); + /// + /// // NaN values are skipped... 
+ #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")] + /// assert_eq!(v.reduce_max(), 1.); + /// + /// // ...unless all values are NaN + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([", + stringify!($scalar), "::NAN, ", + stringify!($scalar), "::NAN]);" + )] + /// assert!(v.reduce_max().is_nan()); + /// ``` #[inline] pub fn reduce_max(self) -> $scalar { // Safety: `self` is a float vector @@ -97,7 +188,30 @@ macro_rules! impl_float_reductions { /// Reducing minimum. Returns the minimum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may - /// return either. This function will not return `NaN` unless all lanes are `NaN`. + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 7.]);")] + /// assert_eq!(v.reduce_min(), 3.); + /// + /// // NaN values are skipped... 
+ #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")] + /// assert_eq!(v.reduce_min(), 1.); + /// + /// // ...unless all values are NaN + #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([", + stringify!($scalar), "::NAN, ", + stringify!($scalar), "::NAN]);" + )] + /// assert!(v.reduce_min().is_nan()); + /// ``` #[inline] pub fn reduce_min(self) -> $scalar { // Safety: `self` is a float vector diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index ef47c4f3a4c..22999d24950 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,44 +1,46 @@ use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; -/// Constructs a new vector by selecting values from the lanes of the source vector or vectors to use. +/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors. /// -/// When swizzling one vector, the indices of the result vector are indicated by a `const` array -/// of `usize`, like [`Swizzle`]. -/// When swizzling two vectors, the indices are indicated by a `const` array of [`Which`], like -/// [`Swizzle2`]. +/// When swizzling one vector, lanes are selected by a `const` array of `usize`, +/// like [`Swizzle`]. +/// +/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`], +/// like [`Swizzle2`]. 
/// /// # Examples -/// ## One source vector +/// +/// With a single SIMD vector, the const array specifies lane indices in that vector: /// ``` /// # #![feature(portable_simd)] -/// # use core::simd::{Simd, simd_swizzle}; -/// let v = Simd::::from_array([0., 1., 2., 3.]); +/// # use core::simd::{u32x2, u32x4, simd_swizzle}; +/// let v = u32x4::from_array([10, 11, 12, 13]); /// /// // Keeping the same size -/// let r = simd_swizzle!(v, [3, 0, 1, 2]); -/// assert_eq!(r.to_array(), [3., 0., 1., 2.]); +/// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]); +/// assert_eq!(r.to_array(), [13, 10, 11, 12]); /// /// // Changing the number of lanes -/// let r = simd_swizzle!(v, [3, 1]); -/// assert_eq!(r.to_array(), [3., 1.]); +/// let r: u32x2 = simd_swizzle!(v, [3, 1]); +/// assert_eq!(r.to_array(), [13, 11]); /// ``` /// -/// ## Two source vectors +/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index: /// ``` /// # #![feature(portable_simd)] -/// # use core::simd::{Simd, simd_swizzle, Which}; -/// use Which::*; -/// let a = Simd::::from_array([0., 1., 2., 3.]); -/// let b = Simd::::from_array([4., 5., 6., 7.]); +/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which}; +/// use Which::{First, Second}; +/// let a = u32x4::from_array([0, 1, 2, 3]); +/// let b = u32x4::from_array([4, 5, 6, 7]); /// /// // Keeping the same size -/// let r = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]); -/// assert_eq!(r.to_array(), [0., 1., 6., 7.]); +/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]); +/// assert_eq!(r.to_array(), [0, 1, 6, 7]); /// /// // Changing the number of lanes -/// let r = simd_swizzle!(a, b, [First(0), Second(0)]); -/// assert_eq!(r.to_array(), [0., 4.]); +/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]); +/// assert_eq!(r.to_array(), [0, 4]); /// ``` #[allow(unused_macros)] pub macro simd_swizzle { @@ -68,12 +70,14 @@ pub macro simd_swizzle { } } -/// An 
index into one of two vectors. +/// Specifies a lane index into one of two SIMD vectors. +/// +/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle]. #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Which { - /// Indexes the first vector. + /// Index of a lane in the first input SIMD vector. First(usize), - /// Indexes the second vector. + /// Index of a lane in the second input SIMD vector. Second(usize), } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index d032f5459fd..f7989ee762a 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -99,17 +99,44 @@ where /// Number of lanes in this vector. pub const LANES: usize = LANES; - /// Get the number of lanes in this vector. + /// Returns the number of lanes in this SIMD vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::u32x4; + /// let v = u32x4::splat(0); + /// assert_eq!(v.lanes(), 4); + /// ``` pub const fn lanes(&self) -> usize { LANES } - /// Construct a SIMD vector by setting all lanes to the given value. + /// Constructs a new SIMD vector with all lanes set to the given value. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::u32x4; + /// let v = u32x4::splat(8); + /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); + /// ``` pub const fn splat(value: T) -> Self { Self([value; LANES]) } /// Returns an array reference containing the entire SIMD vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::{Simd, u64x4}; + /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]); + /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); + /// ``` pub const fn as_array(&self) -> &[T; LANES] { &self.0 } @@ -129,9 +156,21 @@ where self.0 } - /// Converts a slice to a SIMD vector containing `slice[..LANES]` + /// Converts a slice to a SIMD vector containing `slice[..LANES]`. 
+ /// /// # Panics - /// `from_slice` will panic if the slice's `len` is less than the vector's `Simd::LANES`. + /// + /// Panics if the slice's length is less than the vector's `Simd::LANES`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::{Simd, u32x4}; + /// let source = vec![1, 2, 3, 4, 5, 6]; + /// let v = u32x4::from_slice(&source); + /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); + /// ``` #[must_use] pub const fn from_slice(slice: &[T]) -> Self { assert!( @@ -148,6 +187,7 @@ where } /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. + /// /// This follows the semantics of Rust's `as` conversion for casting /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`), /// and from floats to integers (truncating, or saturating at the limits) for each lane, From 9718639d61b32d4efd2fac330ab1058732b3b758 Mon Sep 17 00:00:00 2001 From: Andrew Straw Date: Mon, 11 Apr 2022 21:17:44 +0200 Subject: [PATCH 067/161] rust-lang/portable-simd#276: Mention slice methods as_simd() and as_simd_mut() This links to a practical suggestion for how to solve the issues brought up in this section. --- beginners-guide.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/beginners-guide.md b/beginners-guide.md index 75158e5aa85..17ade06ae80 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -82,5 +82,10 @@ Fortunately, most SIMD types have a fairly predictable size. `i32x4` is bit-equi However, this is not the same as alignment. Computer architectures generally prefer aligned accesses, especially when moving data between memory and vector registers, and while some support specialized operations that can bend the rules to help with this, unaligned access is still typically slow, or even undefined behavior. In addition, different architectures can require different alignments when interacting with their native SIMD types. 
For this reason, any `#[repr(simd)]` type has a non-portable alignment. If it is necessary to directly interact with the alignment of these types, it should be via [`mem::align_of`]. +When working with slices, data correctly aligned for SIMD can be acquired using the [`as_simd`] and [`as_simd_mut`] methods of the slice primitive. + [`mem::transmute`]: https://doc.rust-lang.org/core/mem/fn.transmute.html [`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html +[`as_simd`]: https://doc.rust-lang.org/nightly/std/primitive.slice.html#method.as_simd +[`as_simd_mut`]: https://doc.rust-lang.org/nightly/std/primitive.slice.html#method.as_simd_mut + From 376957ad8cac70a85c08a26a147eb534d5cee380 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 11 Apr 2022 01:38:07 -0400 Subject: [PATCH 068/161] Move integer functions to traits. --- crates/core_simd/src/elements.rs | 5 + crates/core_simd/src/elements/int.rs | 273 ++++++++++++++++++++++++++ crates/core_simd/src/elements/uint.rs | 128 ++++++++++++ crates/core_simd/src/math.rs | 156 --------------- crates/core_simd/src/mod.rs | 3 +- crates/core_simd/src/reduction.rs | 144 +------------- crates/core_simd/src/vector/int.rs | 42 +--- crates/core_simd/tests/ops_macros.rs | 2 + 8 files changed, 413 insertions(+), 340 deletions(-) create mode 100644 crates/core_simd/src/elements.rs create mode 100644 crates/core_simd/src/elements/int.rs create mode 100644 crates/core_simd/src/elements/uint.rs delete mode 100644 crates/core_simd/src/math.rs diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/elements.rs new file mode 100644 index 00000000000..0fb1f5b9fe9 --- /dev/null +++ b/crates/core_simd/src/elements.rs @@ -0,0 +1,5 @@ +mod int; +mod uint; + +pub use int::*; +pub use uint::*; diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs new file mode 100644 index 00000000000..61135427456 --- /dev/null +++ b/crates/core_simd/src/elements/int.rs @@ -0,0 +1,273 @@ +use 
crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount, +}; + +/// Operations on SIMD vectors of signed integers. +pub trait SimdInt: Sized { + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Scalar type contained by this SIMD vector type. + type Scalar; + + /// Lanewise saturating add. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::i32::{MIN, MAX}; + /// let x = Simd::from_array([MIN, 0, 1, MAX]); + /// let max = Simd::splat(MAX); + /// let unsat = x + max; + /// let sat = x.saturating_add(max); + /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2])); + /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX])); + /// ``` + fn saturating_add(self, second: Self) -> Self; + + /// Lanewise saturating subtract. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::i32::{MIN, MAX}; + /// let x = Simd::from_array([MIN, -2, -1, MAX]); + /// let max = Simd::splat(MAX); + /// let unsat = x - max; + /// let sat = x.saturating_sub(max); + /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0])); + /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0])); + fn saturating_sub(self, second: Self) -> Self; + + /// Lanewise absolute value, implemented in Rust. + /// Every lane becomes its absolute value. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::i32::{MIN, MAX}; + /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); + /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); + /// ``` + fn abs(self) -> Self; + + /// Lanewise saturating absolute value, implemented in Rust. + /// As abs(), except the MIN value becomes MAX instead of itself. 
+ /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::i32::{MIN, MAX}; + /// let xs = Simd::from_array([MIN, -2, 0, 3]); + /// let unsat = xs.abs(); + /// let sat = xs.saturating_abs(); + /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3])); + /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3])); + /// ``` + fn saturating_abs(self) -> Self; + + /// Lanewise saturating negation, implemented in Rust. + /// As neg(), except the MIN value becomes MAX instead of itself. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::i32::{MIN, MAX}; + /// let x = Simd::from_array([MIN, -2, 3, MAX]); + /// let unsat = -x; + /// let sat = x.saturating_neg(); + /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1])); + /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1])); + /// ``` + fn saturating_neg(self) -> Self; + + /// Returns true for each positive lane and false if it is zero or negative. + fn is_positive(self) -> Self::Mask; + + /// Returns true for each negative lane and false if it is zero or positive. + fn is_negative(self) -> Self::Mask; + + /// Returns numbers representing the sign of each lane. + /// * `0` if the number is zero + /// * `1` if the number is positive + /// * `-1` if the number is negative + fn signum(self) -> Self; + + /// Returns the sum of the lanes of the vector, with wrapping addition. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::i32x4; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_sum(), 10); + /// + /// // SIMD integer addition is always wrapping + /// let v = i32x4::from_array([i32::MAX, 1, 0, 0]); + /// assert_eq!(v.reduce_sum(), i32::MIN); + /// ``` + fn reduce_sum(self) -> Self::Scalar; + + /// Returns the product of the lanes of the vector, with wrapping multiplication. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::i32x4; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_product(), 24); + /// + /// // SIMD integer multiplication is always wrapping + /// let v = i32x4::from_array([i32::MAX, 2, 1, 1]); + /// assert!(v.reduce_product() < i32::MAX); + /// ``` + fn reduce_product(self) -> Self::Scalar; + + /// Returns the maximum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::i32x4; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_max(), 4); + /// ``` + fn reduce_max(self) -> Self::Scalar; + + /// Returns the minimum lane in the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::i32x4; + /// let v = i32x4::from_array([1, 2, 3, 4]); + /// assert_eq!(v.reduce_min(), 1); + /// ``` + fn reduce_min(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "and" across the lanes of the vector. + fn reduce_and(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "or" across the lanes of the vector. + fn reduce_or(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "xor" across the lanes of the vector. + fn reduce_xor(self) -> Self::Scalar; +} + +macro_rules! 
impl_trait { + { $($ty:ty),* } => { + $( + impl SimdInt for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Mask<<$ty as SimdElement>::Mask, LANES>; + type Scalar = $ty; + + #[inline] + fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_add(self, second) } + } + + #[inline] + fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_sub(self, second) } + } + + #[inline] + fn abs(self) -> Self { + const SHR: $ty = <$ty>::BITS as $ty - 1; + let m = self >> Simd::splat(SHR); + (self^m) - m + } + + #[inline] + fn saturating_abs(self) -> Self { + // arith shift for -1 or 0 mask based on sign bit, giving 2s complement + const SHR: $ty = <$ty>::BITS as $ty - 1; + let m = self >> Simd::splat(SHR); + (self^m).saturating_sub(m) + } + + #[inline] + fn saturating_neg(self) -> Self { + Self::splat(0).saturating_sub(self) + } + + #[inline] + fn is_positive(self) -> Self::Mask { + self.simd_gt(Self::splat(0)) + } + + #[inline] + fn is_negative(self) -> Self::Mask { + self.simd_lt(Self::splat(0)) + } + + #[inline] + fn signum(self) -> Self { + self.is_positive().select( + Self::splat(1), + self.is_negative().select(Self::splat(-1), Self::splat(0)) + ) + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_max(self) } + } + + #[inline] + fn reduce_min(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_min(self) } + } + + #[inline] + fn reduce_and(self) -> Self::Scalar { + // Safety: `self` 
is an integer vector + unsafe { intrinsics::simd_reduce_and(self) } + } + + #[inline] + fn reduce_or(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_or(self) } + } + + #[inline] + fn reduce_xor(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_xor(self) } + } + } + )* + } +} + +impl_trait! { i8, i16, i32, i64, isize } diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs new file mode 100644 index 00000000000..da3213535a3 --- /dev/null +++ b/crates/core_simd/src/elements/uint.rs @@ -0,0 +1,128 @@ +use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount}; + +/// Operations on SIMD vectors of unsigned integers. +pub trait SimdUint: Sized { + /// Scalar type contained by this SIMD vector type. + type Scalar; + + /// Lanewise saturating add. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::u32::MAX; + /// let x = Simd::from_array([2, 1, 0, MAX]); + /// let max = Simd::splat(MAX); + /// let unsat = x + max; + /// let sat = x.saturating_add(max); + /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1])); + /// assert_eq!(sat, max); + /// ``` + fn saturating_add(self, second: Self) -> Self; + + /// Lanewise saturating subtract. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// use core::u32::MAX; + /// let x = Simd::from_array([2, 1, 0, MAX]); + /// let max = Simd::splat(MAX); + /// let unsat = x - max; + /// let sat = x.saturating_sub(max); + /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0])); + /// assert_eq!(sat, Simd::splat(0)); + fn saturating_sub(self, second: Self) -> Self; + + /// Returns the sum of the lanes of the vector, with wrapping addition. + fn reduce_sum(self) -> Self::Scalar; + + /// Returns the product of the lanes of the vector, with wrapping multiplication. 
+ fn reduce_product(self) -> Self::Scalar; + + /// Returns the maximum lane in the vector. + fn reduce_max(self) -> Self::Scalar; + + /// Returns the minimum lane in the vector. + fn reduce_min(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "and" across the lanes of the vector. + fn reduce_and(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "or" across the lanes of the vector. + fn reduce_or(self) -> Self::Scalar; + + /// Returns the cumulative bitwise "xor" across the lanes of the vector. + fn reduce_xor(self) -> Self::Scalar; +} + +macro_rules! impl_trait { + { $($ty:ty),* } => { + $( + impl SimdUint for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + type Scalar = $ty; + + #[inline] + fn saturating_add(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_add(self, second) } + } + + #[inline] + fn saturating_sub(self, second: Self) -> Self { + // Safety: `self` is a vector + unsafe { intrinsics::simd_saturating_sub(self, second) } + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_max(self) } + } + + #[inline] + fn reduce_min(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_min(self) } + } + + #[inline] + fn reduce_and(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_and(self) } + } + + #[inline] + fn reduce_or(self) -> Self::Scalar { + // Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_or(self) } + } + + #[inline] + fn reduce_xor(self) -> Self::Scalar { + 
// Safety: `self` is an integer vector + unsafe { intrinsics::simd_reduce_xor(self) } + } + } + )* + } +} + +impl_trait! { u8, u16, u32, u64, usize } diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs deleted file mode 100644 index 606021e983e..00000000000 --- a/crates/core_simd/src/math.rs +++ /dev/null @@ -1,156 +0,0 @@ -use crate::simd::intrinsics::{simd_saturating_add, simd_saturating_sub}; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; - -macro_rules! impl_uint_arith { - ($($ty:ty),+) => { - $( impl Simd<$ty, LANES> where LaneCount: SupportedLaneCount { - - /// Lanewise saturating add. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::MAX;")] - /// let x = Simd::from_array([2, 1, 0, MAX]); - /// let max = Simd::splat(MAX); - /// let unsat = x + max; - /// let sat = x.saturating_add(max); - /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1])); - /// assert_eq!(sat, max); - /// ``` - #[inline] - pub fn saturating_add(self, second: Self) -> Self { - // Safety: `self` is a vector - unsafe { simd_saturating_add(self, second) } - } - - /// Lanewise saturating subtract. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::MAX;")] - /// let x = Simd::from_array([2, 1, 0, MAX]); - /// let max = Simd::splat(MAX); - /// let unsat = x - max; - /// let sat = x.saturating_sub(max); - /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0])); - /// assert_eq!(sat, Simd::splat(0)); - #[inline] - pub fn saturating_sub(self, second: Self) -> Self { - // Safety: `self` is a vector - unsafe { simd_saturating_sub(self, second) } - } - })+ - } -} - -macro_rules! impl_int_arith { - ($($ty:ty),+) => { - $( impl Simd<$ty, LANES> where LaneCount: SupportedLaneCount { - - /// Lanewise saturating add. 
- /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] - /// let x = Simd::from_array([MIN, 0, 1, MAX]); - /// let max = Simd::splat(MAX); - /// let unsat = x + max; - /// let sat = x.saturating_add(max); - /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2])); - /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX])); - /// ``` - #[inline] - pub fn saturating_add(self, second: Self) -> Self { - // Safety: `self` is a vector - unsafe { simd_saturating_add(self, second) } - } - - /// Lanewise saturating subtract. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] - /// let x = Simd::from_array([MIN, -2, -1, MAX]); - /// let max = Simd::splat(MAX); - /// let unsat = x - max; - /// let sat = x.saturating_sub(max); - /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0])); - /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0])); - #[inline] - pub fn saturating_sub(self, second: Self) -> Self { - // Safety: `self` is a vector - unsafe { simd_saturating_sub(self, second) } - } - - /// Lanewise absolute value, implemented in Rust. - /// Every lane becomes its absolute value. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] - /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); - /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); - /// ``` - #[inline] - pub fn abs(self) -> Self { - const SHR: $ty = <$ty>::BITS as $ty - 1; - let m = self >> Simd::splat(SHR); - (self^m) - m - } - - /// Lanewise saturating absolute value, implemented in Rust. - /// As abs(), except the MIN value becomes MAX instead of itself. 
- /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] - /// let xs = Simd::from_array([MIN, -2, 0, 3]); - /// let unsat = xs.abs(); - /// let sat = xs.saturating_abs(); - /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3])); - /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3])); - /// ``` - #[inline] - pub fn saturating_abs(self) -> Self { - // arith shift for -1 or 0 mask based on sign bit, giving 2s complement - const SHR: $ty = <$ty>::BITS as $ty - 1; - let m = self >> Simd::splat(SHR); - (self^m).saturating_sub(m) - } - - /// Lanewise saturating negation, implemented in Rust. - /// As neg(), except the MIN value becomes MAX instead of itself. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")] - /// let x = Simd::from_array([MIN, -2, 3, MAX]); - /// let unsat = -x; - /// let sat = x.saturating_neg(); - /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1])); - /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1])); - /// ``` - #[inline] - pub fn saturating_neg(self) -> Self { - Self::splat(0).saturating_sub(self) - } - })+ - } -} - -impl_uint_arith! { u8, u16, u32, u64, usize } -impl_int_arith! 
{ i8, i16, i32, i64, isize } diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 42257f4e119..2d4fe2b7fde 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -9,12 +9,12 @@ pub(crate) mod intrinsics; #[cfg(feature = "generic_const_exprs")] mod to_bytes; +mod elements; mod eq; mod fmt; mod iter; mod lane_count; mod masks; -mod math; mod ops; mod ord; mod round; @@ -26,6 +26,7 @@ mod vendor; pub mod simd { pub(crate) use crate::core_simd::intrinsics; + pub use crate::core_simd::elements::*; pub use crate::core_simd::eq::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index 642ab319cdd..9d8639feeee 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -1,105 +1,7 @@ use crate::simd::intrinsics::{ - simd_reduce_add_ordered, simd_reduce_and, simd_reduce_max, simd_reduce_min, - simd_reduce_mul_ordered, simd_reduce_or, simd_reduce_xor, + simd_reduce_add_ordered, simd_reduce_max, simd_reduce_min, simd_reduce_mul_ordered, }; -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; -use core::ops::{BitAnd, BitOr, BitXor}; - -macro_rules! impl_integer_reductions { - { $scalar:ty } => { - impl Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Reducing wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. 
- /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] - /// assert_eq!(v.reduce_sum(), 10); - /// - /// // SIMD integer addition is always wrapping - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 1, 0, 0]);")] - #[doc = concat!("assert_eq!(v.reduce_sum(), ", stringify!($scalar), "::MIN);")] - /// ``` - #[inline] - pub fn reduce_sum(self) -> $scalar { - // Safety: `self` is an integer vector - unsafe { simd_reduce_add_ordered(self, 0) } - } - - /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. - /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] - /// assert_eq!(v.reduce_product(), 24); - /// - /// // SIMD integer multiplication is always wrapping - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 2, 1, 1]);")] - #[doc = concat!("assert!(v.reduce_product() < ", stringify!($scalar), "::MAX);")] - /// ``` - #[inline] - pub fn reduce_product(self) -> $scalar { - // Safety: `self` is an integer vector - unsafe { simd_reduce_mul_ordered(self, 1) } - } - - /// Reducing maximum. Returns the maximum lane in the vector. 
- /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] - /// assert_eq!(v.reduce_max(), 4); - /// ``` - #[inline] - pub fn reduce_max(self) -> $scalar { - // Safety: `self` is an integer vector - unsafe { simd_reduce_max(self) } - } - - /// Reducing minimum. Returns the minimum lane in the vector. - /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")] - #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")] - /// assert_eq!(v.reduce_min(), 1); - /// ``` - #[inline] - pub fn reduce_min(self) -> $scalar { - // Safety: `self` is an integer vector - unsafe { simd_reduce_min(self) } - } - } - } -} - -impl_integer_reductions! { i8 } -impl_integer_reductions! { i16 } -impl_integer_reductions! { i32 } -impl_integer_reductions! { i64 } -impl_integer_reductions! { isize } -impl_integer_reductions! { u8 } -impl_integer_reductions! { u16 } -impl_integer_reductions! { u32 } -impl_integer_reductions! { u64 } -impl_integer_reductions! { usize } +use crate::simd::{LaneCount, Simd, SupportedLaneCount}; macro_rules! impl_float_reductions { { $scalar:ty } => { @@ -223,45 +125,3 @@ macro_rules! impl_float_reductions { impl_float_reductions! { f32 } impl_float_reductions! { f64 } - -impl Simd -where - Self: BitAnd, - T: SimdElement + BitAnd, - LaneCount: SupportedLaneCount, -{ - /// Reducing bitwise "and". Returns the cumulative bitwise "and" across the lanes of - /// the vector. - #[inline] - pub fn reduce_and(self) -> T { - unsafe { simd_reduce_and(self) } - } -} - -impl Simd -where - Self: BitOr, - T: SimdElement + BitOr, - LaneCount: SupportedLaneCount, -{ - /// Reducing bitwise "or". 
Returns the cumulative bitwise "or" across the lanes of - /// the vector. - #[inline] - pub fn reduce_or(self) -> T { - unsafe { simd_reduce_or(self) } - } -} - -impl Simd -where - Self: BitXor, - T: SimdElement + BitXor, - LaneCount: SupportedLaneCount, -{ - /// Reducing bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of - /// the vector. - #[inline] - pub fn reduce_xor(self) -> T { - unsafe { simd_reduce_xor(self) } - } -} diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs index 384f01d822a..20e56c7dc64 100644 --- a/crates/core_simd/src/vector/int.rs +++ b/crates/core_simd/src/vector/int.rs @@ -1,46 +1,6 @@ #![allow(non_camel_case_types)] -use crate::simd::{LaneCount, Mask, Simd, SimdPartialOrd, SupportedLaneCount}; - -/// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`. -macro_rules! impl_integer_vector { - { $type:ty } => { - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Returns true for each positive lane and false if it is zero or negative. - #[inline] - pub fn is_positive(self) -> Mask<$type, LANES> { - self.simd_gt(Self::splat(0)) - } - - /// Returns true for each negative lane and false if it is zero or positive. - #[inline] - pub fn is_negative(self) -> Mask<$type, LANES> { - self.simd_lt(Self::splat(0)) - } - - /// Returns numbers representing the sign of each lane. - /// * `0` if the number is zero - /// * `1` if the number is positive - /// * `-1` if the number is negative - #[inline] - pub fn signum(self) -> Self { - self.is_positive().select( - Self::splat(1), - self.is_negative().select(Self::splat(-1), Self::splat(0)) - ) - } - } - } -} - -impl_integer_vector! { isize } -impl_integer_vector! { i16 } -impl_integer_vector! { i32 } -impl_integer_vector! { i64 } -impl_integer_vector! { i8 } +use crate::simd::Simd; /// A SIMD vector with two elements of type `isize`. 
pub type isizex2 = Simd; diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 47fe49b0982..48c512be7d0 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -172,6 +172,7 @@ macro_rules! impl_common_integer_tests { macro_rules! impl_signed_tests { { $scalar:tt } => { mod $scalar { + use core_simd::simd::SimdInt; type Vector = core_simd::Simd; type Scalar = $scalar; @@ -312,6 +313,7 @@ macro_rules! impl_signed_tests { macro_rules! impl_unsigned_tests { { $scalar:tt } => { mod $scalar { + use core_simd::simd::SimdUint; type Vector = core_simd::Simd; type Scalar = $scalar; From 04be48ff97757a803e934ab8d2e90631b59557f8 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 15 Apr 2022 01:44:18 -0400 Subject: [PATCH 069/161] Add float trait, and seal traits. --- crates/core_simd/src/elements.rs | 6 + crates/core_simd/src/elements/float.rs | 344 +++++++++++++++++++++++++ crates/core_simd/src/elements/int.rs | 9 +- crates/core_simd/src/elements/uint.rs | 9 +- crates/core_simd/src/mod.rs | 3 - crates/core_simd/src/reduction.rs | 127 --------- crates/core_simd/src/vector/float.rs | 211 +-------------- crates/core_simd/tests/ops_macros.rs | 4 +- 8 files changed, 368 insertions(+), 345 deletions(-) create mode 100644 crates/core_simd/src/elements/float.rs delete mode 100644 crates/core_simd/src/reduction.rs diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/elements.rs index 0fb1f5b9fe9..701eb66b248 100644 --- a/crates/core_simd/src/elements.rs +++ b/crates/core_simd/src/elements.rs @@ -1,5 +1,11 @@ +mod float; mod int; mod uint; +mod sealed { + pub trait Sealed {} +} + +pub use float::*; pub use int::*; pub use uint::*; diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs new file mode 100644 index 00000000000..fafbd2a4d21 --- /dev/null +++ b/crates/core_simd/src/elements/float.rs @@ -0,0 +1,344 @@ +use super::sealed::Sealed; +use 
crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd, + SupportedLaneCount, +}; + +/// Operations on SIMD vectors of floats. +pub trait SimdFloat: Sized + Sealed { + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Scalar type contained by this SIMD vector type. + type Scalar; + + /// Bit representation of this SIMD vector type. + type Bits; + + /// Raw transmutation to an unsigned integer vector type with the + /// same size and number of lanes. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_bits(self) -> Self::Bits; + + /// Raw transmutation from an unsigned integer vector type with the + /// same size and number of lanes. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn from_bits(bits: Self::Bits) -> Self; + + /// Produces a vector where every lane has the absolute value of the + /// equivalently-indexed lane in `self`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn abs(self) -> Self; + + /// Takes the reciprocal (inverse) of each lane, `1/x`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn recip(self) -> Self; + + /// Converts each lane from radians to degrees. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_degrees(self) -> Self; + + /// Converts each lane from degrees to radians. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn to_radians(self) -> Self; + + /// Returns true for each lane if it has a positive sign, including + /// `+0.0`, `NaN`s with positive sign bit and positive infinity. 
+ #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_sign_positive(self) -> Self::Mask; + + /// Returns true for each lane if it has a negative sign, including + /// `-0.0`, `NaN`s with negative sign bit and negative infinity. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_sign_negative(self) -> Self::Mask; + + /// Returns true for each lane if its value is `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_nan(self) -> Self::Mask; + + /// Returns true for each lane if its value is positive infinity or negative infinity. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_infinite(self) -> Self::Mask; + + /// Returns true for each lane if its value is neither infinite nor `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_finite(self) -> Self::Mask; + + /// Returns true for each lane if its value is subnormal. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_subnormal(self) -> Self::Mask; + + /// Returns true for each lane if its value is neither zero, infinite, + /// subnormal, nor `NaN`. + #[must_use = "method returns a new mask and does not mutate the original value"] + fn is_normal(self) -> Self::Mask; + + /// Replaces each lane with a number that represents its sign. + /// + /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` + /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` + /// * `NAN` if the number is `NAN` + #[must_use = "method returns a new vector and does not mutate the original value"] + fn signum(self) -> Self; + + /// Returns each lane with the magnitude of `self` and the sign of `sign`. + /// + /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. 
+ #[must_use = "method returns a new vector and does not mutate the original value"] + fn copysign(self, sign: Self) -> Self; + + /// Returns the minimum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_min(self, other: Self) -> Self; + + /// Returns the maximum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_max(self, other: Self) -> Self; + + /// Restrict each lane to a certain interval unless it is NaN. + /// + /// For each lane in `self`, returns the corresponding lane in `max` if the lane is + /// greater than `max`, and the corresponding lane in `min` if the lane is less + /// than `min`. Otherwise returns the lane in `self`. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn simd_clamp(self, min: Self, max: Self) -> Self; + + /// Returns the sum of the lanes of the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::f32x2; + /// let v = f32x2::from_array([1., 2.]); + /// assert_eq!(v.reduce_sum(), 3.); + /// ``` + fn reduce_sum(self) -> Self::Scalar; + + /// Reducing multiply. Returns the product of the lanes of the vector. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::f32x2; + /// let v = f32x2::from_array([3., 4.]); + /// assert_eq!(v.reduce_product(), 12.); + /// ``` + fn reduce_product(self) -> Self::Scalar; + + /// Returns the maximum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::f32x2; + /// let v = f32x2::from_array([1., 2.]); + /// assert_eq!(v.reduce_max(), 2.); + /// + /// // NaN values are skipped... + /// let v = f32x2::from_array([1., f32::NAN]); + /// assert_eq!(v.reduce_max(), 1.); + /// + /// // ...unless all values are NaN + /// let v = f32x2::from_array([f32::NAN, f32::NAN]); + /// assert!(v.reduce_max().is_nan()); + /// ``` + fn reduce_max(self) -> Self::Scalar; + + /// Returns the minimum lane in the vector. + /// + /// Returns values based on equality, so a vector containing both `0.` and `-0.` may + /// return either. + /// + /// This function will not return `NaN` unless all lanes are `NaN`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::f32x2; + /// let v = f32x2::from_array([3., 7.]); + /// assert_eq!(v.reduce_min(), 3.); + /// + /// // NaN values are skipped... + /// let v = f32x2::from_array([1., f32::NAN]); + /// assert_eq!(v.reduce_min(), 1.); + /// + /// // ...unless all values are NaN + /// let v = f32x2::from_array([f32::NAN, f32::NAN]); + /// assert!(v.reduce_min().is_nan()); + /// ``` + fn reduce_min(self) -> Self::Scalar; +} + +macro_rules! 
impl_trait { + { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => { + $( + impl Sealed for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + } + + impl SimdFloat for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>; + type Scalar = $ty; + type Bits = Simd<$bits_ty, LANES>; + + #[inline] + fn to_bits(self) -> Simd<$bits_ty, LANES> { + assert_eq!(core::mem::size_of::(), core::mem::size_of::()); + unsafe { core::mem::transmute_copy(&self) } + } + + #[inline] + fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { + assert_eq!(core::mem::size_of::(), core::mem::size_of::()); + unsafe { core::mem::transmute_copy(&bits) } + } + + #[inline] + fn abs(self) -> Self { + unsafe { intrinsics::simd_fabs(self) } + } + + #[inline] + fn recip(self) -> Self { + Self::splat(1.0) / self + } + + #[inline] + fn to_degrees(self) -> Self { + // to_degrees uses a special constant for better precision, so extract that constant + self * Self::splat(Self::Scalar::to_degrees(1.)) + } + + #[inline] + fn to_radians(self) -> Self { + self * Self::splat(Self::Scalar::to_radians(1.)) + } + + #[inline] + fn is_sign_positive(self) -> Self::Mask { + !self.is_sign_negative() + } + + #[inline] + fn is_sign_negative(self) -> Self::Mask { + let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1); + sign_bits.simd_gt(Simd::splat(0)) + } + + #[inline] + fn is_nan(self) -> Self::Mask { + self.simd_ne(self) + } + + #[inline] + fn is_infinite(self) -> Self::Mask { + self.abs().simd_eq(Self::splat(Self::Scalar::INFINITY)) + } + + #[inline] + fn is_finite(self) -> Self::Mask { + self.abs().simd_lt(Self::splat(Self::Scalar::INFINITY)) + } + + #[inline] + fn is_subnormal(self) -> Self::Mask { + self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original 
value"] + fn is_normal(self) -> Self::Mask { + !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) + } + + #[inline] + fn signum(self) -> Self { + self.is_nan().select(Self::splat(Self::Scalar::NAN), Self::splat(1.0).copysign(self)) + } + + #[inline] + fn copysign(self, sign: Self) -> Self { + let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); + let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); + Self::from_bits(sign_bit | magnitude) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + unsafe { intrinsics::simd_fmin(self, other) } + } + + #[inline] + fn simd_max(self, other: Self) -> Self { + unsafe { intrinsics::simd_fmax(self, other) } + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + let mut x = self; + x = x.simd_lt(min).select(min, x); + x = x.simd_gt(max).select(max, x); + x + } + + #[inline] + fn reduce_sum(self) -> Self::Scalar { + // LLVM sum is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { + self.as_array().iter().sum() + } else { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) } + } + } + + #[inline] + fn reduce_product(self) -> Self::Scalar { + // LLVM product is inaccurate on i586 + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { + self.as_array().iter().product() + } else { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) } + } + } + + #[inline] + fn reduce_max(self) -> Self::Scalar { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_max(self) } + } + + #[inline] + fn reduce_min(self) -> Self::Scalar { + // Safety: `self` is a float vector + unsafe { intrinsics::simd_reduce_min(self) } + } + } + )* + } +} + +impl_trait! 
{ f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index 61135427456..c3139b4ba3e 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -1,9 +1,10 @@ +use super::sealed::Sealed; use crate::simd::{ intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount, }; /// Operations on SIMD vectors of signed integers. -pub trait SimdInt: Sized { +pub trait SimdInt: Sized + Sealed { /// Mask type used for manipulating this SIMD vector type. type Mask; @@ -167,6 +168,12 @@ pub trait SimdInt: Sized { macro_rules! impl_trait { { $($ty:ty),* } => { $( + impl Sealed for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + } + impl SimdInt for Simd<$ty, LANES> where LaneCount: SupportedLaneCount, diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index da3213535a3..cba1a9b0ce0 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -1,7 +1,8 @@ +use super::sealed::Sealed; use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. -pub trait SimdUint: Sized { +pub trait SimdUint: Sized + Sealed { /// Scalar type contained by this SIMD vector type. type Scalar; @@ -61,6 +62,12 @@ pub trait SimdUint: Sized { macro_rules! 
impl_trait { { $($ty:ty),* } => { $( + impl Sealed for Simd<$ty, LANES> + where + LaneCount: SupportedLaneCount, + { + } + impl SimdUint for Simd<$ty, LANES> where LaneCount: SupportedLaneCount, diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 2d4fe2b7fde..590b2e4a153 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -1,6 +1,3 @@ -#[macro_use] -mod reduction; - #[macro_use] mod swizzle; diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs deleted file mode 100644 index 9d8639feeee..00000000000 --- a/crates/core_simd/src/reduction.rs +++ /dev/null @@ -1,127 +0,0 @@ -use crate::simd::intrinsics::{ - simd_reduce_add_ordered, simd_reduce_max, simd_reduce_min, simd_reduce_mul_ordered, -}; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; - -macro_rules! impl_float_reductions { - { $scalar:ty } => { - impl Simd<$scalar, LANES> - where - LaneCount: SupportedLaneCount, - { - - /// Reducing add. Returns the sum of the lanes of the vector. - /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")] - /// assert_eq!(v.reduce_sum(), 3.); - /// ``` - #[inline] - pub fn reduce_sum(self) -> $scalar { - // LLVM sum is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { - self.as_array().iter().sum() - } else { - // Safety: `self` is a float vector - unsafe { simd_reduce_add_ordered(self, 0.) } - } - } - - /// Reducing multiply. Returns the product of the lanes of the vector. 
- /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 4.]);")] - /// assert_eq!(v.reduce_product(), 12.); - /// ``` - #[inline] - pub fn reduce_product(self) -> $scalar { - // LLVM product is inaccurate on i586 - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { - self.as_array().iter().product() - } else { - // Safety: `self` is a float vector - unsafe { simd_reduce_mul_ordered(self, 1.) } - } - } - - /// Reducing maximum. Returns the maximum lane in the vector. - /// - /// Returns values based on equality, so a vector containing both `0.` and `-0.` may - /// return either. - /// - /// This function will not return `NaN` unless all lanes are `NaN`. - /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")] - /// assert_eq!(v.reduce_max(), 2.); - /// - /// // NaN values are skipped... - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")] - /// assert_eq!(v.reduce_max(), 1.); - /// - /// // ...unless all values are NaN - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([", - stringify!($scalar), "::NAN, ", - stringify!($scalar), "::NAN]);" - )] - /// assert!(v.reduce_max().is_nan()); - /// ``` - #[inline] - pub fn reduce_max(self) -> $scalar { - // Safety: `self` is a float vector - unsafe { simd_reduce_max(self) } - } - - /// Reducing minimum. Returns the minimum lane in the vector. - /// - /// Returns values based on equality, so a vector containing both `0.` and `-0.` may - /// return either. - /// - /// This function will not return `NaN` unless all lanes are `NaN`. 
- /// - /// # Examples - /// - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")] - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 7.]);")] - /// assert_eq!(v.reduce_min(), 3.); - /// - /// // NaN values are skipped... - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")] - /// assert_eq!(v.reduce_min(), 1.); - /// - /// // ...unless all values are NaN - #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([", - stringify!($scalar), "::NAN, ", - stringify!($scalar), "::NAN]);" - )] - /// assert!(v.reduce_min().is_nan()); - /// ``` - #[inline] - pub fn reduce_min(self) -> $scalar { - // Safety: `self` is a float vector - unsafe { simd_reduce_min(self) } - } - } - } -} - -impl_float_reductions! { f32 } -impl_float_reductions! { f64 } diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 13b1d3995a1..f836c99b1e2 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -1,145 +1,6 @@ #![allow(non_camel_case_types)] -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SimdPartialOrd, SupportedLaneCount}; - -/// Implements inherent methods for a float vector containing multiple -/// `$lanes` of float `$type`, which uses `$bits_ty` as its binary -/// representation. -macro_rules! impl_float_vector { - { $type:ty, $bits_ty:ty, $mask_ty:ty } => { - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Raw transmutation to an unsigned integer vector type with the - /// same size and number of lanes. 
- #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_bits(self) -> Simd<$bits_ty, LANES> { - assert_eq!(core::mem::size_of::(), core::mem::size_of::>()); - unsafe { core::mem::transmute_copy(&self) } - } - - /// Raw transmutation from an unsigned integer vector type with the - /// same size and number of lanes. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { - assert_eq!(core::mem::size_of::(), core::mem::size_of::>()); - unsafe { core::mem::transmute_copy(&bits) } - } - - /// Produces a vector where every lane has the absolute value of the - /// equivalently-indexed lane in `self`. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn abs(self) -> Self { - unsafe { intrinsics::simd_fabs(self) } - } - - /// Takes the reciprocal (inverse) of each lane, `1/x`. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn recip(self) -> Self { - Self::splat(1.0) / self - } - - /// Converts each lane from radians to degrees. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_degrees(self) -> Self { - // to_degrees uses a special constant for better precision, so extract that constant - self * Self::splat(<$type>::to_degrees(1.)) - } - - /// Converts each lane from degrees to radians. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_radians(self) -> Self { - self * Self::splat(<$type>::to_radians(1.)) - } - - /// Returns true for each lane if it has a positive sign, including - /// `+0.0`, `NaN`s with positive sign bit and positive infinity. 
- #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_sign_positive(self) -> Mask<$mask_ty, LANES> { - !self.is_sign_negative() - } - - /// Returns true for each lane if it has a negative sign, including - /// `-0.0`, `NaN`s with negative sign bit and negative infinity. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> { - let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1); - sign_bits.simd_gt(Simd::splat(0)) - } - - /// Returns true for each lane if its value is `NaN`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_nan(self) -> Mask<$mask_ty, LANES> { - self.simd_ne(self) - } - - /// Returns true for each lane if its value is positive infinity or negative infinity. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_infinite(self) -> Mask<$mask_ty, LANES> { - self.abs().simd_eq(Self::splat(<$type>::INFINITY)) - } - - /// Returns true for each lane if its value is neither infinite nor `NaN`. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_finite(self) -> Mask<$mask_ty, LANES> { - self.abs().simd_lt(Self::splat(<$type>::INFINITY)) - } - - /// Returns true for each lane if its value is subnormal. - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> { - self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).simd_eq(Simd::splat(0)) - } - - /// Returns true for each lane if its value is neither zero, infinite, - /// subnormal, nor `NaN`. 
- #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn is_normal(self) -> Mask<$mask_ty, LANES> { - !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) - } - - /// Replaces each lane with a number that represents its sign. - /// - /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` - /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` - /// * `NAN` if the number is `NAN` - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn signum(self) -> Self { - self.is_nan().select(Self::splat(<$type>::NAN), Self::splat(1.0).copysign(self)) - } - - /// Returns each lane with the magnitude of `self` and the sign of `sign`. - /// - /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn copysign(self, sign: Self) -> Self { - let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); - let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); - Self::from_bits(sign_bit | magnitude) - } - } - }; -} - -impl_float_vector! { f32, u32, i32 } -impl_float_vector! { f64, u64, i64 } +use crate::simd::Simd; /// A 64-bit SIMD vector with two elements of type `f32`. pub type f32x2 = Simd; @@ -161,73 +22,3 @@ pub type f64x4 = Simd; /// A 512-bit SIMD vector with eight elements of type `f64`. pub type f64x8 = Simd; - -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - -/// SIMD operations on vectors of floating point numbers. -pub trait SimdFloat: Sized + Sealed { - /// Returns the minimum of each lane. - /// - /// If one of the values is `NAN`, then the other value is returned. - #[must_use = "method returns a new vector and does not mutate the original value"] - fn simd_min(self, other: Self) -> Self; - - /// Returns the maximum of each lane. 
- /// - /// If one of the values is `NAN`, then the other value is returned. - #[must_use = "method returns a new vector and does not mutate the original value"] - fn simd_max(self, other: Self) -> Self; - - /// Restrict each lane to a certain interval unless it is NaN. - /// - /// For each lane in `self`, returns the corresponding lane in `max` if the lane is - /// greater than `max`, and the corresponding lane in `min` if the lane is less - /// than `min`. Otherwise returns the lane in `self`. - #[must_use = "method returns a new vector and does not mutate the original value"] - fn simd_clamp(self, min: Self, max: Self) -> Self; -} - -macro_rules! impl_simd_float { - { $($float:ty),* } => { - $( - impl Sealed for Simd<$float, LANES> - where - LaneCount: SupportedLaneCount, - { - } - - impl SimdFloat for Simd<$float, LANES> - where - LaneCount: SupportedLaneCount, - { - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - fn simd_min(self, other: Self) -> Self { - unsafe { intrinsics::simd_fmin(self, other) } - } - - #[inline] - fn simd_max(self, other: Self) -> Self { - unsafe { intrinsics::simd_fmax(self, other) } - } - - #[inline] - fn simd_clamp(self, min: Self, max: Self) -> Self { - assert!( - min.simd_le(max).all(), - "each lane in `min` must be less than or equal to the corresponding lane in `max`", - ); - let mut x = self; - x = x.simd_lt(min).select(min, x); - x = x.simd_gt(max).select(max, x); - x - } - } - )* - } -} - -impl_simd_float! { f32, f64 } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 48c512be7d0..f759394d075 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -348,6 +348,7 @@ macro_rules! impl_unsigned_tests { macro_rules! impl_float_tests { { $scalar:tt, $int_scalar:tt } => { mod $scalar { + use core_simd::SimdFloat; type Vector = core_simd::Simd; type Scalar = $scalar; @@ -464,7 +465,6 @@ macro_rules! 
impl_float_tests { } fn simd_min() { - use core_simd::simd::SimdFloat; // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( &Vector::::simd_min, @@ -488,7 +488,6 @@ macro_rules! impl_float_tests { } fn simd_max() { - use core_simd::simd::SimdFloat; // Regular conditions (both values aren't zero) test_helpers::test_binary_elementwise( &Vector::::simd_max, @@ -512,7 +511,6 @@ macro_rules! impl_float_tests { } fn simd_clamp() { - use core_simd::simd::SimdFloat; test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { From 528bc8593ad756239a6ded0443f10af657488559 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 15 Apr 2022 13:47:43 -0400 Subject: [PATCH 070/161] Improve copysign documentation --- crates/core_simd/src/elements/float.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index fafbd2a4d21..456dd780dac 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -83,7 +83,7 @@ pub trait SimdFloat: Sized + Sealed { /// Returns each lane with the magnitude of `self` and the sign of `sign`. /// - /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. + /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned. 
#[must_use = "method returns a new vector and does not mutate the original value"] fn copysign(self, sign: Self) -> Self; From 62d3b2e39c3610046afd927843942cfdddab6753 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 16 Apr 2022 16:17:43 -0400 Subject: [PATCH 071/161] Add Copy bound to SIMD traits --- crates/core_simd/src/elements/float.rs | 2 +- crates/core_simd/src/elements/int.rs | 2 +- crates/core_simd/src/elements/uint.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index 456dd780dac..5a628f2121e 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -5,7 +5,7 @@ use crate::simd::{ }; /// Operations on SIMD vectors of floats. -pub trait SimdFloat: Sized + Sealed { +pub trait SimdFloat: Copy + Sealed { /// Mask type used for manipulating this SIMD vector type. type Mask; diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index c3139b4ba3e..787a0741146 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -4,7 +4,7 @@ use crate::simd::{ }; /// Operations on SIMD vectors of signed integers. -pub trait SimdInt: Sized + Sealed { +pub trait SimdInt: Copy + Sealed { /// Mask type used for manipulating this SIMD vector type. type Mask; diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index cba1a9b0ce0..f9d43a1d19b 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -2,7 +2,7 @@ use super::sealed::Sealed; use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. -pub trait SimdUint: Sized + Sealed { +pub trait SimdUint: Copy + Sealed { /// Scalar type contained by this SIMD vector type. 
type Scalar; From af53b5de24712a29f952b657d6e3107ba32f6e67 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 25 Apr 2022 16:32:56 -0400 Subject: [PATCH 072/161] rust-lang/portable-simd#279: Silence clippy false alarms --- crates/core_simd/src/ops.rs | 1 + crates/core_simd/src/vector.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index d39b4091df9..8dbae346b5f 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -48,6 +48,7 @@ macro_rules! unsafe_base { // cg_clif defaults to this, and scalar MIR shifts also default to wrapping macro_rules! wrap_bitshift { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => { + #[allow(clippy::suspicious_arithmetic_impl)] unsafe { $crate::simd::intrinsics::$simd_call( $lhs, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index f7989ee762a..9eb51f957d1 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -479,6 +479,7 @@ where mask.all() } + #[allow(clippy::partialeq_ne_impl)] #[inline] fn ne(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. From 98cd636d581962c29ca336ca89dce083dbed926f Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 2 Mar 2022 20:09:01 -0800 Subject: [PATCH 073/161] Add Mask::cast --- crates/core_simd/src/masks.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e8962b86b11..8f2f3f6aee0 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -181,6 +181,13 @@ where self.0.to_int() } + /// Converts the mask to a mask of any other lane size. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn cast(self) -> Mask { + Mask(self.0.convert()) + } + /// Tests the value of the specified lane. 
/// /// # Safety From aa11959f19789f7db6d3a37bc52f5d7718cc9224 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 21 May 2022 15:13:20 -0400 Subject: [PATCH 074/161] Add mask cast tests --- crates/core_simd/tests/masks.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index d10c6610f50..3a0493d4ee6 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -99,6 +99,29 @@ macro_rules! test_mask_api { assert_eq!(bitmask, 0b01); assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask); } + + #[test] + fn cast() { + fn cast_impl() + where + core_simd::Mask<$type, 8>: Into>, + { + let values = [true, false, false, true, false, false, true, false]; + let mask = core_simd::Mask::<$type, 8>::from_array(values); + + let cast_mask = mask.cast::(); + assert_eq!(values, cast_mask.to_array()); + + let into_mask: core_simd::Mask = mask.into(); + assert_eq!(values, into_mask.to_array()); + } + + cast_impl::(); + cast_impl::(); + cast_impl::(); + cast_impl::(); + cast_impl::(); + } } } } From c9f4e0ef98a4c8c54919d25eafd83e9bcaf2e4df Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 21 May 2022 16:49:03 -0400 Subject: [PATCH 075/161] Use Mask::cast in From impl --- crates/core_simd/src/masks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 8f2f3f6aee0..dcec336cfaf 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -578,7 +578,7 @@ macro_rules! 
impl_from { LaneCount: SupportedLaneCount, { fn from(value: Mask<$from, LANES>) -> Self { - Self(value.0.convert()) + value.cast() } } )* From b7fea94c96769e939ecdc98f368bb6daa330aa0b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 13 Jan 2022 21:20:17 -0500 Subject: [PATCH 076/161] Generically implement ToBitMaskArray --- crates/core_simd/src/masks.rs | 5 +- crates/core_simd/src/masks/bitmask.rs | 20 ++++++- crates/core_simd/src/masks/full_masks.rs | 68 +++++++++++++++++++++++- crates/core_simd/src/masks/to_bitmask.rs | 38 +++++++++++++ crates/core_simd/tests/masks.rs | 13 +++++ 5 files changed, 141 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index dcec336cfaf..e65548a3287 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -13,7 +13,10 @@ mod mask_impl; mod to_bitmask; -pub use to_bitmask::ToBitMask; +pub use to_bitmask::{ToBitMask, ToBitMaskArray}; + +#[cfg(feature = "generic_const_exprs")] +pub use to_bitmask::bitmask_len; use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; use core::cmp::Ordering; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index ec4dd357ee9..2e2c0a45c51 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray}; use core::marker::PhantomData; /// A mask where each lane is represented by a single bit. 
@@ -115,6 +115,24 @@ where unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } + #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] + pub fn to_bitmask_array(self) -> [u8; N] { + assert!(core::mem::size_of::() == N); + + // Safety: converting an integer to an array of bytes of the same size is safe + unsafe { core::mem::transmute_copy(&self.0) } + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_bitmask_array(bitmask: [u8; N]) -> Self { + assert!(core::mem::size_of::() == N); + + // Safety: converting an array of bytes to an integer of the same size is safe + Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData) + } + #[inline] pub fn to_bitmask_integer(self) -> U where diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index efa688b128f..b1c3b2b88ad 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -2,7 +2,7 @@ use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray}; #[repr(transparent)] pub struct Mask(Simd) @@ -139,6 +139,72 @@ where unsafe { Mask(intrinsics::simd_cast(self.0)) } } + #[inline] + #[must_use = "method returns a new array and does not mutate the original value"] + pub fn to_bitmask_array(self) -> [u8; N] + where + super::Mask: ToBitMaskArray, + [(); as ToBitMaskArray>::BYTES]: Sized, + { + assert_eq!( as ToBitMaskArray>::BYTES, N); + + // Safety: N is the correct bitmask size + // + // The transmute below allows this function to be marked safe, since it will prevent + // monomorphization errors in the case of an incorrect size. 
+ unsafe { + // Compute the bitmask + let bitmask: [u8; as ToBitMaskArray>::BYTES] = + intrinsics::simd_bitmask(self.0); + + // Transmute to the return type, previously asserted to be the same size + let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask); + + // LLVM assumes bit order should match endianness + if cfg!(target_endian = "big") { + for x in bitmask.as_mut() { + *x = x.reverse_bits(); + } + }; + + bitmask + } + } + + #[inline] + #[must_use = "method returns a new mask and does not mutate the original value"] + pub fn from_bitmask_array(mut bitmask: [u8; N]) -> Self + where + super::Mask: ToBitMaskArray, + [(); as ToBitMaskArray>::BYTES]: Sized, + { + assert_eq!( as ToBitMaskArray>::BYTES, N); + + // Safety: N is the correct bitmask size + // + // The transmute below allows this function to be marked safe, since it will prevent + // monomorphization errors in the case of an incorrect size. + unsafe { + // LLVM assumes bit order should match endianness + if cfg!(target_endian = "big") { + for x in bitmask.as_mut() { + *x = x.reverse_bits(); + } + } + + // Transmute to the bitmask type, previously asserted to be the same size + let bitmask: [u8; as ToBitMaskArray>::BYTES] = + core::mem::transmute_copy(&bitmask); + + // Compute the regular mask + Self::from_int_unchecked(intrinsics::simd_select_bitmask( + bitmask, + Self::splat(true).to_int(), + Self::splat(false).to_int(), + )) + } + } + #[inline] pub(crate) fn to_bitmask_integer(self) -> U where diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index c263f6a4eec..ee229fc7a44 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -31,6 +31,24 @@ pub unsafe trait ToBitMask: Sealed { fn from_bitmask(bitmask: Self::BitMask) -> Self; } +/// Converts masks to and from byte array bitmasks. +/// +/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. 
+/// +/// # Safety +/// This trait is `unsafe` and sealed, since the `BYTES` value must match the number of lanes in +/// the mask. +pub unsafe trait ToBitMaskArray: Sealed { + /// The length of the bitmask array. + const BYTES: usize; + + /// Converts a mask to a bitmask. + fn to_bitmask_array(self) -> [u8; Self::BYTES]; + + /// Converts a bitmask to a mask. + fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self; +} + macro_rules! impl_integer_intrinsic { { $(unsafe impl ToBitMask for Mask<_, $lanes:literal>)* } => { $( @@ -58,3 +76,23 @@ impl_integer_intrinsic! { unsafe impl ToBitMask for Mask<_, 32> unsafe impl ToBitMask for Mask<_, 64> } + +/// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes. +pub const fn bitmask_len(lanes: usize) -> usize { + (lanes + 7) / 8 +} + +unsafe impl ToBitMaskArray for Mask +where + LaneCount: SupportedLaneCount, +{ + const BYTES: usize = bitmask_len(LANES); + + fn to_bitmask_array(self) -> [u8; Self::BYTES] { + self.0.to_bitmask_array() + } + + fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self { + Mask(mask_impl::Mask::from_bitmask_array(bitmask)) + } +} diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 3a0493d4ee6..6150124b8ca 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -122,6 +122,19 @@ macro_rules! 
test_mask_api { cast_impl::(); cast_impl::(); } + + #[test] + fn roundtrip_bitmask_array_conversion() { + use core_simd::ToBitMaskArray; + let values = [ + true, false, false, true, false, false, true, false, + true, true, false, false, false, false, false, true, + ]; + let mask = core_simd::Mask::<$type, 16>::from_array(values); + let bitmask = mask.to_bitmask_array(); + assert_eq!(bitmask, [0b01001001, 0b10000011]); + assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask_array(bitmask), mask); + } } } } From 1cee9304b3678523ad3e234d9d72fcd1d73fff99 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 21 May 2022 20:08:38 -0400 Subject: [PATCH 077/161] Fix generic_const_exprs feature --- crates/core_simd/src/masks.rs | 4 ++-- crates/core_simd/src/masks/bitmask.rs | 4 +++- crates/core_simd/src/masks/full_masks.rs | 7 ++++++- crates/core_simd/src/masks/to_bitmask.rs | 3 +++ crates/core_simd/tests/masks.rs | 1 + 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e65548a3287..11d7288eccb 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -13,10 +13,10 @@ mod mask_impl; mod to_bitmask; -pub use to_bitmask::{ToBitMask, ToBitMaskArray}; +pub use to_bitmask::ToBitMask; #[cfg(feature = "generic_const_exprs")] -pub use to_bitmask::bitmask_len; +pub use to_bitmask::{bitmask_len, ToBitMaskArray}; use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount}; use core::cmp::Ordering; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 2e2c0a45c51..365ecc0a325 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, 
ToBitMask}; use core::marker::PhantomData; /// A mask where each lane is represented by a single bit. @@ -115,6 +115,7 @@ where unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } } + #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask_array(self) -> [u8; N] { @@ -124,6 +125,7 @@ where unsafe { core::mem::transmute_copy(&self.0) } } + #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask_array(bitmask: [u8; N]) -> Self { diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index b1c3b2b88ad..7ed844de625 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -2,7 +2,10 @@ use super::MaskElement; use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray}; +use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; + +#[cfg(feature = "generic_const_exprs")] +use crate::simd::ToBitMaskArray; #[repr(transparent)] pub struct Mask(Simd) @@ -139,6 +142,7 @@ where unsafe { Mask(intrinsics::simd_cast(self.0)) } } + #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new array and does not mutate the original value"] pub fn to_bitmask_array(self) -> [u8; N] @@ -171,6 +175,7 @@ where } } + #[cfg(feature = "generic_const_exprs")] #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask_array(mut bitmask: [u8; N]) -> Self diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index ee229fc7a44..954f88ea511 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -38,6 +38,7 @@ pub unsafe trait ToBitMask: Sealed { /// # Safety /// This trait is `unsafe` and 
sealed, since the `BYTES` value must match the number of lanes in /// the mask. +#[cfg(feature = "generic_const_exprs")] pub unsafe trait ToBitMaskArray: Sealed { /// The length of the bitmask array. const BYTES: usize; @@ -78,10 +79,12 @@ impl_integer_intrinsic! { } /// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes. +#[cfg(feature = "generic_const_exprs")] pub const fn bitmask_len(lanes: usize) -> usize { (lanes + 7) / 8 } +#[cfg(feature = "generic_const_exprs")] unsafe impl ToBitMaskArray for Mask where LaneCount: SupportedLaneCount, diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 6150124b8ca..673d0db93fe 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -123,6 +123,7 @@ macro_rules! test_mask_api { cast_impl::(); } + #[cfg(feature = "generic_const_exprs")] #[test] fn roundtrip_bitmask_array_conversion() { use core_simd::ToBitMaskArray; From bca8dec404c18d9f4ef9fa1ec5f19766910d0c84 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 22 May 2022 01:20:28 -0400 Subject: [PATCH 078/161] Remove incorrect comment --- crates/core_simd/src/masks/full_masks.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 7ed844de625..adf0fcbeae2 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -153,9 +153,6 @@ where assert_eq!( as ToBitMaskArray>::BYTES, N); // Safety: N is the correct bitmask size - // - // The transmute below allows this function to be marked safe, since it will prevent - // monomorphization errors in the case of an incorrect size. 
unsafe { // Compute the bitmask let bitmask: [u8; as ToBitMaskArray>::BYTES] = @@ -186,9 +183,6 @@ where assert_eq!( as ToBitMaskArray>::BYTES, N); // Safety: N is the correct bitmask size - // - // The transmute below allows this function to be marked safe, since it will prevent - // monomorphization errors in the case of an incorrect size. unsafe { // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { From 05c92c73c181015ed512255abef8377292d5cbf7 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 21 May 2022 14:55:52 -0400 Subject: [PATCH 079/161] Document remaining internal unsafety, and deny undocumented unsafety --- crates/core_simd/src/elements/float.rs | 5 ++++ crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/masks.rs | 1 + crates/core_simd/src/masks/to_bitmask.rs | 32 +++++++++--------------- crates/core_simd/src/ops.rs | 3 +++ crates/core_simd/src/ops/unary.rs | 1 + crates/core_simd/src/round.rs | 2 ++ crates/core_simd/src/vector.rs | 26 ++++++++++++++++++- 8 files changed, 50 insertions(+), 22 deletions(-) diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index 5a628f2121e..67e4454e5e1 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -202,17 +202,20 @@ macro_rules! impl_trait { #[inline] fn to_bits(self) -> Simd<$bits_ty, LANES> { assert_eq!(core::mem::size_of::(), core::mem::size_of::()); + // Safety: transmuting between vector types is safe unsafe { core::mem::transmute_copy(&self) } } #[inline] fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self { assert_eq!(core::mem::size_of::(), core::mem::size_of::()); + // Safety: transmuting between vector types is safe unsafe { core::mem::transmute_copy(&bits) } } #[inline] fn abs(self) -> Self { + // Safety: `self` is a float vector unsafe { intrinsics::simd_fabs(self) } } @@ -283,11 +286,13 @@ macro_rules! 
impl_trait { #[inline] fn simd_min(self, other: Self) -> Self { + // Safety: `self` and `other` are float vectors unsafe { intrinsics::simd_fmin(self, other) } } #[inline] fn simd_max(self, other: Self) -> Self { + // Safety: `self` and `other` are floating point vectors unsafe { intrinsics::simd_fmax(self, other) } } diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 2632073622e..715f258f617 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -12,7 +12,7 @@ #![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))] #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] #![warn(missing_docs)] -#![deny(unsafe_op_in_unsafe_fn)] +#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 11d7288eccb..c36c336d8a2 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -68,6 +68,7 @@ macro_rules! impl_element { const FALSE: Self = 0; } + // Safety: this is a valid mask element type unsafe impl MaskElement for $ty {} } } diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 954f88ea511..65d3ce9be65 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -16,11 +16,7 @@ where /// Converts masks to and from integer bitmasks. /// /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB. -/// -/// # Safety -/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in -/// the mask. -pub unsafe trait ToBitMask: Sealed { +pub trait ToBitMask: Sealed { /// The integer bitmask type. type BitMask; @@ -34,12 +30,8 @@ pub unsafe trait ToBitMask: Sealed { /// Converts masks to and from byte array bitmasks. 
/// /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte. -/// -/// # Safety -/// This trait is `unsafe` and sealed, since the `BYTES` value must match the number of lanes in -/// the mask. #[cfg(feature = "generic_const_exprs")] -pub unsafe trait ToBitMaskArray: Sealed { +pub trait ToBitMaskArray: Sealed { /// The length of the bitmask array. const BYTES: usize; @@ -51,9 +43,9 @@ pub unsafe trait ToBitMaskArray: Sealed { } macro_rules! impl_integer_intrinsic { - { $(unsafe impl ToBitMask for Mask<_, $lanes:literal>)* } => { + { $(impl ToBitMask for Mask<_, $lanes:literal>)* } => { $( - unsafe impl ToBitMask for Mask { + impl ToBitMask for Mask { type BitMask = $int; fn to_bitmask(self) -> $int { @@ -69,13 +61,13 @@ macro_rules! impl_integer_intrinsic { } impl_integer_intrinsic! { - unsafe impl ToBitMask for Mask<_, 1> - unsafe impl ToBitMask for Mask<_, 2> - unsafe impl ToBitMask for Mask<_, 4> - unsafe impl ToBitMask for Mask<_, 8> - unsafe impl ToBitMask for Mask<_, 16> - unsafe impl ToBitMask for Mask<_, 32> - unsafe impl ToBitMask for Mask<_, 64> + impl ToBitMask for Mask<_, 1> + impl ToBitMask for Mask<_, 2> + impl ToBitMask for Mask<_, 4> + impl ToBitMask for Mask<_, 8> + impl ToBitMask for Mask<_, 16> + impl ToBitMask for Mask<_, 32> + impl ToBitMask for Mask<_, 64> } /// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes. @@ -85,7 +77,7 @@ pub const fn bitmask_len(lanes: usize) -> usize { } #[cfg(feature = "generic_const_exprs")] -unsafe impl ToBitMaskArray for Mask +impl ToBitMaskArray for Mask where LaneCount: SupportedLaneCount, { diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 8dbae346b5f..5a077a469d8 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -33,6 +33,7 @@ where macro_rules! 
unsafe_base { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { + // Safety: $lhs and $rhs are vectors unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } }; } @@ -49,6 +50,7 @@ macro_rules! unsafe_base { macro_rules! wrap_bitshift { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => { #[allow(clippy::suspicious_arithmetic_impl)] + // Safety: $lhs and the bitand result are vectors unsafe { $crate::simd::intrinsics::$simd_call( $lhs, @@ -91,6 +93,7 @@ macro_rules! int_divrem_guard { // Nice base case to make it easy to const-fold away the other branch. $rhs }; + // Safety: $lhs and rhs are vectors unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) } } }; diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index 4ebea560fc6..4ad02215034 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -14,6 +14,7 @@ macro_rules! neg { #[inline] #[must_use = "operator returns a new vector without mutating the input"] fn neg(self) -> Self::Output { + // Safety: `self` is a signed vector unsafe { intrinsics::simd_neg(self) } } })* diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 556bc2cc1fe..e111f3e0494 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -30,6 +30,8 @@ macro_rules! implement { $type: FloatToInt, I: SimdElement, { + // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to + // an integer. unsafe { intrinsics::simd_cast(self) } } } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 9eb51f957d1..fac7dca51f4 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -213,7 +213,7 @@ where #[inline] #[cfg(not(bootstrap))] pub fn cast(self) -> Simd { - // Safety: The input argument is a vector of a known SIMD type. + // Safety: The input argument is a vector of a valid SIMD element type. 
unsafe { intrinsics::simd_as(self) } } @@ -624,61 +624,85 @@ pub unsafe trait SimdElement: Sealed + Copy { } impl Sealed for u8 {} + +// Safety: u8 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for u8 { type Mask = i8; } impl Sealed for u16 {} + +// Safety: u16 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for u16 { type Mask = i16; } impl Sealed for u32 {} + +// Safety: u32 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for u32 { type Mask = i32; } impl Sealed for u64 {} + +// Safety: u64 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for u64 { type Mask = i64; } impl Sealed for usize {} + +// Safety: usize is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for usize { type Mask = isize; } impl Sealed for i8 {} + +// Safety: i8 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for i8 { type Mask = i8; } impl Sealed for i16 {} + +// Safety: i16 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for i16 { type Mask = i16; } impl Sealed for i32 {} + +// Safety: i32 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for i32 { type Mask = i32; } impl Sealed for i64 {} + +// Safety: i64 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for i64 { type Mask = i64; } impl Sealed for isize {} + +// Safety: isize is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for isize { type Mask = isize; } impl Sealed for f32 {} + +// Safety: f32 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for f32 { type Mask = i32; } impl Sealed for f64 {} + +// Safety: f64 is a valid SIMD element type, and is supported by this API unsafe impl SimdElement for f64 { type Mask = i64; } From 5562b02ff059fd519d59c1fb7873bdd386efc22e 
Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 2 Jun 2022 10:19:20 -0400 Subject: [PATCH 080/161] Deduplicate to_int_unchecked --- crates/core_simd/src/mod.rs | 1 - crates/core_simd/src/round.rs | 42 ---------------------------------- crates/core_simd/src/vector.rs | 25 ++++++++++++++++++++ 3 files changed, 25 insertions(+), 43 deletions(-) delete mode 100644 crates/core_simd/src/round.rs diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 590b2e4a153..b472aa3abe2 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -14,7 +14,6 @@ mod lane_count; mod masks; mod ops; mod ord; -mod round; mod select; mod vector; mod vendor; diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs deleted file mode 100644 index e111f3e0494..00000000000 --- a/crates/core_simd/src/round.rs +++ /dev/null @@ -1,42 +0,0 @@ -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; -use core::convert::FloatToInt; - -macro_rules! implement { - { - $type:ty - } => { - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Rounds toward zero and converts to the same-width integer type, assuming that - /// the value is finite and fits in that type. - /// - /// # Safety - /// The value must: - /// - /// * Not be NaN - /// * Not be infinite - /// * Be representable in the return type, after truncating off its fractional part - /// - /// If these requirements are infeasible or costly, consider using the safe function [cast], - /// which saturates on conversion. - /// - /// [cast]: Simd::cast - #[inline] - pub unsafe fn to_int_unchecked(self) -> Simd - where - $type: FloatToInt, - I: SimdElement, - { - // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to - // an integer. - unsafe { intrinsics::simd_cast(self) } - } - } - } -} - -implement! { f32 } -implement! 
{ f64 } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index fac7dca51f4..7433a695da9 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -217,6 +217,31 @@ where unsafe { intrinsics::simd_as(self) } } + /// Rounds toward zero and converts to the same-width integer type, assuming that + /// the value is finite and fits in that type. + /// + /// # Safety + /// The value must: + /// + /// * Not be NaN + /// * Not be infinite + /// * Be representable in the return type, after truncating off its fractional part + /// + /// If these requirements are infeasible or costly, consider using the safe function [cast], + /// which saturates on conversion. + /// + /// [cast]: Simd::cast + #[inline] + pub unsafe fn to_int_unchecked(self) -> Simd + where + T: core::convert::FloatToInt, + I: SimdElement, + { + // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to + // an integer. + unsafe { intrinsics::simd_cast(self) } + } + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. /// If an index is out-of-bounds, the lane is instead selected from the `or` vector. /// From c9636158d91dda241eca5a729fcb7e2a45d7a950 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 6 Jun 2022 12:16:17 -0700 Subject: [PATCH 081/161] Change `Simd::splat` to not generate a loop This fixes poor codegen in some circumstances for `u16x8::splat` on x86_64 https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd/topic/Very.20bad.20.60u16x8.3A.3Asplat.60.20codegen.20on.20x86_64 --- crates/core_simd/src/vector.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index fac7dca51f4..8379135826d 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -9,8 +9,9 @@ pub use uint::*; // Vectors of pointers are not for public use at the current time. 
pub(crate) mod ptr; -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount}; +use crate::simd::{ + intrinsics, LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount, Swizzle, +}; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. /// @@ -123,8 +124,12 @@ where /// let v = u32x4::splat(8); /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` - pub const fn splat(value: T) -> Self { - Self([value; LANES]) + pub fn splat(value: T) -> Self { + struct Splat; + impl Swizzle<1, LANES> for Splat { + const INDEX: [usize; LANES] = [0; LANES]; + } + Splat::swizzle(Simd::::from([value])) } /// Returns an array reference containing the entire SIMD vector. From f7412ad7b918578864f4d9a0fc24279f7fbebc31 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 6 Jun 2022 12:43:29 -0700 Subject: [PATCH 082/161] add workaround comment in `Simd::splat` --- crates/core_simd/src/vector.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 8379135826d..19bf45385db 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -125,6 +125,8 @@ where /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` pub fn splat(value: T) -> Self { + // This is a workaround for `[value; LANES]` generating a loop: + // https://github.com/rust-lang/rust/issues/97804 struct Splat; impl Swizzle<1, LANES> for Splat { const INDEX: [usize; LANES] = [0; LANES]; From ed8092e96bb5ad10f7242589f2c263746adafa35 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 21 Jun 2022 20:52:43 -0400 Subject: [PATCH 083/161] Clarify comment --- crates/core_simd/src/vector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index c6f588672b4..761151ab8b2 100644 --- a/crates/core_simd/src/vector.rs +++ 
b/crates/core_simd/src/vector.rs @@ -125,7 +125,7 @@ where /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` pub fn splat(value: T) -> Self { - // This is a workaround for `[value; LANES]` generating a loop: + // This is preferred over `[value; LANES]`, since it's explicitly a splat: // https://github.com/rust-lang/rust/issues/97804 struct Splat; impl Swizzle<1, LANES> for Splat { From 64bef2910be17ca75ced3f0a99b4584f69114c74 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 12 Apr 2022 11:01:22 -0400 Subject: [PATCH 084/161] portable-simd: use simd_arith_offset to avoid ptr-int transmutation --- crates/core_simd/src/intrinsics.rs | 4 ++++ crates/core_simd/src/vector/ptr.rs | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index ee7408b62de..a1de8474fb2 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -61,6 +61,10 @@ extern "platform-intrinsic" { /// xor pub(crate) fn simd_xor(x: T, y: T) -> T; + /// getelementptr (without inbounds) + #[cfg(not(bootstrap))] + pub(crate) fn simd_arith_offset(ptrs: T, offsets: U) -> T; + /// fptoui/fptosi/uitofp/sitofp /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5 /// but the truncated value must fit in the target type or the result is poison. diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs index 417d255c28d..68a9c67f795 100644 --- a/crates/core_simd/src/vector/ptr.rs +++ b/crates/core_simd/src/vector/ptr.rs @@ -1,5 +1,8 @@ //! Private implementation details of public gather/scatter APIs. +#[cfg(not(bootstrap))] +use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +#[cfg(bootstrap)] use core::mem; /// A vector of *const T. 
@@ -21,12 +24,16 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { + #[cfg(bootstrap)] // Safety: converting pointers to usize and vice-versa is safe // (even if using that pointer is not) unsafe { let x: Simd = mem::transmute_copy(&self); mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) } + #[cfg(not(bootstrap))] + // Safety: this intrinsic doesn't have a precondition + unsafe { intrinsics::simd_arith_offset(self, addend) } } } @@ -49,11 +56,15 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { + #[cfg(bootstrap)] // Safety: converting pointers to usize and vice-versa is safe // (even if using that pointer is not) unsafe { let x: Simd = mem::transmute_copy(&self); mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) } + #[cfg(not(bootstrap))] + // Safety: this intrinsic doesn't have a precondition + unsafe { intrinsics::simd_arith_offset(self, addend) } } } From bbf31f9c78ad35ad1ef8fa91146e31cb447662b9 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Fri, 20 May 2022 08:54:10 -0400 Subject: [PATCH 085/161] Finish bumping stage0 It looks like the last time had left some remaining cfg's -- which made me think that the stage0 bump was actually successful. This brings us to a released 1.62 beta though. 
--- crates/core_simd/src/intrinsics.rs | 1 - crates/core_simd/src/vector/ptr.rs | 19 ------------------- 2 files changed, 20 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index a1de8474fb2..6047890a093 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -62,7 +62,6 @@ extern "platform-intrinsic" { pub(crate) fn simd_xor(x: T, y: T) -> T; /// getelementptr (without inbounds) - #[cfg(not(bootstrap))] pub(crate) fn simd_arith_offset(ptrs: T, offsets: U) -> T; /// fptoui/fptosi/uitofp/sitofp diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs index 68a9c67f795..fa756344db9 100644 --- a/crates/core_simd/src/vector/ptr.rs +++ b/crates/core_simd/src/vector/ptr.rs @@ -1,9 +1,6 @@ //! Private implementation details of public gather/scatter APIs. -#[cfg(not(bootstrap))] use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount}; -#[cfg(bootstrap)] -use core::mem; /// A vector of *const T. 
#[derive(Debug, Copy, Clone)] @@ -24,14 +21,6 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { - #[cfg(bootstrap)] - // Safety: converting pointers to usize and vice-versa is safe - // (even if using that pointer is not) - unsafe { - let x: Simd = mem::transmute_copy(&self); - mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) - } - #[cfg(not(bootstrap))] // Safety: this intrinsic doesn't have a precondition unsafe { intrinsics::simd_arith_offset(self, addend) } } @@ -56,14 +45,6 @@ where #[inline] #[must_use] pub fn wrapping_add(self, addend: Simd) -> Self { - #[cfg(bootstrap)] - // Safety: converting pointers to usize and vice-versa is safe - // (even if using that pointer is not) - unsafe { - let x: Simd = mem::transmute_copy(&self); - mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::())) }) - } - #[cfg(not(bootstrap))] // Safety: this intrinsic doesn't have a precondition unsafe { intrinsics::simd_arith_offset(self, addend) } } From 2e081db92aa3ee0a4563bc28ce01bdad5b1b2efd Mon Sep 17 00:00:00 2001 From: The Atelier Date: Wed, 20 Jul 2022 17:23:46 -0700 Subject: [PATCH 086/161] Fix doctest imports using as_crate feature Within core, `use self::` does not work to import these items. And because core is not core_simd, neither does the existing `use`. So, use this quirky hack instead, switching the import on a feature. 
--- crates/core_simd/Cargo.toml | 3 ++- crates/core_simd/src/elements/float.rs | 16 +++++++++--- crates/core_simd/src/elements/int.rs | 36 +++++++++++++++++++------- crates/core_simd/src/elements/uint.rs | 8 ++++-- crates/core_simd/src/vector.rs | 14 +++++++--- 5 files changed, 57 insertions(+), 20 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 8877c6df66e..8a29cf15696 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -9,7 +9,8 @@ categories = ["hardware-support", "no-std"] license = "MIT OR Apache-2.0" [features] -default = [] +default = ["as_crate"] +as_crate = [] std = [] generic_const_exprs = [] diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index 67e4454e5e1..d6022327055 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -113,7 +113,9 @@ pub trait SimdFloat: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::f32x2; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; /// let v = f32x2::from_array([1., 2.]); /// assert_eq!(v.reduce_sum(), 3.); /// ``` @@ -125,7 +127,9 @@ pub trait SimdFloat: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::f32x2; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; /// let v = f32x2::from_array([3., 4.]); /// assert_eq!(v.reduce_product(), 12.); /// ``` @@ -142,7 +146,9 @@ pub trait SimdFloat: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::f32x2; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; /// let v = f32x2::from_array([1., 2.]); /// 
assert_eq!(v.reduce_max(), 2.); /// @@ -167,7 +173,9 @@ pub trait SimdFloat: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::f32x2; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{f32x2, SimdFloat}; /// let v = f32x2::from_array([3., 7.]); /// assert_eq!(v.reduce_min(), 3.); /// diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index 787a0741146..9b8c37ed466 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -16,7 +16,9 @@ pub trait SimdInt: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, 0, 1, MAX]); /// let max = Simd::splat(MAX); @@ -32,7 +34,9 @@ pub trait SimdInt: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, -2, -1, MAX]); /// let max = Simd::splat(MAX); @@ -48,7 +52,9 @@ pub trait SimdInt: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; /// use core::i32::{MIN, MAX}; /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); @@ -61,7 +67,9 @@ pub trait SimdInt: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + 
/// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; /// use core::i32::{MIN, MAX}; /// let xs = Simd::from_array([MIN, -2, 0, 3]); /// let unsat = xs.abs(); @@ -77,7 +85,9 @@ pub trait SimdInt: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdInt}; /// use core::i32::{MIN, MAX}; /// let x = Simd::from_array([MIN, -2, 3, MAX]); /// let unsat = -x; @@ -105,7 +115,9 @@ pub trait SimdInt: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::i32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_sum(), 10); /// @@ -121,7 +133,9 @@ pub trait SimdInt: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::i32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_product(), 24); /// @@ -137,7 +151,9 @@ pub trait SimdInt: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::i32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{i32x4, SimdInt}; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_max(), 4); /// ``` @@ -149,7 +165,9 @@ pub trait SimdInt: Copy + Sealed { /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::i32x4; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use 
core::simd; + /// # use simd::{i32x4, SimdInt}; /// let v = i32x4::from_array([1, 2, 3, 4]); /// assert_eq!(v.reduce_min(), 1); /// ``` diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index f9d43a1d19b..21e7e76eb3d 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -11,7 +11,9 @@ pub trait SimdUint: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdUint}; /// use core::u32::MAX; /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); @@ -27,7 +29,9 @@ pub trait SimdUint: Copy + Sealed { /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdUint}; /// use core::u32::MAX; /// let x = Simd::from_array([2, 1, 0, MAX]); /// let max = Simd::splat(MAX); diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 761151ab8b2..8661be938d5 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -173,7 +173,7 @@ where /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, u32x4}; + /// # use core::simd::u32x4; /// let source = vec![1, 2, 3, 4, 5, 6]; /// let v = u32x4::from_slice(&source); /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); @@ -332,7 +332,9 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask}; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdPartialOrd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 
3, 0, 5]); /// let alt = Simd::from_array([-5, -4, -3, -2]); @@ -389,7 +391,9 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core_simd::simd::{Simd, Mask}; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); @@ -423,7 +427,9 @@ where /// # Examples /// ``` /// # #![feature(portable_simd)] - /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask}; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdPartialOrd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); From b5f9d43ff1139fb5dbd1a919dbf63e48c2c56012 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 21 Jul 2022 14:53:07 -0700 Subject: [PATCH 087/161] rust-lang/portable-simd#289: Strengthen warnings about relying on Mask layout This makes it more clear that you can't rely on the layout of these, which seems worth doing given that the names vaguely suggest that you can (and the docs only clarify that you can't on Mask but not the maskNxM aliases). --- crates/core_simd/src/masks.rs | 76 ++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index c36c336d8a2..99535021735 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -83,7 +83,9 @@ impl_element! { isize } /// /// Masks represent boolean inclusion/exclusion on a per-lane basis. /// -/// The layout of this type is unspecified. 
+/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[T; LANES]`. #[repr(transparent)] pub struct Mask(mask_impl::Mask) where @@ -521,57 +523,129 @@ where } /// A mask for SIMD vectors with eight elements of 8 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i8; 8]`. pub type mask8x8 = Mask; /// A mask for SIMD vectors with 16 elements of 8 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i8; 16]`. pub type mask8x16 = Mask; /// A mask for SIMD vectors with 32 elements of 8 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i8; 32]`. pub type mask8x32 = Mask; /// A mask for SIMD vectors with 64 elements of 8 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i8; 64]`. pub type mask8x64 = Mask; /// A mask for SIMD vectors with four elements of 16 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i16; 4]`. pub type mask16x4 = Mask; /// A mask for SIMD vectors with eight elements of 16 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i16; 8]`. pub type mask16x8 = Mask; /// A mask for SIMD vectors with 16 elements of 16 bits. 
+/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i16; 16]`. pub type mask16x16 = Mask; /// A mask for SIMD vectors with 32 elements of 16 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i16; 32]`. pub type mask16x32 = Mask; /// A mask for SIMD vectors with two elements of 32 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i32; 2]`. pub type mask32x2 = Mask; /// A mask for SIMD vectors with four elements of 32 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i32; 4]`. pub type mask32x4 = Mask; /// A mask for SIMD vectors with eight elements of 32 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i32; 8]`. pub type mask32x8 = Mask; /// A mask for SIMD vectors with 16 elements of 32 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i32; 16]`. pub type mask32x16 = Mask; /// A mask for SIMD vectors with two elements of 64 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i64; 2]`. pub type mask64x2 = Mask; /// A mask for SIMD vectors with four elements of 64 bits. 
+/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i64; 4]`. pub type mask64x4 = Mask; /// A mask for SIMD vectors with eight elements of 64 bits. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[i64; 8]`. pub type mask64x8 = Mask; /// A mask for SIMD vectors with two elements of pointer width. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[isize; 2]`. pub type masksizex2 = Mask; /// A mask for SIMD vectors with four elements of pointer width. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[isize; 4]`. pub type masksizex4 = Mask; /// A mask for SIMD vectors with eight elements of pointer width. +/// +/// The layout of this type is unspecified, and may change between platforms +/// and/or Rust versions, and code should not assume that it is equivalent to +/// `[isize; 8]`. pub type masksizex8 = Mask; macro_rules! 
impl_from { From ddede9fb9b5bd3a7cce71775ac8ce7bd30fdf87a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 22 Jul 2022 09:39:23 -0400 Subject: [PATCH 088/161] make some Miri backtraces more pretty --- crates/core_simd/src/vector.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 8661be938d5..e8e8f6899d3 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -239,6 +239,7 @@ where /// /// [cast]: Simd::cast #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn to_int_unchecked(self) -> Simd where T: core::convert::FloatToInt, @@ -349,6 +350,7 @@ where /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[must_use] #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn gather_select_unchecked( slice: &[T], enable: Mask, @@ -444,6 +446,7 @@ where /// ``` /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_unchecked( self, slice: &mut [T], From 3183afb6b5fcbf688bb90cf1db3f635406f868dc Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 29 Jul 2022 11:57:05 -0400 Subject: [PATCH 089/161] Fix interleave/deinterleave for vectors with only one lane --- crates/core_simd/src/swizzle.rs | 12 ++++++++++-- crates/core_simd/tests/swizzle.rs | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 22999d24950..02567252a63 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -325,7 +325,11 @@ where const INDEX: [Which; LANES] = hi::(); } - (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) + if LANES == 1 { + 
(self, other) + } else { + (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) + } } /// Deinterleave two vectors. @@ -380,6 +384,10 @@ where const INDEX: [Which; LANES] = odd::(); } - (Even::swizzle2(self, other), Odd::swizzle2(self, other)) + if LANES == 1 { + (self, other) + } else { + (Even::swizzle2(self, other), Odd::swizzle2(self, other)) + } } } diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs index 51c63611aba..33a7becb421 100644 --- a/crates/core_simd/tests/swizzle.rs +++ b/crates/core_simd/tests/swizzle.rs @@ -60,3 +60,17 @@ fn interleave() { assert_eq!(even, a); assert_eq!(odd, b); } + +// portable-simd#298 +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn interleave_one() { + let a = Simd::from_array([0]); + let b = Simd::from_array([1]); + let (lo, hi) = a.interleave(b); + assert_eq!(lo.to_array(), [0]); + assert_eq!(hi.to_array(), [1]); + let (even, odd) = lo.deinterleave(hi); + assert_eq!(even, a); + assert_eq!(odd, b); +} From 8742a86b1da28c1bb7f0e7f663becde9b0c5a73e Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 29 Jul 2022 16:12:24 -0700 Subject: [PATCH 090/161] add all_lane_counts feature to enable non-power-of-2 lane counts <= 64 --- .github/workflows/ci.yml | 4 + crates/core_simd/Cargo.toml | 1 + crates/core_simd/src/lane_count.rs | 36 ++-- crates/test_helpers/Cargo.toml | 3 + crates/test_helpers/src/lib.rs | 271 ++++++++++++++++++++--------- 5 files changed, 217 insertions(+), 98 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d50dfa1be4c..acd47a3da72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -241,6 +241,10 @@ jobs: - "--features std" - "--features generic_const_exprs" - "--features std --features generic_const_exprs" + - "--features all_lane_counts" + - "--features all_lane_counts --features std" + - "--features all_lane_counts --features generic_const_exprs" + - "--features all_lane_counts --features std 
--features generic_const_exprs" steps: - uses: actions/checkout@v2 diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 8a29cf15696..7435e24edd3 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -13,6 +13,7 @@ default = ["as_crate"] as_crate = [] std = [] generic_const_exprs = [] +all_lane_counts = [] [target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen] version = "0.2" diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index 63723e2ec13..2b91eb9e800 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -23,24 +23,20 @@ pub trait SupportedLaneCount: Sealed { impl Sealed for LaneCount {} -impl SupportedLaneCount for LaneCount<1> { - type BitMask = [u8; 1]; -} -impl SupportedLaneCount for LaneCount<2> { - type BitMask = [u8; 1]; -} -impl SupportedLaneCount for LaneCount<4> { - type BitMask = [u8; 1]; -} -impl SupportedLaneCount for LaneCount<8> { - type BitMask = [u8; 1]; -} -impl SupportedLaneCount for LaneCount<16> { - type BitMask = [u8; 2]; -} -impl SupportedLaneCount for LaneCount<32> { - type BitMask = [u8; 4]; -} -impl SupportedLaneCount for LaneCount<64> { - type BitMask = [u8; 8]; +macro_rules! 
supported_lane_count { + ($($lanes:literal),+) => { + $( + impl SupportedLaneCount for LaneCount<$lanes> { + type BitMask = [u8; ($lanes + 7) / 8]; + } + )+ + }; } + +supported_lane_count!(1, 2, 4, 8, 16, 32, 64); +#[cfg(feature = "all_lane_counts")] +supported_lane_count!( + 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 +); diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index a04b0961d7f..1d2bc8b519a 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -8,3 +8,6 @@ publish = false version = "0.10" default-features = false features = ["alloc"] + +[features] +all_lane_counts = [] diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 141bee18a9a..650eadd12bf 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -333,6 +333,39 @@ pub fn test_ternary_elementwise< ); } +#[doc(hidden)] +#[macro_export] +macro_rules! test_lanes_helper { + ($($(#[$meta:meta])* $fn_name:ident $lanes:literal;)+) => { + $( + #[test] + $(#[$meta])* + fn $fn_name() { + implementation::<$lanes>(); + } + )+ + }; + ( + $(#[$meta:meta])+; + $($(#[$meta_before:meta])+ $fn_name_before:ident $lanes_before:literal;)* + $fn_name:ident $lanes:literal; + $($fn_name_rest:ident $lanes_rest:literal;)* + ) => { + $crate::test_lanes_helper!( + $(#[$meta])+; + $($(#[$meta_before])+ $fn_name_before $lanes_before;)* + $(#[$meta])+ $fn_name $lanes; + $($fn_name_rest $lanes_rest;)* + ); + }; + ( + $(#[$meta_ignored:meta])+; + $($(#[$meta:meta])+ $fn_name:ident $lanes:literal;)+ + ) => { + $crate::test_lanes_helper!($($(#[$meta])+ $fn_name $lanes;)+); + }; +} + /// Expand a const-generic test into separate tests for each possible lane count. #[macro_export] macro_rules! test_lanes { @@ -351,51 +384,90 @@ macro_rules! 
test_lanes { #[cfg(target_arch = "wasm32")] wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] - fn lanes_1() { - implementation::<1>(); - } + $crate::test_lanes_helper!( + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + lanes_1 1; + lanes_2 2; + lanes_4 4; + ); - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] - fn lanes_2() { - implementation::<2>(); - } - - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] - fn lanes_4() { - implementation::<4>(); - } - - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - fn lanes_8() { - implementation::<8>(); - } + $crate::test_lanes_helper!( + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + lanes_8 8; + lanes_16 16; + lanes_32 32; + lanes_64 64; + ); - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] - #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - fn lanes_16() { - implementation::<16>(); - } + #[cfg(feature = "all_lane_counts")] + $crate::test_lanes_helper!( + // test some odd and even non-power-of-2 lengths on miri + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + lanes_3 3; + lanes_5 5; + lanes_6 6; + ); - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg(feature = "all_lane_counts")] #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - fn lanes_32() { - implementation::<32>(); - } - - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] - #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - fn lanes_64() { - 
implementation::<64>(); - } + $crate::test_lanes_helper!( + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + lanes_7 7; + lanes_9 9; + lanes_10 10; + lanes_11 11; + lanes_12 12; + lanes_13 13; + lanes_14 14; + lanes_15 15; + lanes_17 17; + lanes_18 18; + lanes_19 19; + lanes_20 20; + lanes_21 21; + lanes_22 22; + lanes_23 23; + lanes_24 24; + lanes_25 25; + lanes_26 26; + lanes_27 27; + lanes_28 28; + lanes_29 29; + lanes_30 30; + lanes_31 31; + lanes_33 33; + lanes_34 34; + lanes_35 35; + lanes_36 36; + lanes_37 37; + lanes_38 38; + lanes_39 39; + lanes_40 40; + lanes_41 41; + lanes_42 42; + lanes_43 43; + lanes_44 44; + lanes_45 45; + lanes_46 46; + lanes_47 47; + lanes_48 48; + lanes_49 49; + lanes_50 50; + lanes_51 51; + lanes_52 52; + lanes_53 53; + lanes_54 54; + lanes_55 55; + lanes_56 56; + lanes_57 57; + lanes_58 58; + lanes_59 59; + lanes_60 60; + lanes_61 61; + lanes_62 62; + lanes_63 63; + ); } )* } @@ -416,47 +488,90 @@ macro_rules! test_lanes_panic { core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount, $body - #[test] - #[should_panic] - fn lanes_1() { - implementation::<1>(); - } + $crate::test_lanes_helper!( + #[should_panic]; + lanes_1 1; + lanes_2 2; + lanes_4 4; + ); - #[test] - #[should_panic] - fn lanes_2() { - implementation::<2>(); - } + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + $crate::test_lanes_helper!( + #[should_panic]; + lanes_8 8; + lanes_16 16; + lanes_32 32; + lanes_64 64; + ); - #[test] - #[should_panic] - fn lanes_4() { - implementation::<4>(); - } + #[cfg(feature = "all_lane_counts")] + $crate::test_lanes_helper!( + // test some odd and even non-power-of-2 lengths on miri + #[should_panic]; + lanes_3 3; + lanes_5 5; + lanes_6 6; + ); - #[test] - #[should_panic] - fn lanes_8() { - implementation::<8>(); - } - - #[test] - #[should_panic] - fn lanes_16() { - implementation::<16>(); - } - - #[test] - #[should_panic] - fn lanes_32() { - 
implementation::<32>(); - } - - #[test] - #[should_panic] - fn lanes_64() { - implementation::<64>(); - } + #[cfg(feature = "all_lane_counts")] + #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow + $crate::test_lanes_helper!( + #[should_panic]; + lanes_7 7; + lanes_9 9; + lanes_10 10; + lanes_11 11; + lanes_12 12; + lanes_13 13; + lanes_14 14; + lanes_15 15; + lanes_17 17; + lanes_18 18; + lanes_19 19; + lanes_20 20; + lanes_21 21; + lanes_22 22; + lanes_23 23; + lanes_24 24; + lanes_25 25; + lanes_26 26; + lanes_27 27; + lanes_28 28; + lanes_29 29; + lanes_30 30; + lanes_31 31; + lanes_33 33; + lanes_34 34; + lanes_35 35; + lanes_36 36; + lanes_37 37; + lanes_38 38; + lanes_39 39; + lanes_40 40; + lanes_41 41; + lanes_42 42; + lanes_43 43; + lanes_44 44; + lanes_45 45; + lanes_46 46; + lanes_47 47; + lanes_48 48; + lanes_49 49; + lanes_50 50; + lanes_51 51; + lanes_52 52; + lanes_53 53; + lanes_54 54; + lanes_55 55; + lanes_56 56; + lanes_57 57; + lanes_58 58; + lanes_59 59; + lanes_60 60; + lanes_61 61; + lanes_62 62; + lanes_63 63; + ); } )* } From 6bf512823548b4fdbb7127489e883bff8a98b33f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 1 Aug 2022 00:34:58 -0400 Subject: [PATCH 091/161] Simplify interleave/deinterleave and fix for odd-length vectors. --- crates/core_simd/src/swizzle.rs | 74 ++++++++++++--------------------- 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 02567252a63..0b66b8a0ae0 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -265,13 +265,10 @@ where /// Interleave two vectors. /// - /// Produces two vectors with lanes taken alternately from `self` and `other`. + /// The resulting vectors contain lanes taken alternatively from `self` and `other`, first + /// filling the first result, and then the second. 
/// - /// The first result contains the first `LANES / 2` lanes from `self` and `other`, - /// alternating, starting with the first lane of `self`. - /// - /// The second result contains the last `LANES / 2` lanes from `self` and `other`, - /// alternating, starting with the lane `LANES / 2` from the start of `self`. + /// The reverse of this operation is [`Simd::deinterleave`]. /// /// ``` /// #![feature(portable_simd)] @@ -285,29 +282,17 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn interleave(self, other: Self) -> (Self, Self) { - const fn lo() -> [Which; LANES] { + const fn interleave(high: bool) -> [Which; LANES] { let mut idx = [Which::First(0); LANES]; let mut i = 0; while i < LANES { - let offset = i / 2; - idx[i] = if i % 2 == 0 { - Which::First(offset) + // Treat the source as a concatenated vector + let dst_index = if high { i + LANES } else { i }; + let src_index = dst_index / 2 + (dst_index % 2) * LANES; + idx[i] = if src_index < LANES { + Which::First(src_index) } else { - Which::Second(offset) - }; - i += 1; - } - idx - } - const fn hi() -> [Which; LANES] { - let mut idx = [Which::First(0); LANES]; - let mut i = 0; - while i < LANES { - let offset = (LANES + i) / 2; - idx[i] = if i % 2 == 0 { - Which::First(offset) - } else { - Which::Second(offset) + Which::Second(src_index % LANES) }; i += 1; } @@ -318,18 +303,14 @@ where struct Hi; impl Swizzle2 for Lo { - const INDEX: [Which; LANES] = lo::(); + const INDEX: [Which; LANES] = interleave::(false); } impl Swizzle2 for Hi { - const INDEX: [Which; LANES] = hi::(); + const INDEX: [Which; LANES] = interleave::(true); } - if LANES == 1 { - (self, other) - } else { - (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) - } + (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) } /// Deinterleave two vectors. @@ -340,6 +321,8 @@ where /// The second result takes every other lane of `self` and then `other`, starting with /// the second lane. 
/// + /// The reverse of this operation is [`Simd::interleave`]. + /// /// ``` /// #![feature(portable_simd)] /// # use core::simd::Simd; @@ -352,22 +335,17 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn deinterleave(self, other: Self) -> (Self, Self) { - const fn even() -> [Which; LANES] { + const fn deinterleave(second: bool) -> [Which; LANES] { let mut idx = [Which::First(0); LANES]; let mut i = 0; - while i < LANES / 2 { - idx[i] = Which::First(2 * i); - idx[i + LANES / 2] = Which::Second(2 * i); - i += 1; - } - idx - } - const fn odd() -> [Which; LANES] { - let mut idx = [Which::First(0); LANES]; - let mut i = 0; - while i < LANES / 2 { - idx[i] = Which::First(2 * i + 1); - idx[i + LANES / 2] = Which::Second(2 * i + 1); + while i < LANES { + // Treat the source as a concatenated vector + let src_index = i * 2 + if second { 1 } else { 0 }; + idx[i] = if src_index < LANES { + Which::First(src_index) + } else { + Which::Second(src_index % LANES) + }; i += 1; } idx @@ -377,11 +355,11 @@ where struct Odd; impl Swizzle2 for Even { - const INDEX: [Which; LANES] = even::(); + const INDEX: [Which; LANES] = deinterleave::(false); } impl Swizzle2 for Odd { - const INDEX: [Which; LANES] = odd::(); + const INDEX: [Which; LANES] = deinterleave::(true); } if LANES == 1 { From c739af3908613ba3f611dce115525e2f2f91bfca Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 1 Aug 2022 00:38:29 -0400 Subject: [PATCH 092/161] Hide rustc unstable feature from docs --- crates/core_simd/src/swizzle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 0b66b8a0ae0..72cce7aeb04 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -271,7 +271,7 @@ where /// The reverse of this operation is [`Simd::deinterleave`]. 
/// /// ``` - /// #![feature(portable_simd)] + /// # #![feature(portable_simd)] /// # use core::simd::Simd; /// let a = Simd::from_array([0, 1, 2, 3]); /// let b = Simd::from_array([4, 5, 6, 7]); @@ -324,7 +324,7 @@ where /// The reverse of this operation is [`Simd::interleave`]. /// /// ``` - /// #![feature(portable_simd)] + /// # #![feature(portable_simd)] /// # use core::simd::Simd; /// let a = Simd::from_array([0, 4, 1, 5]); /// let b = Simd::from_array([2, 6, 3, 7]); From d030301161a372b545e5d8c1784cba113e5a8ebd Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 1 Aug 2022 19:52:35 -0400 Subject: [PATCH 093/161] Remove special case for length-1 vectors --- crates/core_simd/src/swizzle.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 72cce7aeb04..61cc604e4cd 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -362,10 +362,6 @@ where const INDEX: [Which; LANES] = deinterleave::(true); } - if LANES == 1 { - (self, other) - } else { - (Even::swizzle2(self, other), Odd::swizzle2(self, other)) - } + (Even::swizzle2(self, other), Odd::swizzle2(self, other)) } } From 5f7066430b9239cfe8243ddba4c29416f002ae6b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Mon, 1 Aug 2022 19:57:41 -0400 Subject: [PATCH 094/161] Simplify expression --- crates/core_simd/src/swizzle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index 61cc604e4cd..68f20516cf5 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -340,7 +340,7 @@ where let mut i = 0; while i < LANES { // Treat the source as a concatenated vector - let src_index = i * 2 + if second { 1 } else { 0 }; + let src_index = i * 2 + second as usize; idx[i] = if src_index < LANES { Which::First(src_index) } else { From 2c5ebfb6a26d384bc21db6796095890c1f13f19c Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Fri, 30 Sep 2022 20:25:34 -0500 Subject: [PATCH 095/161] add feature flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit couldn't run the `hellosimd` without it 🤷🏾 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index db0af2da606..791051f69ae 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ core_simd = { git = "https://github.com/rust-lang/portable-simd" } and finally write this in `src/main.rs`: ```rust +#![feature(portable_simd)] use core_simd::*; fn main() { let a = f32x4::splat(10.0); From 4491309cb01cc917ef455c41b0dcf9cf5900aa35 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 16 Oct 2022 13:31:42 -0400 Subject: [PATCH 096/161] Mark more mask functions inline --- crates/core_simd/src/masks.rs | 9 +++++++++ crates/core_simd/src/masks/bitmask.rs | 4 ++++ crates/core_simd/src/masks/full_masks.rs | 4 ++++ crates/core_simd/src/masks/to_bitmask.rs | 4 ++++ 4 files changed, 21 insertions(+) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 99535021735..7fd50fed447 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -55,6 +55,7 @@ pub unsafe trait MaskElement: SimdElement + Sealed {} macro_rules! impl_element { { $ty:ty } => { impl Sealed for $ty { + #[inline] fn valid(value: Simd) -> bool where LaneCount: SupportedLaneCount, @@ -62,6 +63,7 @@ macro_rules! impl_element { (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() } + #[inline] fn eq(self, other: Self) -> bool { self == other } const TRUE: Self = -1; @@ -104,6 +106,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn clone(&self) -> Self { *self } @@ -115,11 +118,13 @@ where LaneCount: SupportedLaneCount, { /// Construct a mask by setting all lanes to the given value. 
+ #[inline] pub fn splat(value: bool) -> Self { Self(mask_impl::Mask::splat(value)) } /// Converts an array of bools to a SIMD mask. + #[inline] pub fn from_array(array: [bool; LANES]) -> Self { // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of // true: 0b_0000_0001 @@ -136,6 +141,7 @@ where } /// Converts a SIMD mask to an array of bools. + #[inline] pub fn to_array(self) -> [bool; LANES] { // This follows mostly the same logic as from_array. // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of @@ -263,6 +269,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn from(array: [bool; LANES]) -> Self { Self::from_array(array) } @@ -273,6 +280,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn from(vector: Mask) -> Self { vector.to_array() } @@ -655,6 +663,7 @@ macro_rules! impl_from { where LaneCount: SupportedLaneCount, { + #[inline] fn from(value: Mask<$from, LANES>) -> Self { value.cast() } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 365ecc0a325..20465ba9b07 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -26,6 +26,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn clone(&self) -> Self { *self } @@ -36,6 +37,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn eq(&self, other: &Self) -> bool { self.0.as_ref() == other.0.as_ref() } @@ -46,6 +48,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn partial_cmp(&self, other: &Self) -> Option { self.0.as_ref().partial_cmp(other.0.as_ref()) } @@ -63,6 +66,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.as_ref().cmp(other.0.as_ref()) } diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index adf0fcbeae2..bcedd2df225 100644 --- 
a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -37,6 +37,7 @@ where T: MaskElement + PartialEq, LaneCount: SupportedLaneCount, { + #[inline] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } @@ -47,6 +48,7 @@ where T: MaskElement + PartialOrd, LaneCount: SupportedLaneCount, { + #[inline] fn partial_cmp(&self, other: &Self) -> Option { self.0.partial_cmp(&other.0) } @@ -64,6 +66,7 @@ where T: MaskElement + Ord, LaneCount: SupportedLaneCount, { + #[inline] fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.cmp(&other.0) } @@ -262,6 +265,7 @@ where T: MaskElement, LaneCount: SupportedLaneCount, { + #[inline] fn from(value: Mask) -> Self { value.0 } diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 65d3ce9be65..46914dfe0d9 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -48,10 +48,12 @@ macro_rules! impl_integer_intrinsic { impl ToBitMask for Mask { type BitMask = $int; + #[inline] fn to_bitmask(self) -> $int { self.0.to_bitmask_integer() } + #[inline] fn from_bitmask(bitmask: $int) -> Self { Self(mask_impl::Mask::from_bitmask_integer(bitmask)) } @@ -83,10 +85,12 @@ where { const BYTES: usize = bitmask_len(LANES); + #[inline] fn to_bitmask_array(self) -> [u8; Self::BYTES] { self.0.to_bitmask_array() } + #[inline] fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self { Mask(mask_impl::Mask::from_bitmask_array(bitmask)) } From ee9a23facb7871218f5f0bf596f77e27586187a9 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 16 Oct 2022 13:52:08 -0400 Subject: [PATCH 097/161] Update readme --- README.md | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 791051f69ae..4c1b4062100 100644 --- a/README.md +++ b/README.md @@ -24,20 +24,10 @@ or by setting up `rustup default nightly` or else with `cargo +nightly {build,te ```bash 
cargo new hellosimd ``` -to create a new crate. Edit `hellosimd/Cargo.toml` to be -```toml -[package] -name = "hellosimd" -version = "0.1.0" -edition = "2018" -[dependencies] -core_simd = { git = "https://github.com/rust-lang/portable-simd" } -``` - -and finally write this in `src/main.rs`: +to create a new crate. Finally write this in `src/main.rs`: ```rust #![feature(portable_simd)] -use core_simd::*; +use std::simd::f32x4; fn main() { let a = f32x4::splat(10.0); let b = f32x4::from_array([1.0, 2.0, 3.0, 4.0]); @@ -45,24 +35,22 @@ fn main() { } ``` -Explanation: We import all the bindings from the crate with the first line. Then, we construct our SIMD vectors with methods like `splat` or `from_array`. Finally, we can use operators on them like `+` and the appropriate SIMD instructions will be carried out. When we run `cargo run` you should get `[11.0, 12.0, 13.0, 14.0]`. +Explanation: We construct our SIMD vectors with methods like `splat` or `from_array`. Next, we can use operators like `+` on them, and the appropriate SIMD instructions will be carried out. When we run `cargo run` you should get `[11.0, 12.0, 13.0, 14.0]`. -## Code Organization +## Supported vectors -Currently the crate is organized so that each element type is a file, and then the 64-bit, 128-bit, 256-bit, and 512-bit vectors using those types are contained in said file. - -All types are then exported as a single, flat module. +Currently, vectors may have up to 64 elements, but aliases are provided only up to 512-bit vectors. Depending on the size of the primitive type, the number of lanes the vector will have varies. For example, 128-bit vectors have four `f32` lanes and two `f64` lanes. 
The supported element types are as follows: * **Floating Point:** `f32`, `f64` -* **Signed Integers:** `i8`, `i16`, `i32`, `i64`, `i128`, `isize` -* **Unsigned Integers:** `u8`, `u16`, `u32`, `u64`, `u128`, `usize` -* **Masks:** `mask8`, `mask16`, `mask32`, `mask64`, `mask128`, `masksize` +* **Signed Integers:** `i8`, `i16`, `i32`, `i64`, `isize` (`i128` excluded) +* **Unsigned Integers:** `u8`, `u16`, `u32`, `u64`, `usize` (`u128` excluded) +* **Masks:** 8-bit, 16-bit, 32-bit, 64-bit, and `usize`-sized masks Floating point, signed integers, and unsigned integers are the [primitive types](https://doc.rust-lang.org/core/primitive/index.html) you're already used to. -The `mask` types are "truthy" values, but they use the number of bits in their name instead of just 1 bit like a normal `bool` uses. +The mask types are "truthy" values, like `bool`, but have an unspecified layout in the vector type and cannot be constructed outside of a vector. [simd-guide]: ./beginners-guide.md [zulip-project-portable-simd]: https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd From f236f5745a0058bd85e044fe3252b87676843018 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 16 Oct 2022 18:08:17 -0400 Subject: [PATCH 098/161] Update README.md Co-authored-by: Jacob Lifshay --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c1b4062100..80313157ea2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ The supported element types are as follows: * **Masks:** 8-bit, 16-bit, 32-bit, 64-bit, and `usize`-sized masks Floating point, signed integers, and unsigned integers are the [primitive types](https://doc.rust-lang.org/core/primitive/index.html) you're already used to. -The mask types are "truthy" values, like `bool`, but have an unspecified layout in the vector type and cannot be constructed outside of a vector. 
+The mask types have elements that are "truthy" values, like `bool`, but have an unspecified layout because different architectures prefer different layouts for mask types. [simd-guide]: ./beginners-guide.md [zulip-project-portable-simd]: https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd From 61a6f1854f453bb1003b08358b9478eba7fd6ad8 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 16 Oct 2022 21:38:13 -0400 Subject: [PATCH 099/161] Specify aliases in one place, and make it more uniform which are defined --- crates/core_simd/src/alias.rs | 227 +++++++++++++++++++++++++++ crates/core_simd/src/masks.rs | 126 --------------- crates/core_simd/src/mod.rs | 2 + crates/core_simd/src/vector.rs | 8 - crates/core_simd/src/vector/float.rs | 24 --- crates/core_simd/src/vector/int.rs | 63 -------- crates/core_simd/src/vector/uint.rs | 63 -------- 7 files changed, 229 insertions(+), 284 deletions(-) create mode 100644 crates/core_simd/src/alias.rs delete mode 100644 crates/core_simd/src/vector/float.rs delete mode 100644 crates/core_simd/src/vector/int.rs delete mode 100644 crates/core_simd/src/vector/uint.rs diff --git a/crates/core_simd/src/alias.rs b/crates/core_simd/src/alias.rs new file mode 100644 index 00000000000..b4d5f45208a --- /dev/null +++ b/crates/core_simd/src/alias.rs @@ -0,0 +1,227 @@ +macro_rules! number { + { 1 } => { "one" }; + { 2 } => { "two" }; + { 4 } => { "four" }; + { 8 } => { "eight" }; + { $x:literal } => { stringify!($x) }; +} + +macro_rules! plural { + { 1 } => { "" }; + { $x:literal } => { "s" }; +} + +macro_rules! alias { + { + $( + $element:ty = { + $($alias:ident $elements:tt)* + } + )* + } => { + $( + $( + #[doc = concat!("A SIMD vector with ", number!($elements), " element", plural!($elements), " of type [`", stringify!($element), "`].")] + #[allow(non_camel_case_types)] + pub type $alias = $crate::simd::Simd<$element, $elements>; + )* + )* + } +} + +macro_rules! 
mask_alias { + { + $( + $element:ty : $size:literal = { + $($alias:ident $elements:tt)* + } + )* + } => { + $( + $( + #[doc = concat!("A SIMD mask with ", number!($elements), " element", plural!($elements), " for vectors with ", $size, " element types.")] + /// + #[doc = concat!( + "The layout of this type is unspecified, and may change between platforms and/or Rust versions, and code should not assume that it is equivalent to `[", + stringify!($element), "; ", $elements, "]`." + )] + #[allow(non_camel_case_types)] + pub type $alias = $crate::simd::Mask<$element, $elements>; + )* + )* + } +} + +alias! { + i8 = { + i8x1 1 + i8x2 2 + i8x4 4 + i8x8 8 + i8x16 16 + i8x32 32 + i8x64 64 + } + + i16 = { + i16x1 1 + i16x2 2 + i16x4 4 + i16x8 8 + i16x16 16 + i16x32 32 + i16x64 64 + } + + i32 = { + i32x1 1 + i32x2 2 + i32x4 4 + i32x8 8 + i32x16 16 + i32x32 32 + i32x64 64 + } + + i64 = { + i64x1 1 + i64x2 2 + i64x4 4 + i64x8 8 + i64x16 16 + i64x32 32 + i64x64 64 + } + + isize = { + isizex1 1 + isizex2 2 + isizex4 4 + isizex8 8 + isizex16 16 + isizex32 32 + isizex64 64 + } + + u8 = { + u8x1 1 + u8x2 2 + u8x4 4 + u8x8 8 + u8x16 16 + u8x32 32 + u8x64 64 + } + + u16 = { + u16x1 1 + u16x2 2 + u16x4 4 + u16x8 8 + u16x16 16 + u16x32 32 + u16x64 64 + } + + u32 = { + u32x1 1 + u32x2 2 + u32x4 4 + u32x8 8 + u32x16 16 + u32x32 32 + u32x64 64 + } + + u64 = { + u64x1 1 + u64x2 2 + u64x4 4 + u64x8 8 + u64x16 16 + u64x32 32 + u64x64 64 + } + + usize = { + usizex1 1 + usizex2 2 + usizex4 4 + usizex8 8 + usizex16 16 + usizex32 32 + usizex64 64 + } + + f32 = { + f32x1 1 + f32x2 2 + f32x4 4 + f32x8 8 + f32x16 16 + f32x32 32 + f32x64 64 + } + + f64 = { + f64x1 1 + f64x2 2 + f64x4 4 + f64x8 8 + f64x16 16 + f64x32 32 + f64x64 64 + } +} + +mask_alias! 
{ + i8 : "8-bit" = { + mask8x1 1 + mask8x2 2 + mask8x4 4 + mask8x8 8 + mask8x16 16 + mask8x32 32 + mask8x64 64 + } + + i16 : "16-bit" = { + mask16x1 1 + mask16x2 2 + mask16x4 4 + mask16x8 8 + mask16x16 16 + mask16x32 32 + mask16x64 64 + } + + i32 : "32-bit" = { + mask32x1 1 + mask32x2 2 + mask32x4 4 + mask32x8 8 + mask32x16 16 + mask32x32 32 + mask32x64 64 + } + + i64 : "64-bit" = { + mask64x1 1 + mask64x2 2 + mask64x4 4 + mask64x8 8 + mask64x16 16 + mask64x32 32 + mask64x64 64 + } + + isize : "pointer-sized" = { + masksizex1 1 + masksizex2 2 + masksizex4 4 + masksizex8 8 + masksizex16 16 + masksizex32 32 + masksizex64 64 + } +} diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 7fd50fed447..e58df80fca8 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -530,132 +530,6 @@ where } } -/// A mask for SIMD vectors with eight elements of 8 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i8; 8]`. -pub type mask8x8 = Mask; - -/// A mask for SIMD vectors with 16 elements of 8 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i8; 16]`. -pub type mask8x16 = Mask; - -/// A mask for SIMD vectors with 32 elements of 8 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i8; 32]`. -pub type mask8x32 = Mask; - -/// A mask for SIMD vectors with 64 elements of 8 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i8; 64]`. -pub type mask8x64 = Mask; - -/// A mask for SIMD vectors with four elements of 16 bits. 
-/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i16; 4]`. -pub type mask16x4 = Mask; - -/// A mask for SIMD vectors with eight elements of 16 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i16; 8]`. -pub type mask16x8 = Mask; - -/// A mask for SIMD vectors with 16 elements of 16 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i16; 16]`. -pub type mask16x16 = Mask; - -/// A mask for SIMD vectors with 32 elements of 16 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i16; 32]`. -pub type mask16x32 = Mask; - -/// A mask for SIMD vectors with two elements of 32 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i32; 2]`. -pub type mask32x2 = Mask; - -/// A mask for SIMD vectors with four elements of 32 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i32; 4]`. -pub type mask32x4 = Mask; - -/// A mask for SIMD vectors with eight elements of 32 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i32; 8]`. -pub type mask32x8 = Mask; - -/// A mask for SIMD vectors with 16 elements of 32 bits. 
-/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i32; 16]`. -pub type mask32x16 = Mask; - -/// A mask for SIMD vectors with two elements of 64 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i64; 2]`. -pub type mask64x2 = Mask; - -/// A mask for SIMD vectors with four elements of 64 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i64; 4]`. -pub type mask64x4 = Mask; - -/// A mask for SIMD vectors with eight elements of 64 bits. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[i64; 8]`. -pub type mask64x8 = Mask; - -/// A mask for SIMD vectors with two elements of pointer width. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[isize; 2]`. -pub type masksizex2 = Mask; - -/// A mask for SIMD vectors with four elements of pointer width. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[isize; 4]`. -pub type masksizex4 = Mask; - -/// A mask for SIMD vectors with eight elements of pointer width. -/// -/// The layout of this type is unspecified, and may change between platforms -/// and/or Rust versions, and code should not assume that it is equivalent to -/// `[isize; 8]`. -pub type masksizex8 = Mask; - macro_rules! 
impl_from { { $from:ty => $($to:ty),* } => { $( diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index b472aa3abe2..9909d639874 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -6,6 +6,7 @@ pub(crate) mod intrinsics; #[cfg(feature = "generic_const_exprs")] mod to_bytes; +mod alias; mod elements; mod eq; mod fmt; @@ -22,6 +23,7 @@ mod vendor; pub mod simd { pub(crate) use crate::core_simd::intrinsics; + pub use crate::core_simd::alias::*; pub use crate::core_simd::elements::*; pub use crate::core_simd::eq::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index e8e8f6899d3..7f0e8350cf8 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,11 +1,3 @@ -mod float; -mod int; -mod uint; - -pub use float::*; -pub use int::*; -pub use uint::*; - // Vectors of pointers are not for public use at the current time. pub(crate) mod ptr; diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs deleted file mode 100644 index f836c99b1e2..00000000000 --- a/crates/core_simd/src/vector/float.rs +++ /dev/null @@ -1,24 +0,0 @@ -#![allow(non_camel_case_types)] - -use crate::simd::Simd; - -/// A 64-bit SIMD vector with two elements of type `f32`. -pub type f32x2 = Simd; - -/// A 128-bit SIMD vector with four elements of type `f32`. -pub type f32x4 = Simd; - -/// A 256-bit SIMD vector with eight elements of type `f32`. -pub type f32x8 = Simd; - -/// A 512-bit SIMD vector with 16 elements of type `f32`. -pub type f32x16 = Simd; - -/// A 128-bit SIMD vector with two elements of type `f64`. -pub type f64x2 = Simd; - -/// A 256-bit SIMD vector with four elements of type `f64`. -pub type f64x4 = Simd; - -/// A 512-bit SIMD vector with eight elements of type `f64`. 
-pub type f64x8 = Simd; diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs deleted file mode 100644 index 20e56c7dc64..00000000000 --- a/crates/core_simd/src/vector/int.rs +++ /dev/null @@ -1,63 +0,0 @@ -#![allow(non_camel_case_types)] - -use crate::simd::Simd; - -/// A SIMD vector with two elements of type `isize`. -pub type isizex2 = Simd; - -/// A SIMD vector with four elements of type `isize`. -pub type isizex4 = Simd; - -/// A SIMD vector with eight elements of type `isize`. -pub type isizex8 = Simd; - -/// A 32-bit SIMD vector with two elements of type `i16`. -pub type i16x2 = Simd; - -/// A 64-bit SIMD vector with four elements of type `i16`. -pub type i16x4 = Simd; - -/// A 128-bit SIMD vector with eight elements of type `i16`. -pub type i16x8 = Simd; - -/// A 256-bit SIMD vector with 16 elements of type `i16`. -pub type i16x16 = Simd; - -/// A 512-bit SIMD vector with 32 elements of type `i16`. -pub type i16x32 = Simd; - -/// A 64-bit SIMD vector with two elements of type `i32`. -pub type i32x2 = Simd; - -/// A 128-bit SIMD vector with four elements of type `i32`. -pub type i32x4 = Simd; - -/// A 256-bit SIMD vector with eight elements of type `i32`. -pub type i32x8 = Simd; - -/// A 512-bit SIMD vector with 16 elements of type `i32`. -pub type i32x16 = Simd; - -/// A 128-bit SIMD vector with two elements of type `i64`. -pub type i64x2 = Simd; - -/// A 256-bit SIMD vector with four elements of type `i64`. -pub type i64x4 = Simd; - -/// A 512-bit SIMD vector with eight elements of type `i64`. -pub type i64x8 = Simd; - -/// A 32-bit SIMD vector with four elements of type `i8`. -pub type i8x4 = Simd; - -/// A 64-bit SIMD vector with eight elements of type `i8`. -pub type i8x8 = Simd; - -/// A 128-bit SIMD vector with 16 elements of type `i8`. -pub type i8x16 = Simd; - -/// A 256-bit SIMD vector with 32 elements of type `i8`. -pub type i8x32 = Simd; - -/// A 512-bit SIMD vector with 64 elements of type `i8`. 
-pub type i8x64 = Simd; diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs deleted file mode 100644 index b4a69c44363..00000000000 --- a/crates/core_simd/src/vector/uint.rs +++ /dev/null @@ -1,63 +0,0 @@ -#![allow(non_camel_case_types)] - -use crate::simd::Simd; - -/// A SIMD vector with two elements of type `usize`. -pub type usizex2 = Simd; - -/// A SIMD vector with four elements of type `usize`. -pub type usizex4 = Simd; - -/// A SIMD vector with eight elements of type `usize`. -pub type usizex8 = Simd; - -/// A 32-bit SIMD vector with two elements of type `u16`. -pub type u16x2 = Simd; - -/// A 64-bit SIMD vector with four elements of type `u16`. -pub type u16x4 = Simd; - -/// A 128-bit SIMD vector with eight elements of type `u16`. -pub type u16x8 = Simd; - -/// A 256-bit SIMD vector with 16 elements of type `u16`. -pub type u16x16 = Simd; - -/// A 512-bit SIMD vector with 32 elements of type `u16`. -pub type u16x32 = Simd; - -/// A 64-bit SIMD vector with two elements of type `u32`. -pub type u32x2 = Simd; - -/// A 128-bit SIMD vector with four elements of type `u32`. -pub type u32x4 = Simd; - -/// A 256-bit SIMD vector with eight elements of type `u32`. -pub type u32x8 = Simd; - -/// A 512-bit SIMD vector with 16 elements of type `u32`. -pub type u32x16 = Simd; - -/// A 128-bit SIMD vector with two elements of type `u64`. -pub type u64x2 = Simd; - -/// A 256-bit SIMD vector with four elements of type `u64`. -pub type u64x4 = Simd; - -/// A 512-bit SIMD vector with eight elements of type `u64`. -pub type u64x8 = Simd; - -/// A 32-bit SIMD vector with four elements of type `u8`. -pub type u8x4 = Simd; - -/// A 64-bit SIMD vector with eight elements of type `u8`. -pub type u8x8 = Simd; - -/// A 128-bit SIMD vector with 16 elements of type `u8`. -pub type u8x16 = Simd; - -/// A 256-bit SIMD vector with 32 elements of type `u8`. -pub type u8x32 = Simd; - -/// A 512-bit SIMD vector with 64 elements of type `u8`. 
-pub type u8x64 = Simd; From 402b50a2728ec4dd9a6da2e57b25cce3ffb48f06 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 16 Oct 2022 23:46:18 -0400 Subject: [PATCH 100/161] Improve variable names --- crates/core_simd/src/alias.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/core_simd/src/alias.rs b/crates/core_simd/src/alias.rs index b4d5f45208a..23f121c4619 100644 --- a/crates/core_simd/src/alias.rs +++ b/crates/core_simd/src/alias.rs @@ -14,16 +14,16 @@ macro_rules! plural { macro_rules! alias { { $( - $element:ty = { - $($alias:ident $elements:tt)* + $element_ty:ty = { + $($alias:ident $num_elements:tt)* } )* } => { $( $( - #[doc = concat!("A SIMD vector with ", number!($elements), " element", plural!($elements), " of type [`", stringify!($element), "`].")] + #[doc = concat!("A SIMD vector with ", number!($num_elements), " element", plural!($num_elements), " of type [`", stringify!($element_ty), "`].")] #[allow(non_camel_case_types)] - pub type $alias = $crate::simd::Simd<$element, $elements>; + pub type $alias = $crate::simd::Simd<$element_ty, $num_elements>; )* )* } @@ -32,21 +32,21 @@ macro_rules! alias { macro_rules! mask_alias { { $( - $element:ty : $size:literal = { - $($alias:ident $elements:tt)* + $element_ty:ty : $size:literal = { + $($alias:ident $num_elements:tt)* } )* } => { $( $( - #[doc = concat!("A SIMD mask with ", number!($elements), " element", plural!($elements), " for vectors with ", $size, " element types.")] + #[doc = concat!("A SIMD mask with ", number!($num_elements), " element", plural!($num_elements), " for vectors with ", $size, " element types.")] /// #[doc = concat!( "The layout of this type is unspecified, and may change between platforms and/or Rust versions, and code should not assume that it is equivalent to `[", - stringify!($element), "; ", $elements, "]`." + stringify!($element_ty), "; ", $num_elements, "]`." 
)] #[allow(non_camel_case_types)] - pub type $alias = $crate::simd::Mask<$element, $elements>; + pub type $alias = $crate::simd::Mask<$element_ty, $num_elements>; )* )* } From d3cfd7c5c9dba01a8f31b10cef4a1985ae1dc53f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 21 Jun 2022 23:17:13 -0400 Subject: [PATCH 101/161] Add vectors of pointers --- crates/core_simd/src/cast.rs | 45 ++++++++ crates/core_simd/src/elements.rs | 4 + crates/core_simd/src/elements/const_ptr.rs | 59 +++++++++++ crates/core_simd/src/elements/mut_ptr.rs | 57 +++++++++++ crates/core_simd/src/eq.rs | 42 ++++++++ crates/core_simd/src/mod.rs | 2 + crates/core_simd/src/ord.rs | 114 +++++++++++++++++++++ crates/core_simd/src/vector.rs | 19 +++- 8 files changed, 339 insertions(+), 3 deletions(-) create mode 100644 crates/core_simd/src/cast.rs create mode 100644 crates/core_simd/src/elements/const_ptr.rs create mode 100644 crates/core_simd/src/elements/mut_ptr.rs diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs new file mode 100644 index 00000000000..e04a9042b1b --- /dev/null +++ b/crates/core_simd/src/cast.rs @@ -0,0 +1,45 @@ +use crate::simd::SimdElement; + +/// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. +pub trait SimdCast: SimdElement {} + +macro_rules! into_number { + { $($type:ty),* } => { + $( + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + impl SimdCast for $type {} + + impl SimdCast for $type {} + impl SimdCast for $type {} + )* + } +} + +into_number! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 } + +macro_rules! 
into_pointer { + { $($type:ty),* } => { + $( + impl SimdCast<$type> for *const T {} + impl SimdCast<$type> for *mut T {} + impl SimdCast<*const T> for $type {} + impl SimdCast<*mut T> for $type {} + )* + } +} + +into_pointer! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } + +impl SimdCast<*const T> for *const U {} +impl SimdCast<*const T> for *mut U {} +impl SimdCast<*mut T> for *const U {} +impl SimdCast<*mut T> for *mut U {} diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/elements.rs index 701eb66b248..dc7f52a4d57 100644 --- a/crates/core_simd/src/elements.rs +++ b/crates/core_simd/src/elements.rs @@ -1,11 +1,15 @@ +mod const_ptr; mod float; mod int; +mod mut_ptr; mod uint; mod sealed { pub trait Sealed {} } +pub use const_ptr::*; pub use float::*; pub use int::*; +pub use mut_ptr::*; pub use uint::*; diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs new file mode 100644 index 00000000000..ab6b5b8b5f4 --- /dev/null +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -0,0 +1,59 @@ +use super::sealed::Sealed; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; + +/// Operations on SIMD vectors of constant pointers. +pub trait SimdConstPtr: Copy + Sealed { + /// Vector type representing the pointers as bits. + type Bits; + + /// Vector of mutable pointers to the same type. + type MutPtr; + + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Returns `true` for each lane that is null. + fn is_null(self) -> Self::Mask; + + /// Changes constness without changing the type. + fn as_mut(self) -> Self::MutPtr; + + /// Cast pointers to raw bits. + fn to_bits(self) -> Self::Bits; + + /// Cast raw bits to pointers. 
+ fn from_bits(bits: Self::Bits) -> Self; +} + +impl Sealed for Simd<*const T, LANES> where + LaneCount: SupportedLaneCount +{ +} + +impl SimdConstPtr for Simd<*const T, LANES> +where + LaneCount: SupportedLaneCount, +{ + type Bits = Simd; + type MutPtr = Simd<*mut T, LANES>; + type Mask = Mask; + + fn is_null(self) -> Self::Mask { + Simd::splat(core::ptr::null()).simd_eq(self) + } + + fn as_mut(self) -> Self::MutPtr { + // Converting between pointers is safe + unsafe { intrinsics::simd_as(self) } + } + + fn to_bits(self) -> Self::Bits { + // Casting pointers to usize is safe + unsafe { intrinsics::simd_as(self) } + } + + fn from_bits(bits: Self::Bits) -> Self { + // Casting usize to pointers is safe + unsafe { intrinsics::simd_as(bits) } + } +} diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs new file mode 100644 index 00000000000..b49f9fda7e4 --- /dev/null +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -0,0 +1,57 @@ +use super::sealed::Sealed; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; + +/// Operations on SIMD vectors of mutable pointers. +pub trait SimdMutPtr: Copy + Sealed { + /// Vector type representing the pointers as bits. + type Bits; + + /// Vector of constant pointers to the same type. + type ConstPtr; + + /// Mask type used for manipulating this SIMD vector type. + type Mask; + + /// Returns `true` for each lane that is null. + fn is_null(self) -> Self::Mask; + + /// Changes constness without changing the type. + fn as_const(self) -> Self::ConstPtr; + + /// Cast pointers to raw bits. + fn to_bits(self) -> Self::Bits; + + /// Cast raw bits to pointers. 
+ fn from_bits(bits: Self::Bits) -> Self; +} + +impl Sealed for Simd<*mut T, LANES> where LaneCount: SupportedLaneCount +{} + +impl SimdMutPtr for Simd<*mut T, LANES> +where + LaneCount: SupportedLaneCount, +{ + type Bits = Simd; + type ConstPtr = Simd<*const T, LANES>; + type Mask = Mask; + + fn is_null(self) -> Self::Mask { + Simd::splat(core::ptr::null_mut()).simd_eq(self) + } + + fn as_const(self) -> Self::ConstPtr { + // Converting between pointers is safe + unsafe { intrinsics::simd_as(self) } + } + + fn to_bits(self) -> Self::Bits { + // Casting pointers to usize is safe + unsafe { intrinsics::simd_as(self) } + } + + fn from_bits(bits: Self::Bits) -> Self { + // Casting usize to pointers is safe + unsafe { intrinsics::simd_as(bits) } + } +} diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/eq.rs index c7111f720a8..149380746e7 100644 --- a/crates/core_simd/src/eq.rs +++ b/crates/core_simd/src/eq.rs @@ -71,3 +71,45 @@ macro_rules! impl_mask { } impl_mask! { i8, i16, i32, i64, isize } + +impl SimdPartialEq for Simd<*const T, LANES> +where + LaneCount: SupportedLaneCount, +{ + type Mask = Mask; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + } +} + +impl SimdPartialEq for Simd<*mut T, LANES> +where + LaneCount: SupportedLaneCount, +{ + type Mask = Mask; + + #[inline] + fn simd_eq(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + } + + #[inline] + fn simd_ne(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + } +} diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 9909d639874..ece026a448b 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod intrinsics; mod to_bytes; mod alias; +mod cast; mod elements; mod eq; mod fmt; @@ -24,6 +25,7 @@ pub mod simd { pub(crate) use crate::core_simd::intrinsics; pub use crate::core_simd::alias::*; + pub use crate::core_simd::cast::*; pub use crate::core_simd::elements::*; pub use crate::core_simd::eq::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs index 9a87bc2e344..95a1ecaeeda 100644 --- a/crates/core_simd/src/ord.rs +++ b/crates/core_simd/src/ord.rs @@ -211,3 +211,117 @@ macro_rules! impl_mask { } impl_mask! { i8, i16, i32, i64, isize } + +impl SimdPartialOrd for Simd<*const T, LANES> +where + LaneCount: SupportedLaneCount, +{ + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } +} + +impl SimdOrd for Simd<*const T, LANES> +where + LaneCount: SupportedLaneCount, +{ + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_lt(other).select(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_gt(other).select(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } +} + +impl SimdPartialOrd for Simd<*mut T, LANES> +where + LaneCount: SupportedLaneCount, +{ + #[inline] + fn simd_lt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + } + + #[inline] + fn simd_le(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + } + + #[inline] + fn simd_gt(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. + unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + } + + #[inline] + fn simd_ge(self, other: Self) -> Self::Mask { + // Safety: `self` is a vector, and the result of the comparison + // is always a valid mask. 
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + } +} + +impl SimdOrd for Simd<*mut T, LANES> +where + LaneCount: SupportedLaneCount, +{ + #[inline] + fn simd_max(self, other: Self) -> Self { + self.simd_lt(other).select(other, self) + } + + #[inline] + fn simd_min(self, other: Self) -> Self { + self.simd_gt(other).select(other, self) + } + + #[inline] + fn simd_clamp(self, min: Self, max: Self) -> Self { + assert!( + min.simd_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + self.simd_max(min).simd_min(max) + } +} diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 7f0e8350cf8..cbc8ced5a84 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -2,7 +2,7 @@ pub(crate) mod ptr; use crate::simd::{ - intrinsics, LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount, Swizzle, + intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdPartialOrd, SupportedLaneCount, Swizzle, }; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. @@ -211,7 +211,10 @@ where #[must_use] #[inline] #[cfg(not(bootstrap))] - pub fn cast(self) -> Simd { + pub fn cast(self) -> Simd + where + T: SimdCast, + { // Safety: The input argument is a vector of a valid SIMD element type. 
unsafe { intrinsics::simd_as(self) } } @@ -234,7 +237,7 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn to_int_unchecked(self) -> Simd where - T: core::convert::FloatToInt, + T: core::convert::FloatToInt + SimdCast, I: SimdElement, { // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to @@ -739,3 +742,13 @@ impl Sealed for f64 {} unsafe impl SimdElement for f64 { type Mask = i64; } + +impl Sealed for *const T {} +unsafe impl SimdElement for *const T { + type Mask = isize; +} + +impl Sealed for *mut T {} +unsafe impl SimdElement for *mut T { + type Mask = isize; +} From 7e96f5dbea3fd2291f0e835a21ed0c41f6ef086e Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 21 Jun 2022 23:20:06 -0400 Subject: [PATCH 102/161] Use safe casts --- crates/core_simd/src/elements/const_ptr.rs | 11 ++++------- crates/core_simd/src/elements/mut_ptr.rs | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index ab6b5b8b5f4..62365eace89 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. 
pub trait SimdConstPtr: Copy + Sealed { @@ -43,17 +43,14 @@ where } fn as_mut(self) -> Self::MutPtr { - // Converting between pointers is safe - unsafe { intrinsics::simd_as(self) } + self.cast() } fn to_bits(self) -> Self::Bits { - // Casting pointers to usize is safe - unsafe { intrinsics::simd_as(self) } + self.cast() } fn from_bits(bits: Self::Bits) -> Self { - // Casting usize to pointers is safe - unsafe { intrinsics::simd_as(bits) } + bits.cast() } } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index b49f9fda7e4..8c68d42628f 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -41,17 +41,14 @@ where } fn as_const(self) -> Self::ConstPtr { - // Converting between pointers is safe - unsafe { intrinsics::simd_as(self) } + self.cast() } fn to_bits(self) -> Self::Bits { - // Casting pointers to usize is safe - unsafe { intrinsics::simd_as(self) } + self.cast() } fn from_bits(bits: Self::Bits) -> Self { - // Casting usize to pointers is safe - unsafe { intrinsics::simd_as(bits) } + bits.cast() } } From 4076ba8a77326c70645f6c4a4351b0d84c5c898f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 23 Jun 2022 01:21:58 -0400 Subject: [PATCH 103/161] Implement scatter/gather with new pointer vector and add tests --- crates/core_simd/src/cast.rs | 128 +++++++++++++++++---- crates/core_simd/src/elements/const_ptr.rs | 30 +++-- crates/core_simd/src/elements/mut_ptr.rs | 30 +++-- crates/core_simd/src/eq.rs | 20 ++-- crates/core_simd/src/ord.rs | 36 ++---- crates/core_simd/src/vector.rs | 13 +-- crates/core_simd/src/vector/ptr.rs | 51 -------- 
crates/core_simd/tests/pointers.rs | 43 +++++++ crates/test_helpers/src/biteq.rs | 20 ++++ crates/test_helpers/src/lib.rs | 63 ++++++---- 10 files changed, 275 insertions(+), 159 deletions(-) delete mode 100644 crates/core_simd/src/vector/ptr.rs create mode 100644 crates/core_simd/tests/pointers.rs diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index e04a9042b1b..d62d3f6635d 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -1,25 +1,41 @@ -use crate::simd::SimdElement; +use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount}; /// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. -pub trait SimdCast: SimdElement {} +pub trait SimdCast: SimdElement { + #[doc(hidden)] + fn cast(x: Simd) -> Simd + where + LaneCount: SupportedLaneCount; +} macro_rules! into_number { + { $from:ty, $to:ty } => { + impl SimdCast<$to> for $from { + fn cast(x: Simd) -> Simd<$to, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: simd_as can handle numeric conversions + unsafe { intrinsics::simd_as(x) } + } + } + }; { $($type:ty),* } => { $( - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} + into_number! { $type, i8 } + into_number! { $type, i16 } + into_number! { $type, i32 } + into_number! { $type, i64 } + into_number! { $type, isize } - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} - impl SimdCast for $type {} + into_number! { $type, u8 } + into_number! { $type, u16 } + into_number! { $type, u32 } + into_number! { $type, u64 } + into_number! { $type, usize } - impl SimdCast for $type {} - impl SimdCast for $type {} + into_number! { $type, f32 } + into_number! { $type, f64 } )* } } @@ -29,17 +45,85 @@ into_number! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 } macro_rules! 
into_pointer { { $($type:ty),* } => { $( - impl SimdCast<$type> for *const T {} - impl SimdCast<$type> for *mut T {} - impl SimdCast<*const T> for $type {} - impl SimdCast<*mut T> for $type {} + impl SimdCast<$type> for *const T { + fn cast(x: Simd) -> Simd<$type, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting isize to pointers is safe + let x: Simd = unsafe { core::mem::transmute_copy(&x) }; + x.cast() + } + } + impl SimdCast<$type> for *mut T { + fn cast(x: Simd) -> Simd<$type, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting isize to pointers is safe + let x: Simd = unsafe { core::mem::transmute_copy(&x) }; + x.cast() + } + } + impl SimdCast<*const T> for $type { + fn cast(x: Simd<$type, LANES>) -> Simd<*const T, LANES> + where + LaneCount: SupportedLaneCount, + { + let x: Simd = x.cast(); + // Safety: transmuting isize to pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } + } + impl SimdCast<*mut T> for $type { + fn cast(x: Simd<$type, LANES>) -> Simd<*mut T, LANES> + where + LaneCount: SupportedLaneCount, + { + let x: Simd = x.cast(); + // Safety: transmuting isize to pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } + } )* } } into_pointer! 
{ i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } -impl SimdCast<*const T> for *const U {} -impl SimdCast<*const T> for *mut U {} -impl SimdCast<*mut T> for *const U {} -impl SimdCast<*mut T> for *mut U {} +impl SimdCast<*const T> for *const U { + fn cast(x: Simd<*const U, LANES>) -> Simd<*const T, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } +} +impl SimdCast<*const T> for *mut U { + fn cast(x: Simd<*mut U, LANES>) -> Simd<*const T, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } +} +impl SimdCast<*mut T> for *const U { + fn cast(x: Simd<*const U, LANES>) -> Simd<*mut T, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } +} +impl SimdCast<*mut T> for *mut U { + fn cast(x: Simd<*mut U, LANES>) -> Simd<*mut T, LANES> + where + LaneCount: SupportedLaneCount, + { + // Safety: transmuting pointers is safe + unsafe { core::mem::transmute_copy(&x) } + } +} diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 62365eace89..c4a254f5ab1 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -3,8 +3,8 @@ use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { - /// Vector type representing the pointers as bits. - type Bits; + /// Vector of usize with the same number of lanes. + type Usize; /// Vector of mutable pointers to the same type. type MutPtr; @@ -18,11 +18,15 @@ pub trait SimdConstPtr: Copy + Sealed { /// Changes constness without changing the type. fn as_mut(self) -> Self::MutPtr; - /// Cast pointers to raw bits. 
- fn to_bits(self) -> Self::Bits; + /// Gets the "address" portion of the pointer. + /// + /// Equivalent to calling [`pointer::addr`] on each lane. + fn addr(self) -> Self::Usize; - /// Cast raw bits to pointers. - fn from_bits(bits: Self::Bits) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + fn wrapping_add(self, count: Self::Usize) -> Self; } impl Sealed for Simd<*const T, LANES> where @@ -34,23 +38,29 @@ impl SimdConstPtr for Simd<*const T, LANES> where LaneCount: SupportedLaneCount, { - type Bits = Simd; + type Usize = Simd; type MutPtr = Simd<*mut T, LANES>; type Mask = Mask; + #[inline] fn is_null(self) -> Self::Mask { Simd::splat(core::ptr::null()).simd_eq(self) } + #[inline] fn as_mut(self) -> Self::MutPtr { self.cast() } - fn to_bits(self) -> Self::Bits { + #[inline] + fn addr(self) -> Self::Usize { self.cast() } - fn from_bits(bits: Self::Bits) -> Self { - bits.cast() + #[inline] + fn wrapping_add(self, count: Self::Usize) -> Self { + let addr = self.addr() + (count * Simd::splat(core::mem::size_of::())); + // Safety: transmuting usize to pointers is safe, even if accessing those pointers isn't. + unsafe { core::mem::transmute_copy(&addr) } } } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 8c68d42628f..5920960c49c 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -3,8 +3,8 @@ use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { - /// Vector type representing the pointers as bits. - type Bits; + /// Vector of usize with the same number of lanes. + type Usize; /// Vector of constant pointers to the same type. type ConstPtr; @@ -18,11 +18,15 @@ pub trait SimdMutPtr: Copy + Sealed { /// Changes constness without changing the type. 
fn as_const(self) -> Self::ConstPtr; - /// Cast pointers to raw bits. - fn to_bits(self) -> Self::Bits; + /// Gets the "address" portion of the pointer. + /// + /// Equivalent to calling [`pointer::addr`] on each lane. + fn addr(self) -> Self::Usize; - /// Cast raw bits to pointers. - fn from_bits(bits: Self::Bits) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + fn wrapping_add(self, count: Self::Usize) -> Self; } impl Sealed for Simd<*mut T, LANES> where LaneCount: SupportedLaneCount @@ -32,23 +36,29 @@ impl SimdMutPtr for Simd<*mut T, LANES> where LaneCount: SupportedLaneCount, { - type Bits = Simd; + type Usize = Simd; type ConstPtr = Simd<*const T, LANES>; type Mask = Mask; + #[inline] fn is_null(self) -> Self::Mask { Simd::splat(core::ptr::null_mut()).simd_eq(self) } + #[inline] fn as_const(self) -> Self::ConstPtr { self.cast() } - fn to_bits(self) -> Self::Bits { + #[inline] + fn addr(self) -> Self::Usize { self.cast() } - fn from_bits(bits: Self::Bits) -> Self { - bits.cast() + #[inline] + fn wrapping_add(self, count: Self::Usize) -> Self { + let addr = self.addr() + (count * Simd::splat(core::mem::size_of::())); + // Safety: transmuting usize to pointers is safe, even if accessing those pointers isn't. + unsafe { core::mem::transmute_copy(&addr) } } } diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/eq.rs index 149380746e7..80763c07272 100644 --- a/crates/core_simd/src/eq.rs +++ b/crates/core_simd/src/eq.rs @@ -1,4 +1,6 @@ -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdElement, SimdMutPtr, SupportedLaneCount, +}; /// Parallel `PartialEq`. 
pub trait SimdPartialEq { @@ -80,16 +82,12 @@ where #[inline] fn simd_eq(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + self.addr().simd_eq(other.addr()) } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + self.addr().simd_ne(other.addr()) } } @@ -101,15 +99,11 @@ where #[inline] fn simd_eq(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + self.addr().simd_eq(other.addr()) } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + self.addr().simd_ne(other.addr()) } } diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs index 95a1ecaeeda..1ae9cd061fb 100644 --- a/crates/core_simd/src/ord.rs +++ b/crates/core_simd/src/ord.rs @@ -1,4 +1,6 @@ -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{ + intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdMutPtr, SimdPartialEq, SupportedLaneCount, +}; /// Parallel `PartialOrd`. pub trait SimdPartialOrd: SimdPartialEq { @@ -218,30 +220,22 @@ where { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. 
- unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + self.addr().simd_lt(other.addr()) } #[inline] fn simd_le(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + self.addr().simd_le(other.addr()) } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + self.addr().simd_gt(other.addr()) } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + self.addr().simd_ge(other.addr()) } } @@ -275,30 +269,22 @@ where { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + self.addr().simd_lt(other.addr()) } #[inline] fn simd_le(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + self.addr().simd_le(other.addr()) } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + self.addr().simd_gt(other.addr()) } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { - // Safety: `self` is a vector, and the result of the comparison - // is always a valid mask. 
- unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + self.addr().simd_ge(other.addr()) } } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index cbc8ced5a84..145394a519d 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,8 +1,6 @@ -// Vectors of pointers are not for public use at the current time. -pub(crate) mod ptr; - use crate::simd::{ - intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdPartialOrd, SupportedLaneCount, Swizzle, + intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdConstPtr, SimdMutPtr, SimdPartialOrd, + SupportedLaneCount, Swizzle, }; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. @@ -215,8 +213,7 @@ where where T: SimdCast, { - // Safety: The input argument is a vector of a valid SIMD element type. - unsafe { intrinsics::simd_as(self) } + SimdCast::cast(self) } /// Rounds toward zero and converts to the same-width integer type, assuming that @@ -352,7 +349,7 @@ where idxs: Simd, or: Self, ) -> Self { - let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr()); + let base_ptr = Simd::<*const T, LANES>::splat(slice.as_ptr()); // Ferris forgive me, I have done pointer arithmetic here. let ptrs = base_ptr.wrapping_add(idxs); // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah @@ -460,7 +457,7 @@ where // 3. &mut [T] which will become our base ptr. unsafe { // Now Entering ☢️ *mut T Zone - let base_ptr = crate::simd::ptr::SimdMutPtr::splat(slice.as_mut_ptr()); + let base_ptr = Simd::<*mut T, LANES>::splat(slice.as_mut_ptr()); // Ferris forgive me, I have done pointer arithmetic here. 
let ptrs = base_ptr.wrapping_add(idxs); // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs deleted file mode 100644 index fa756344db9..00000000000 --- a/crates/core_simd/src/vector/ptr.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Private implementation details of public gather/scatter APIs. -use crate::simd::intrinsics; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; - -/// A vector of *const T. -#[derive(Debug, Copy, Clone)] -#[repr(simd)] -pub(crate) struct SimdConstPtr([*const T; LANES]); - -impl SimdConstPtr -where - LaneCount: SupportedLaneCount, - T: Sized, -{ - #[inline] - #[must_use] - pub fn splat(ptr: *const T) -> Self { - Self([ptr; LANES]) - } - - #[inline] - #[must_use] - pub fn wrapping_add(self, addend: Simd) -> Self { - // Safety: this intrinsic doesn't have a precondition - unsafe { intrinsics::simd_arith_offset(self, addend) } - } -} - -/// A vector of *mut T. Be very careful around potential aliasing. -#[derive(Debug, Copy, Clone)] -#[repr(simd)] -pub(crate) struct SimdMutPtr([*mut T; LANES]); - -impl SimdMutPtr -where - LaneCount: SupportedLaneCount, - T: Sized, -{ - #[inline] - #[must_use] - pub fn splat(ptr: *mut T) -> Self { - Self([ptr; LANES]) - } - - #[inline] - #[must_use] - pub fn wrapping_add(self, addend: Simd) -> Self { - // Safety: this intrinsic doesn't have a precondition - unsafe { intrinsics::simd_arith_offset(self, addend) } - } -} diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs new file mode 100644 index 00000000000..df26c462f93 --- /dev/null +++ b/crates/core_simd/tests/pointers.rs @@ -0,0 +1,43 @@ +#![feature(portable_simd, strict_provenance)] + +use core_simd::{Simd, SimdConstPtr, SimdMutPtr}; + +macro_rules! common_tests { + { $constness:ident } => { + test_helpers::test_lanes! 
{ + fn is_null() { + test_helpers::test_unary_mask_elementwise( + &Simd::<*$constness (), LANES>::is_null, + &<*$constness ()>::is_null, + &|_| true, + ); + } + + fn addr() { + test_helpers::test_unary_elementwise( + &Simd::<*$constness (), LANES>::addr, + &<*$constness ()>::addr, + &|_| true, + ); + } + + fn wrapping_add() { + test_helpers::test_binary_elementwise( + &Simd::<*$constness (), LANES>::wrapping_add, + &<*$constness ()>::wrapping_add, + &|_, _| true, + ); + } + } + } +} + +mod const_ptr { + use super::*; + common_tests! { const } +} + +mod mut_ptr { + use super::*; + common_tests! { mut } +} diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index 00350e22418..7d91260d838 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -55,6 +55,26 @@ macro_rules! impl_float_biteq { impl_float_biteq! { f32, f64 } +impl BitEq for *const T { + fn biteq(&self, other: &Self) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl BitEq for *mut T { + fn biteq(&self, other: &Self) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} + impl BitEq for [T; N] { fn biteq(&self, other: &Self) -> bool { self.iter() diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 650eadd12bf..5f2a928b5e4 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -38,6 +38,28 @@ impl_num! { usize } impl_num! { f32 } impl_num! 
{ f64 } +impl DefaultStrategy for *const T { + type Strategy = proptest::strategy::Map *const T>; + fn default_strategy() -> Self::Strategy { + fn map(x: isize) -> *const T { + x as _ + } + use proptest::strategy::Strategy; + proptest::num::isize::ANY.prop_map(map) + } +} + +impl DefaultStrategy for *mut T { + type Strategy = proptest::strategy::Map *mut T>; + fn default_strategy() -> Self::Strategy { + fn map(x: isize) -> *mut T { + x as _ + } + use proptest::strategy::Strategy; + proptest::num::isize::ANY.prop_map(map) + } +} + #[cfg(not(target_arch = "wasm32"))] impl DefaultStrategy for u128 { type Strategy = proptest::num::u128::Any; @@ -135,21 +157,21 @@ pub fn test_unary_elementwise ScalarResult, check: &dyn Fn([Scalar; LANES]) -> bool, ) where - Scalar: Copy + Default + core::fmt::Debug + DefaultStrategy, - ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy, + Scalar: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy, Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, { test_1(&|x: [Scalar; LANES]| { proptest::prop_assume!(check(x)); let result_1: [ScalarResult; LANES] = fv(x.into()).into(); - let result_2: [ScalarResult; LANES] = { - let mut result = [ScalarResult::default(); LANES]; - for (i, o) in x.iter().zip(result.iter_mut()) { - *o = fs(*i); - } - result - }; + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .map(fs) + .collect::>() + .try_into() + .unwrap(); crate::prop_assert_biteq!(result_1, result_2); Ok(()) }); @@ -162,7 +184,7 @@ pub fn test_unary_mask_elementwise( fs: &dyn Fn(Scalar) -> bool, check: &dyn Fn([Scalar; LANES]) -> bool, ) where - Scalar: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar: Copy + core::fmt::Debug + DefaultStrategy, Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, Mask: Into<[bool; LANES]> + 
From<[bool; LANES]> + Copy, { @@ -196,9 +218,9 @@ pub fn test_binary_elementwise< fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, ) where - Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy, - Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy, - ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy, + Scalar1: Copy + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy, Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, @@ -206,13 +228,14 @@ pub fn test_binary_elementwise< test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { proptest::prop_assume!(check(x, y)); let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into(); - let result_2: [ScalarResult; LANES] = { - let mut result = [ScalarResult::default(); LANES]; - for ((i1, i2), o) in x.iter().zip(y.iter()).zip(result.iter_mut()) { - *o = fs(*i1, *i2); - } - result - }; + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); crate::prop_assert_biteq!(result_1, result_2); Ok(()) }); From 6b3c599ba29e46fd7011cf1f01ec6c4cfda395cf Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 23 Jun 2022 01:40:51 -0400 Subject: [PATCH 104/161] Add missing safety comment --- crates/core_simd/src/vector.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 145394a519d..2fc090254d7 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -741,11 +741,15 @@ unsafe impl SimdElement for f64 { } impl Sealed for *const T {} + +// Safety: const pointers 
are valid SIMD element types, and are supported by this API unsafe impl SimdElement for *const T { type Mask = isize; } impl Sealed for *mut T {} + +// Safety: mut pointers are valid SIMD element types, and are supported by this API unsafe impl SimdElement for *mut T { type Mask = isize; } From f10e591de1d321b57af68502a78eef6f8f80c05c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 24 Jun 2022 00:13:36 -0400 Subject: [PATCH 105/161] Fix wrapping pointer arithmetic --- crates/core_simd/src/elements/const_ptr.rs | 33 ++++++++++++++++++---- crates/core_simd/src/elements/mut_ptr.rs | 33 ++++++++++++++++++---- crates/core_simd/src/intrinsics.rs | 3 ++ crates/core_simd/tests/pointers.rs | 16 +++++++++++ 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index c4a254f5ab1..d10bd1481d0 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -1,11 +1,14 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { - /// Vector of usize with the same number of lanes. + /// Vector of `usize` with the same number of lanes. type Usize; + /// Vector of `isize` with the same number of lanes. + type Isize; + /// Vector of mutable pointers to the same type. type MutPtr; @@ -23,10 +26,20 @@ pub trait SimdConstPtr: Copy + Sealed { /// Equivalent to calling [`pointer::addr`] on each lane. fn addr(self) -> Self::Usize; + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. + fn wrapping_offset(self, offset: Self::Isize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. 
/// /// Equivalent to calling [`pointer::wrapping_add`] on each lane. fn wrapping_add(self, count: Self::Usize) -> Self; + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + fn wrapping_sub(self, count: Self::Usize) -> Self; } impl Sealed for Simd<*const T, LANES> where @@ -39,6 +52,7 @@ where LaneCount: SupportedLaneCount, { type Usize = Simd; + type Isize = Simd; type MutPtr = Simd<*mut T, LANES>; type Mask = Mask; @@ -57,10 +71,19 @@ where self.cast() } + #[inline] + fn wrapping_offset(self, count: Self::Isize) -> Self { + // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets + unsafe { intrinsics::simd_arith_offset(self, count) } + } + #[inline] fn wrapping_add(self, count: Self::Usize) -> Self { - let addr = self.addr() + (count * Simd::splat(core::mem::size_of::())); - // Safety: transmuting usize to pointers is safe, even if accessing those pointers isn't. - unsafe { core::mem::transmute_copy(&addr) } + self.wrapping_offset(count.cast()) + } + + #[inline] + fn wrapping_sub(self, count: Self::Usize) -> Self { + self.wrapping_offset(-count.cast::()) } } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 5920960c49c..4fc6202e14e 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -1,11 +1,14 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { - /// Vector of usize with the same number of lanes. + /// Vector of `usize` with the same number of lanes. type Usize; + /// Vector of `isize` with the same number of lanes. + type Isize; + /// Vector of constant pointers to the same type. 
type ConstPtr; @@ -23,10 +26,20 @@ pub trait SimdMutPtr: Copy + Sealed { /// Equivalent to calling [`pointer::addr`] on each lane. fn addr(self) -> Self::Usize; + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. + fn wrapping_offset(self, offset: Self::Isize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. /// /// Equivalent to calling [`pointer::wrapping_add`] on each lane. fn wrapping_add(self, count: Self::Usize) -> Self; + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + fn wrapping_sub(self, count: Self::Usize) -> Self; } impl Sealed for Simd<*mut T, LANES> where LaneCount: SupportedLaneCount @@ -37,6 +50,7 @@ where LaneCount: SupportedLaneCount, { type Usize = Simd; + type Isize = Simd; type ConstPtr = Simd<*const T, LANES>; type Mask = Mask; @@ -55,10 +69,19 @@ where self.cast() } + #[inline] + fn wrapping_offset(self, count: Self::Isize) -> Self { + // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets + unsafe { intrinsics::simd_arith_offset(self, count) } + } + #[inline] fn wrapping_add(self, count: Self::Usize) -> Self { - let addr = self.addr() + (count * Simd::splat(core::mem::size_of::())); - // Safety: transmuting usize to pointers is safe, even if accessing those pointers isn't. 
- unsafe { core::mem::transmute_copy(&addr) } + self.wrapping_offset(count.cast()) + } + + #[inline] + fn wrapping_sub(self, count: Self::Usize) -> Self { + self.wrapping_offset(-count.cast::()) } } diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 6047890a093..41128cd1481 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -151,4 +151,7 @@ extern "platform-intrinsic" { pub(crate) fn simd_select(m: M, yes: T, no: T) -> T; #[allow(unused)] pub(crate) fn simd_select_bitmask(m: M, yes: T, no: T) -> T; + + // equivalent to wrapping_offset + pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; } diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index df26c462f93..2c20362119e 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -21,6 +21,14 @@ macro_rules! common_tests { ); } + fn wrapping_offset() { + test_helpers::test_binary_elementwise( + &Simd::<*$constness (), LANES>::wrapping_offset, + &<*$constness ()>::wrapping_offset, + &|_, _| true, + ); + } + fn wrapping_add() { test_helpers::test_binary_elementwise( &Simd::<*$constness (), LANES>::wrapping_add, @@ -28,6 +36,14 @@ macro_rules! 
common_tests { &|_, _| true, ); } + + fn wrapping_sub() { + test_helpers::test_binary_elementwise( + &Simd::<*$constness (), LANES>::wrapping_sub, + &<*$constness ()>::wrapping_sub, + &|_, _| true, + ); + } } } } From da25087f790e9c15fcf633a39e5de307608c9251 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 24 Jun 2022 01:26:24 -0400 Subject: [PATCH 106/161] Test a more useful pointer --- crates/core_simd/tests/pointers.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index 2c20362119e..8eb0bd84042 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -7,40 +7,40 @@ macro_rules! common_tests { test_helpers::test_lanes! { fn is_null() { test_helpers::test_unary_mask_elementwise( - &Simd::<*$constness (), LANES>::is_null, - &<*$constness ()>::is_null, + &Simd::<*$constness u32, LANES>::is_null, + &<*$constness u32>::is_null, &|_| true, ); } fn addr() { test_helpers::test_unary_elementwise( - &Simd::<*$constness (), LANES>::addr, - &<*$constness ()>::addr, + &Simd::<*$constness u32, LANES>::addr, + &<*$constness u32>::addr, &|_| true, ); } fn wrapping_offset() { test_helpers::test_binary_elementwise( - &Simd::<*$constness (), LANES>::wrapping_offset, - &<*$constness ()>::wrapping_offset, + &Simd::<*$constness u32, LANES>::wrapping_offset, + &<*$constness u32>::wrapping_offset, &|_, _| true, ); } fn wrapping_add() { test_helpers::test_binary_elementwise( - &Simd::<*$constness (), LANES>::wrapping_add, - &<*$constness ()>::wrapping_add, + &Simd::<*$constness u32, LANES>::wrapping_add, + &<*$constness u32>::wrapping_add, &|_, _| true, ); } fn wrapping_sub() { test_helpers::test_binary_elementwise( - &Simd::<*$constness (), LANES>::wrapping_sub, - &<*$constness ()>::wrapping_sub, + &Simd::<*$constness u32, LANES>::wrapping_sub, + &<*$constness u32>::wrapping_sub, &|_, _| true, ); } From 
e7cc021189f1d18974057d60223bdbb5abd4dc15 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 25 Jun 2022 00:00:20 -0400 Subject: [PATCH 107/161] Fix casts --- crates/core_simd/src/cast.rs | 158 +++++++-------------- crates/core_simd/src/elements/const_ptr.rs | 33 ++++- crates/core_simd/src/elements/mut_ptr.rs | 30 +++- crates/core_simd/src/vector.rs | 5 +- 4 files changed, 115 insertions(+), 111 deletions(-) diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index d62d3f6635d..ddcc786afa4 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -1,129 +1,79 @@ use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount}; /// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. -pub trait SimdCast: SimdElement { +pub unsafe trait SimdCast: SimdElement { #[doc(hidden)] fn cast(x: Simd) -> Simd where - LaneCount: SupportedLaneCount; + LaneCount: SupportedLaneCount, + { + // Safety: implementing this trait indicates that the types are supported by `simd_as` + unsafe { intrinsics::simd_as(x) } + } + + #[doc(hidden)] + unsafe fn cast_unchecked(x: Simd) -> Simd + where + LaneCount: SupportedLaneCount, + { + // Safety: implementing this trait indicates that the types are supported by `simd_cast` + // The caller is responsible for the conversion invariants. + unsafe { intrinsics::simd_cast(x) } + } } macro_rules! into_number { - { $from:ty, $to:ty } => { - impl SimdCast<$to> for $from { - fn cast(x: Simd) -> Simd<$to, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: simd_as can handle numeric conversions - unsafe { intrinsics::simd_as(x) } - } - } + { unsafe $from:ty as $to:ty } => { + // Safety: casting between numbers is supported by `simd_cast` and `simd_as` + unsafe impl SimdCast<$to> for $from {} }; - { $($type:ty),* } => { + { unsafe $($type:ty),* } => { $( - into_number! { $type, i8 } - into_number! { $type, i16 } - into_number! { $type, i32 } - into_number! 
{ $type, i64 } - into_number! { $type, isize } + into_number! { unsafe $type as i8 } + into_number! { unsafe $type as i16 } + into_number! { unsafe $type as i32 } + into_number! { unsafe $type as i64 } + into_number! { unsafe $type as isize } - into_number! { $type, u8 } - into_number! { $type, u16 } - into_number! { $type, u32 } - into_number! { $type, u64 } - into_number! { $type, usize } + into_number! { unsafe $type as u8 } + into_number! { unsafe $type as u16 } + into_number! { unsafe $type as u32 } + into_number! { unsafe $type as u64 } + into_number! { unsafe $type as usize } - into_number! { $type, f32 } - into_number! { $type, f64 } + into_number! { unsafe $type as f32 } + into_number! { unsafe $type as f64 } )* } } -into_number! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 } +into_number! { unsafe i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 } +// TODO uncomment pending PR to rustc +/* macro_rules! into_pointer { - { $($type:ty),* } => { + { unsafe $($type:ty),* } => { $( - impl SimdCast<$type> for *const T { - fn cast(x: Simd) -> Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting isize to pointers is safe - let x: Simd = unsafe { core::mem::transmute_copy(&x) }; - x.cast() - } - } - impl SimdCast<$type> for *mut T { - fn cast(x: Simd) -> Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting isize to pointers is safe - let x: Simd = unsafe { core::mem::transmute_copy(&x) }; - x.cast() - } - } - impl SimdCast<*const T> for $type { - fn cast(x: Simd<$type, LANES>) -> Simd<*const T, LANES> - where - LaneCount: SupportedLaneCount, - { - let x: Simd = x.cast(); - // Safety: transmuting isize to pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } - } - impl SimdCast<*mut T> for $type { - fn cast(x: Simd<$type, LANES>) -> Simd<*mut T, LANES> - where - LaneCount: SupportedLaneCount, - { - let x: Simd = x.cast(); - // Safety: transmuting isize to 
pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } - } + // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` + unsafe impl SimdCast<$type> for *const T {} + // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` + unsafe impl SimdCast<$type> for *mut T {} + // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` + unsafe impl SimdCast<*const T> for $type {} + // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` + unsafe impl SimdCast<*mut T> for $type {} )* } } -into_pointer! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } +into_pointer! { unsafe i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } -impl SimdCast<*const T> for *const U { - fn cast(x: Simd<*const U, LANES>) -> Simd<*const T, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } -} -impl SimdCast<*const T> for *mut U { - fn cast(x: Simd<*mut U, LANES>) -> Simd<*const T, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } -} -impl SimdCast<*mut T> for *const U { - fn cast(x: Simd<*const U, LANES>) -> Simd<*mut T, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } -} -impl SimdCast<*mut T> for *mut U { - fn cast(x: Simd<*mut U, LANES>) -> Simd<*mut T, LANES> - where - LaneCount: SupportedLaneCount, - { - // Safety: transmuting pointers is safe - unsafe { core::mem::transmute_copy(&x) } - } -} +// Safety: casting between pointers is supported by `simd_cast` and `simd_as` +unsafe impl SimdCast<*const T> for *const U {} +// Safety: casting between pointers is supported by `simd_cast` and `simd_as` +unsafe impl SimdCast<*const T> for *mut U {} +// Safety: casting between pointers is 
supported by `simd_cast` and `simd_as` +unsafe impl SimdCast<*mut T> for *const U {} +// Safety: casting between pointers is supported by `simd_cast` and `simd_as` +unsafe impl SimdCast<*mut T> for *mut U {} +*/ diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index d10bd1481d0..5a5faad23c8 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -23,9 +23,23 @@ pub trait SimdConstPtr: Copy + Sealed { /// Gets the "address" portion of the pointer. /// + /// This method discards pointer semantic metadata, so the result cannot be + /// directly cast into a valid pointer. + /// + /// This method semantically discards *provenance* and + /// *address-space* information. To properly restore that information, use [`with_addr`]. + /// /// Equivalent to calling [`pointer::addr`] on each lane. fn addr(self) -> Self::Usize; + /// Creates a new pointer with the given address. + /// + /// This performs the same operation as a cast, but copies the *address-space* and + /// *provenance* of `self` to the new pointer. + /// + /// Equivalent to calling [`pointer::with_addr`] on each lane. + fn with_addr(self, addr: Self::Usize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. /// /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. @@ -63,12 +77,27 @@ where #[inline] fn as_mut(self) -> Self::MutPtr { - self.cast() + unimplemented!() + //self.cast() } #[inline] fn addr(self) -> Self::Usize { - self.cast() + // Safety: Since `addr` discards provenance, this is safe. 
+ unsafe { core::mem::transmute_copy(&self) } + + //TODO switch to casts when available + //self.cast() + } + + #[inline] + fn with_addr(self, addr: Self::Usize) -> Self { + unimplemented!() + /* + self.cast::<*const u8>() + .wrapping_offset(addr.cast::() - self.addr().cast::()) + .cast() + */ } #[inline] diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 4fc6202e14e..d7b05af0eac 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -23,9 +23,20 @@ pub trait SimdMutPtr: Copy + Sealed { /// Gets the "address" portion of the pointer. /// + /// This method discards pointer semantic metadata, so the result cannot be + /// directly cast into a valid pointer. + /// /// Equivalent to calling [`pointer::addr`] on each lane. fn addr(self) -> Self::Usize; + /// Creates a new pointer with the given address. + /// + /// This performs the same operation as a cast, but copies the *address-space* and + /// *provenance* of `self` to the new pointer. + /// + /// Equivalent to calling [`pointer::with_addr`] on each lane. + fn with_addr(self, addr: Self::Usize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. /// /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. @@ -61,12 +72,27 @@ where #[inline] fn as_const(self) -> Self::ConstPtr { - self.cast() + unimplemented!() + //self.cast() } #[inline] fn addr(self) -> Self::Usize { - self.cast() + // Safety: Since `addr` discards provenance, this is safe. 
+ unsafe { core::mem::transmute_copy(&self) } + + //TODO switch to casts when available + //self.cast() + } + + #[inline] + fn with_addr(self, addr: Self::Usize) -> Self { + unimplemented!() + /* + self.cast::<*mut u8>() + .wrapping_offset(addr.cast::() - self.addr().cast::()) + .cast() + */ } #[inline] diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 2fc090254d7..3987b7a747b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -237,9 +237,8 @@ where T: core::convert::FloatToInt + SimdCast, I: SimdElement, { - // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to - // an integer. - unsafe { intrinsics::simd_cast(self) } + // Safety: the caller is responsible for the invariants + unsafe { SimdCast::cast_unchecked(self) } } /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. From 8a5a5732a1527fbdffbc825ae630d911fc130e2e Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 26 Jun 2022 10:07:48 -0400 Subject: [PATCH 108/161] Clarify addr and with_addr implementations --- crates/core_simd/src/elements/const_ptr.rs | 14 +++++++++----- crates/core_simd/src/elements/mut_ptr.rs | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 5a5faad23c8..3485d31e44d 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -83,17 +83,21 @@ where #[inline] fn addr(self) -> Self::Usize { - // Safety: Since `addr` discards provenance, this is safe. + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the + // provenance). 
unsafe { core::mem::transmute_copy(&self) } - - //TODO switch to casts when available - //self.cast() } #[inline] - fn with_addr(self, addr: Self::Usize) -> Self { + fn with_addr(self, _addr: Self::Usize) -> Self { unimplemented!() /* + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. self.cast::<*const u8>() .wrapping_offset(addr.cast::() - self.addr().cast::()) .cast() diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index d7b05af0eac..39fe9f35621 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -78,17 +78,21 @@ where #[inline] fn addr(self) -> Self::Usize { - // Safety: Since `addr` discards provenance, this is safe. + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the + // provenance). unsafe { core::mem::transmute_copy(&self) } - - //TODO switch to casts when available - //self.cast() } #[inline] - fn with_addr(self, addr: Self::Usize) -> Self { + fn with_addr(self, _addr: Self::Usize) -> Self { unimplemented!() /* + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. 
self.cast::<*mut u8>() .wrapping_offset(addr.cast::() - self.addr().cast::()) .cast() From 176cc81324d008bd58e28136aa8e60b537caa3ce Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 4 Aug 2022 19:31:50 -0400 Subject: [PATCH 109/161] Update for new intrinsics --- crates/core_simd/src/cast.rs | 92 +++++----------------- crates/core_simd/src/elements/const_ptr.rs | 31 ++++++-- crates/core_simd/src/elements/mut_ptr.rs | 28 +++++-- crates/core_simd/src/intrinsics.rs | 23 ++++++ crates/core_simd/src/vector.rs | 30 ++++--- 5 files changed, 107 insertions(+), 97 deletions(-) diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index ddcc786afa4..d14b0de5d5e 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -1,79 +1,23 @@ -use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::SimdElement; /// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. -pub unsafe trait SimdCast: SimdElement { - #[doc(hidden)] - fn cast(x: Simd) -> Simd - where - LaneCount: SupportedLaneCount, - { - // Safety: implementing this trait indicates that the types are supported by `simd_as` - unsafe { intrinsics::simd_as(x) } - } +pub unsafe trait SimdCast: SimdElement {} - #[doc(hidden)] - unsafe fn cast_unchecked(x: Simd) -> Simd - where - LaneCount: SupportedLaneCount, - { - // Safety: implementing this trait indicates that the types are supported by `simd_cast` - // The caller is responsible for the conversion invariants. - unsafe { intrinsics::simd_cast(x) } - } -} +unsafe impl SimdCast for i8 {} +unsafe impl SimdCast for i16 {} +unsafe impl SimdCast for i32 {} +unsafe impl SimdCast for i64 {} +unsafe impl SimdCast for isize {} +unsafe impl SimdCast for u8 {} +unsafe impl SimdCast for u16 {} +unsafe impl SimdCast for u32 {} +unsafe impl SimdCast for u64 {} +unsafe impl SimdCast for usize {} +unsafe impl SimdCast for f32 {} +unsafe impl SimdCast for f64 {} -macro_rules! 
into_number { - { unsafe $from:ty as $to:ty } => { - // Safety: casting between numbers is supported by `simd_cast` and `simd_as` - unsafe impl SimdCast<$to> for $from {} - }; - { unsafe $($type:ty),* } => { - $( - into_number! { unsafe $type as i8 } - into_number! { unsafe $type as i16 } - into_number! { unsafe $type as i32 } - into_number! { unsafe $type as i64 } - into_number! { unsafe $type as isize } +/// Supporting trait for `Simd::cast_ptr`. Typically doesn't need to be used directly. +pub unsafe trait SimdCastPtr: SimdElement {} - into_number! { unsafe $type as u8 } - into_number! { unsafe $type as u16 } - into_number! { unsafe $type as u32 } - into_number! { unsafe $type as u64 } - into_number! { unsafe $type as usize } - - into_number! { unsafe $type as f32 } - into_number! { unsafe $type as f64 } - )* - } -} - -into_number! { unsafe i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 } - -// TODO uncomment pending PR to rustc -/* -macro_rules! into_pointer { - { unsafe $($type:ty),* } => { - $( - // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` - unsafe impl SimdCast<$type> for *const T {} - // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` - unsafe impl SimdCast<$type> for *mut T {} - // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` - unsafe impl SimdCast<*const T> for $type {} - // Safety: casting between numbers and pointers is supported by `simd_cast` and `simd_as` - unsafe impl SimdCast<*mut T> for $type {} - )* - } -} - -into_pointer! 
{ unsafe i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } - -// Safety: casting between pointers is supported by `simd_cast` and `simd_as` -unsafe impl SimdCast<*const T> for *const U {} -// Safety: casting between pointers is supported by `simd_cast` and `simd_as` -unsafe impl SimdCast<*const T> for *mut U {} -// Safety: casting between pointers is supported by `simd_cast` and `simd_as` -unsafe impl SimdCast<*mut T> for *const U {} -// Safety: casting between pointers is supported by `simd_cast` and `simd_as` -unsafe impl SimdCast<*mut T> for *mut U {} -*/ +unsafe impl SimdCastPtr for *const T {} +unsafe impl SimdCastPtr for *mut T {} diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 3485d31e44d..27b41019dc8 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -40,6 +40,15 @@ pub trait SimdConstPtr: Copy + Sealed { /// Equivalent to calling [`pointer::with_addr`] on each lane. fn with_addr(self, addr: Self::Usize) -> Self; + /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use + /// in [`from_exposed_addr`]. + fn expose_addr(self) -> Self::Usize; + + /// Convert an address back to a pointer, picking up a previously "exposed" provenance. + /// + /// Equivalent to calling [`pointer::from_exposed_addr`] on each lane. + fn from_exposed_addr(addr: Self::Usize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. /// /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. @@ -77,8 +86,7 @@ where #[inline] fn as_mut(self) -> Self::MutPtr { - unimplemented!() - //self.cast() + unsafe { intrinsics::simd_cast_ptr(self) } } #[inline] @@ -90,18 +98,25 @@ where } #[inline] - fn with_addr(self, _addr: Self::Usize) -> Self { - unimplemented!() - /* + fn with_addr(self, addr: Self::Usize) -> Self { // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. 
// // In the mean-time, this operation is defined to be "as if" it was // a wrapping_offset, so we can emulate it as such. This should properly // restore pointer provenance even under today's compiler. - self.cast::<*const u8>() + self.cast_ptr::<*const u8>() .wrapping_offset(addr.cast::() - self.addr().cast::()) - .cast() - */ + .cast_ptr() + } + + #[inline] + fn expose_addr(self) -> Self::Usize { + unsafe { intrinsics::simd_expose_addr(self) } + } + + #[inline] + fn from_exposed_addr(addr: Self::Usize) -> Self { + unsafe { intrinsics::simd_from_exposed_addr(addr) } } #[inline] diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 39fe9f35621..59a8b6293b5 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -37,6 +37,15 @@ pub trait SimdMutPtr: Copy + Sealed { /// Equivalent to calling [`pointer::with_addr`] on each lane. fn with_addr(self, addr: Self::Usize) -> Self; + /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use + /// in [`from_exposed_addr`]. + fn expose_addr(self) -> Self::Usize; + + /// Convert an address back to a pointer, picking up a previously "exposed" provenance. + /// + /// Equivalent to calling [`pointer::from_exposed_addr`] on each lane. + fn from_exposed_addr(addr: Self::Usize) -> Self; + /// Calculates the offset from a pointer using wrapping arithmetic. /// /// Equivalent to calling [`pointer::wrapping_offset`] on each lane. @@ -85,18 +94,25 @@ where } #[inline] - fn with_addr(self, _addr: Self::Usize) -> Self { - unimplemented!() - /* + fn with_addr(self, addr: Self::Usize) -> Self { // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // // In the mean-time, this operation is defined to be "as if" it was // a wrapping_offset, so we can emulate it as such. This should properly // restore pointer provenance even under today's compiler. 
- self.cast::<*mut u8>() + self.cast_ptr::<*mut u8>() .wrapping_offset(addr.cast::() - self.addr().cast::()) - .cast() - */ + .cast_ptr() + } + + #[inline] + fn expose_addr(self) -> Self::Usize { + unsafe { intrinsics::simd_expose_addr(self) } + } + + #[inline] + fn from_exposed_addr(addr: Self::Usize) -> Self { + unsafe { intrinsics::simd_from_exposed_addr(addr) } } #[inline] diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 41128cd1481..c0fbae2db08 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -154,4 +154,27 @@ extern "platform-intrinsic" { // equivalent to wrapping_offset pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; + + /* + /// equivalent to `T as U` semantics, specifically for pointers + pub(crate) fn simd_cast_ptr(ptr: T) -> U; + + /// expose a pointer as an address + pub(crate) fn simd_expose_addr(ptr: T) -> U; + + /// convert an exposed address back to a pointer + pub(crate) fn simd_from_exposed_addr(addr: T) -> U; + */ +} + +pub(crate) unsafe fn simd_cast_ptr(_ptr: T) -> U { + unimplemented!() +} + +pub(crate) unsafe fn simd_expose_addr(_ptr: T) -> U { + unimplemented!() +} + +pub(crate) unsafe fn simd_from_exposed_addr(_addr: T) -> U { + unimplemented!() } diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 3987b7a747b..3c435c4c805 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,6 +1,6 @@ use crate::simd::{ - intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdConstPtr, SimdMutPtr, SimdPartialOrd, - SupportedLaneCount, Swizzle, + intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdCastPtr, SimdConstPtr, SimdMutPtr, + SimdPartialOrd, SupportedLaneCount, Swizzle, }; /// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. 
@@ -209,11 +209,23 @@ where #[must_use] #[inline] #[cfg(not(bootstrap))] - pub fn cast(self) -> Simd + pub fn cast(self) -> Simd where - T: SimdCast, + T: SimdCast, { - SimdCast::cast(self) + // Safety: supported types are guaranteed by SimdCast + unsafe { intrinsics::simd_as(self) } + } + + /// Lanewise casts pointers to another pointer type. + #[must_use] + #[inline] + pub fn cast_ptr(self) -> Simd + where + T: SimdCastPtr, + { + // Safety: supported types are guaranteed by SimdCastPtr + unsafe { intrinsics::simd_cast_ptr(self) } } /// Rounds toward zero and converts to the same-width integer type, assuming that @@ -234,11 +246,11 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn to_int_unchecked(self) -> Simd where - T: core::convert::FloatToInt + SimdCast, - I: SimdElement, + T: core::convert::FloatToInt + SimdCast, + I: SimdCast, { - // Safety: the caller is responsible for the invariants - unsafe { SimdCast::cast_unchecked(self) } + // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants + unsafe { intrinsics::simd_cast(self) } } /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. 
From dadf98a290e4f52d02a469f97931b90e953a98cf Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 4 Aug 2022 19:38:56 -0400 Subject: [PATCH 110/161] Remove duplicate intrinsic --- crates/core_simd/src/intrinsics.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index c0fbae2db08..45f01fa0f77 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -61,9 +61,6 @@ extern "platform-intrinsic" { /// xor pub(crate) fn simd_xor(x: T, y: T) -> T; - /// getelementptr (without inbounds) - pub(crate) fn simd_arith_offset(ptrs: T, offsets: U) -> T; - /// fptoui/fptosi/uitofp/sitofp /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5 /// but the truncated value must fit in the target type or the result is poison. @@ -152,7 +149,8 @@ extern "platform-intrinsic" { #[allow(unused)] pub(crate) fn simd_select_bitmask(m: M, yes: T, no: T) -> T; - // equivalent to wrapping_offset + /// getelementptr (without inbounds) + /// equivalent to wrapping_offset pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; /* From e5db1ecc8209e90982cc4603514028ef2210e592 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 4 Aug 2022 19:46:39 -0400 Subject: [PATCH 111/161] Fix documentation --- crates/core_simd/src/elements/const_ptr.rs | 6 +++--- crates/core_simd/src/elements/mut_ptr.rs | 4 ++-- crates/core_simd/src/lib.rs | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 27b41019dc8..0a3d4ec4087 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -27,7 +27,7 @@ pub trait SimdConstPtr: Copy + Sealed { /// directly cast into a valid pointer. /// /// This method semantically discards *provenance* and - /// *address-space* information. 
To properly restore that information, use [`with_addr`]. + /// *address-space* information. To properly restore that information, use [`Self::with_addr`]. /// /// Equivalent to calling [`pointer::addr`] on each lane. fn addr(self) -> Self::Usize; @@ -41,12 +41,12 @@ pub trait SimdConstPtr: Copy + Sealed { fn with_addr(self, addr: Self::Usize) -> Self; /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use - /// in [`from_exposed_addr`]. + /// in [`Self::from_exposed_addr`]. fn expose_addr(self) -> Self::Usize; /// Convert an address back to a pointer, picking up a previously "exposed" provenance. /// - /// Equivalent to calling [`pointer::from_exposed_addr`] on each lane. + /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each lane. fn from_exposed_addr(addr: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 59a8b6293b5..e6aa9808f37 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -38,12 +38,12 @@ pub trait SimdMutPtr: Copy + Sealed { fn with_addr(self, addr: Self::Usize) -> Self; /// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use - /// in [`from_exposed_addr`]. + /// in [`Self::from_exposed_addr`]. fn expose_addr(self) -> Self::Usize; /// Convert an address back to a pointer, picking up a previously "exposed" provenance. /// - /// Equivalent to calling [`pointer::from_exposed_addr`] on each lane. + /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each lane. fn from_exposed_addr(addr: Self::Usize) -> Self; /// Calculates the offset from a pointer using wrapping arithmetic. 
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 715f258f617..05ac3e9338b 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -7,7 +7,8 @@ repr_simd, simd_ffi, staged_api, - stdsimd + stdsimd, + strict_provenance )] #![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))] #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] From 0fcc4069c12a4cffa69397388a0be42d45afdd49 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 4 Aug 2022 20:17:16 -0400 Subject: [PATCH 112/161] Fix pointer mutability casts and safety lints --- crates/core_simd/src/cast.rs | 22 ++++++++++++++++++++++ crates/core_simd/src/elements/const_ptr.rs | 4 +++- crates/core_simd/src/elements/mut_ptr.rs | 5 +++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index d14b0de5d5e..33878581e0b 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -1,23 +1,45 @@ use crate::simd::SimdElement; /// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. +/// +/// # Safety +/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast` or +/// `simd_as` intrinsics. 
pub unsafe trait SimdCast: SimdElement {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for i8 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for i16 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for i32 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for i64 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for isize {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for u8 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for u16 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for u32 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for u64 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for usize {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for f32 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl SimdCast for f64 {} /// Supporting trait for `Simd::cast_ptr`. Typically doesn't need to be used directly. +/// +/// # Safety +/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast_ptr` +/// intrinsic. 
pub unsafe trait SimdCastPtr: SimdElement {} +// Safety: pointers can be cast to other pointer types unsafe impl SimdCastPtr for *const T {} +// Safety: pointers can be cast to other pointer types unsafe impl SimdCastPtr for *mut T {} diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 0a3d4ec4087..7c856fd4332 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -86,7 +86,7 @@ where #[inline] fn as_mut(self) -> Self::MutPtr { - unsafe { intrinsics::simd_cast_ptr(self) } + self.cast_ptr() } #[inline] @@ -111,11 +111,13 @@ where #[inline] fn expose_addr(self) -> Self::Usize { + // Safety: `self` is a pointer vector unsafe { intrinsics::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { + // Safety: `self` is a pointer vector unsafe { intrinsics::simd_from_exposed_addr(addr) } } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index e6aa9808f37..5e904d24a42 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -81,8 +81,7 @@ where #[inline] fn as_const(self) -> Self::ConstPtr { - unimplemented!() - //self.cast() + self.cast_ptr() } #[inline] @@ -107,11 +106,13 @@ where #[inline] fn expose_addr(self) -> Self::Usize { + // Safety: `self` is a pointer vector unsafe { intrinsics::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { + // Safety: `self` is a pointer vector unsafe { intrinsics::simd_from_exposed_addr(addr) } } From a79718ffa8cdfb5ee7ab3d9281b162fe37eb0606 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 18 Sep 2022 16:48:51 -0400 Subject: [PATCH 113/161] Use new intrinsics --- crates/core_simd/src/intrinsics.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 45f01fa0f77..d5466822b93 
100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -153,7 +153,6 @@ extern "platform-intrinsic" { /// equivalent to wrapping_offset pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; - /* /// equivalent to `T as U` semantics, specifically for pointers pub(crate) fn simd_cast_ptr(ptr: T) -> U; @@ -162,17 +161,4 @@ extern "platform-intrinsic" { /// convert an exposed address back to a pointer pub(crate) fn simd_from_exposed_addr(addr: T) -> U; - */ -} - -pub(crate) unsafe fn simd_cast_ptr(_ptr: T) -> U { - unimplemented!() -} - -pub(crate) unsafe fn simd_expose_addr(_ptr: T) -> U { - unimplemented!() -} - -pub(crate) unsafe fn simd_from_exposed_addr(_addr: T) -> U { - unimplemented!() } From 078cb58e766c20a2705f22f7a6f9bc0cf451e16d Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 18 Sep 2022 22:47:34 -0400 Subject: [PATCH 114/161] Apply suggestions from code review Co-authored-by: Jacob Lifshay --- crates/core_simd/src/elements/const_ptr.rs | 2 +- crates/core_simd/src/elements/mut_ptr.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 7c856fd4332..f7227a56d58 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -61,7 +61,7 @@ pub trait SimdConstPtr: Copy + Sealed { /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + /// Equivalent to calling [`pointer::wrapping_sub`] on each lane. 
fn wrapping_sub(self, count: Self::Usize) -> Self; } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index 5e904d24a42..e2fd438ef8f 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -58,7 +58,7 @@ pub trait SimdMutPtr: Copy + Sealed { /// Calculates the offset from a pointer using wrapping arithmetic. /// - /// Equivalent to calling [`pointer::wrapping_add`] on each lane. + /// Equivalent to calling [`pointer::wrapping_sub`] on each lane. fn wrapping_sub(self, count: Self::Usize) -> Self; } From 469c620bded61d265ef020b2442b1f639b2d8c10 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 21 Oct 2022 21:43:48 -0400 Subject: [PATCH 115/161] Account for pointer metadata in pointer bounds --- crates/core_simd/src/cast.rs | 16 +++++++++++++--- crates/core_simd/src/lib.rs | 3 ++- crates/core_simd/src/vector.rs | 23 +++++++++++++++++------ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index 33878581e0b..65a3f845ffc 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -37,9 +37,19 @@ unsafe impl SimdCast for f64 {} /// # Safety /// Implementing this trait asserts that the type is a valid vector element for the `simd_cast_ptr` /// intrinsic. 
-pub unsafe trait SimdCastPtr: SimdElement {} +pub unsafe trait SimdCastPtr {} // Safety: pointers can be cast to other pointer types -unsafe impl SimdCastPtr for *const T {} +unsafe impl SimdCastPtr for *const U +where + U: core::ptr::Pointee, + T: core::ptr::Pointee, +{ +} // Safety: pointers can be cast to other pointer types -unsafe impl SimdCastPtr for *mut T {} +unsafe impl SimdCastPtr for *mut U +where + U: core::ptr::Pointee, + T: core::ptr::Pointee, +{ +} diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 05ac3e9338b..82873162969 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -8,7 +8,8 @@ simd_ffi, staged_api, stdsimd, - strict_provenance + strict_provenance, + ptr_metadata )] #![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))] #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 3c435c4c805..c5d68f1b921 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -220,9 +220,10 @@ where /// Lanewise casts pointers to another pointer type. #[must_use] #[inline] - pub fn cast_ptr(self) -> Simd + pub fn cast_ptr(self) -> Simd where - T: SimdCastPtr, + T: SimdCastPtr, + U: SimdElement, { // Safety: supported types are guaranteed by SimdCastPtr unsafe { intrinsics::simd_cast_ptr(self) } @@ -753,14 +754,24 @@ unsafe impl SimdElement for f64 { impl Sealed for *const T {} -// Safety: const pointers are valid SIMD element types, and are supported by this API -unsafe impl SimdElement for *const T { +// Safety: (thin) const pointers are valid SIMD element types, and are supported by this API +// +// Fat pointers may be supported in the future. 
+unsafe impl SimdElement for *const T +where + T: core::ptr::Pointee, +{ type Mask = isize; } impl Sealed for *mut T {} -// Safety: mut pointers are valid SIMD element types, and are supported by this API -unsafe impl SimdElement for *mut T { +// Safety: (thin) mut pointers are valid SIMD element types, and are supported by this API +// +// Fat pointers may be supported in the future. +unsafe impl SimdElement for *mut T +where + T: core::ptr::Pointee, +{ type Mask = isize; } From de30820035cb42d05f49575811a9f33661985e67 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 29 Oct 2022 21:39:08 -0400 Subject: [PATCH 116/161] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 80313157ea2..e8ac600debe 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,10 @@ The supported element types are as follows: * **Floating Point:** `f32`, `f64` * **Signed Integers:** `i8`, `i16`, `i32`, `i64`, `isize` (`i128` excluded) * **Unsigned Integers:** `u8`, `u16`, `u32`, `u64`, `usize` (`u128` excluded) +* **Pointers:** `*const T` and `*mut T` (zero-sized metadata only) * **Masks:** 8-bit, 16-bit, 32-bit, 64-bit, and `usize`-sized masks -Floating point, signed integers, and unsigned integers are the [primitive types](https://doc.rust-lang.org/core/primitive/index.html) you're already used to. +Floating point, signed integers, unsigned integers, and pointers are the [primitive types](https://doc.rust-lang.org/core/primitive/index.html) you're already used to. The mask types have elements that are "truthy" values, like `bool`, but have an unspecified layout because different architectures prefer different layouts for mask types. 
[simd-guide]: ./beginners-guide.md From 572122a95da6f8aaf513f53c426732f4c0a91325 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 9 Nov 2022 21:28:38 -0500 Subject: [PATCH 117/161] Add missing pointer tests and rename pointer cast fns to match scalars --- crates/core_simd/src/elements/const_ptr.rs | 6 ++- crates/core_simd/src/elements/mut_ptr.rs | 6 ++- crates/core_simd/tests/pointers.rs | 52 ++++++++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index f7227a56d58..0ef9802b5e2 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -19,7 +19,9 @@ pub trait SimdConstPtr: Copy + Sealed { fn is_null(self) -> Self::Mask; /// Changes constness without changing the type. - fn as_mut(self) -> Self::MutPtr; + /// + /// Equivalent to calling [`pointer::cast_mut`] on each lane. + fn cast_mut(self) -> Self::MutPtr; /// Gets the "address" portion of the pointer. /// @@ -85,7 +87,7 @@ where } #[inline] - fn as_mut(self) -> Self::MutPtr { + fn cast_mut(self) -> Self::MutPtr { self.cast_ptr() } diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index e2fd438ef8f..d87986b4a09 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -19,7 +19,9 @@ pub trait SimdMutPtr: Copy + Sealed { fn is_null(self) -> Self::Mask; /// Changes constness without changing the type. - fn as_const(self) -> Self::ConstPtr; + /// + /// Equivalent to calling [`pointer::cast_const`] on each lane. + fn cast_const(self) -> Self::ConstPtr; /// Gets the "address" portion of the pointer. 
/// @@ -80,7 +82,7 @@ where } #[inline] - fn as_const(self) -> Self::ConstPtr { + fn cast_const(self) -> Self::ConstPtr { self.cast_ptr() } diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index 8eb0bd84042..2b0008624ad 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -21,6 +21,22 @@ macro_rules! common_tests { ); } + fn with_addr() { + test_helpers::test_binary_elementwise( + &Simd::<*$constness u32, LANES>::with_addr, + &<*$constness u32>::with_addr, + &|_, _| true, + ); + } + + fn expose_addr() { + test_helpers::test_unary_elementwise( + &Simd::<*$constness u32, LANES>::expose_addr, + &<*$constness u32>::expose_addr, + &|_| true, + ); + } + fn wrapping_offset() { test_helpers::test_binary_elementwise( &Simd::<*$constness u32, LANES>::wrapping_offset, @@ -51,9 +67,45 @@ macro_rules! common_tests { mod const_ptr { use super::*; common_tests! { const } + + test_helpers::test_lanes! { + fn cast_mut() { + test_helpers::test_unary_elementwise( + &Simd::<*const u32, LANES>::cast_mut, + &<*const u32>::cast_mut, + &|_| true, + ); + } + + fn from_exposed_addr() { + test_helpers::test_unary_elementwise( + &Simd::<*const u32, LANES>::from_exposed_addr, + &core::ptr::from_exposed_addr::, + &|_| true, + ); + } + } } mod mut_ptr { use super::*; common_tests! { mut } + + test_helpers::test_lanes! 
{ + fn cast_const() { + test_helpers::test_unary_elementwise( + &Simd::<*mut u32, LANES>::cast_const, + &<*mut u32>::cast_const, + &|_| true, + ); + } + + fn from_exposed_addr() { + test_helpers::test_unary_elementwise( + &Simd::<*mut u32, LANES>::from_exposed_addr, + &core::ptr::from_exposed_addr_mut::, + &|_| true, + ); + } + } } From 7ac1fbbcb14c05f778cf1c550e2b30f00606bb97 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 11 Nov 2022 17:32:48 -0500 Subject: [PATCH 118/161] impl TryFrom<&[T]> for Simd --- crates/core_simd/src/vector.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index c5d68f1b921..0095ed1648f 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -650,6 +650,30 @@ where } } +impl TryFrom<&[T]> for Simd +where + LaneCount: SupportedLaneCount, + T: SimdElement, +{ + type Error = core::array::TryFromSliceError; + + fn try_from(slice: &[T]) -> Result { + Ok(Self::from_array(slice.try_into()?)) + } +} + +impl TryFrom<&mut [T]> for Simd +where + LaneCount: SupportedLaneCount, + T: SimdElement, +{ + type Error = core::array::TryFromSliceError; + + fn try_from(slice: &mut [T]) -> Result { + Ok(Self::from_array(slice.try_into()?)) + } +} + mod sealed { pub trait Sealed {} } From 9dc690c48265bae58ca6e307d8f35a1f74b921e3 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 11 Nov 2022 18:10:51 -0500 Subject: [PATCH 119/161] Add TryFrom<&[T]> tests --- crates/core_simd/tests/try_from_slice.rs | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 crates/core_simd/tests/try_from_slice.rs diff --git a/crates/core_simd/tests/try_from_slice.rs b/crates/core_simd/tests/try_from_slice.rs new file mode 100644 index 00000000000..189c18c6039 --- /dev/null +++ b/crates/core_simd/tests/try_from_slice.rs @@ -0,0 +1,25 @@ +#![feature(portable_simd)] + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; 
+ +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +use core_simd::i32x4; + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn try_from_slice() { + // Equal length + assert_eq!( + i32x4::try_from([1, 2, 3, 4].as_slice()).unwrap(), + i32x4::from_array([1, 2, 3, 4]) + ); + + // Slice length > vector length + assert!(i32x4::try_from([1, 2, 3, 4, 5].as_slice()).is_err()); + + // Slice length < vector length + assert!(i32x4::try_from([1, 2, 3].as_slice()).is_err()); +} From fd53445d05874d7662682b00d81cf073cfdbe505 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 11 Nov 2022 19:48:27 -0500 Subject: [PATCH 120/161] Add pointer scatter/gather --- crates/core_simd/src/vector.rs | 68 ++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index c5d68f1b921..850a517c799 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -364,8 +364,44 @@ where let base_ptr = Simd::<*const T, LANES>::splat(slice.as_ptr()); // Ferris forgive me, I have done pointer arithmetic here. let ptrs = base_ptr.wrapping_add(idxs); - // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah - unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) } + // Safety: The caller is responsible for determining the indices are okay to read + unsafe { Self::gather_select_ptr(ptrs, enable, or) } + } + + /// Read pointers elementwise into a SIMD vector vector. + /// + /// # Safety + /// + /// Each read must satisfy the same conditions as [`core::ptr::read`]. 
+ #[must_use] + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub unsafe fn gather_ptr(source: Simd<*const T, LANES>) -> Self + where + T: Default, + { + // TODO: add an intrinsic that doesn't use a passthru vector, and remove the T: Default bound + // Safety: The caller is responsible for upholding all invariants + unsafe { Self::gather_select_ptr(source, Mask::splat(true), Self::default()) } + } + + /// Conditionally read pointers elementwise into a SIMD vector vector. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If a lane is disabled, the lane is selected from the `or` vector and no read is performed. + /// + /// # Safety + /// + /// Enabled lanes must satisfy the same conditions as [`core::ptr::read`]. + #[must_use] + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub unsafe fn gather_select_ptr( + source: Simd<*const T, LANES>, + enable: Mask, + or: Self, + ) -> Self { + // Safety: The caller is responsible for upholding all invariants + unsafe { intrinsics::simd_gather(or, source, enable.to_int()) } } /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. @@ -473,10 +509,36 @@ where // Ferris forgive me, I have done pointer arithmetic here. let ptrs = base_ptr.wrapping_add(idxs); // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah - intrinsics::simd_scatter(self, ptrs, enable.to_int()) + self.scatter_select_ptr(ptrs, enable); // Cleared ☢️ *mut T Zone } } + + /// Write pointers elementwise into a SIMD vector vector. + /// + /// # Safety + /// + /// Each write must satisfy the same conditions as [`core::ptr::write`]. 
+ #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, LANES>) { + // Safety: The caller is responsible for upholding all invariants + unsafe { self.scatter_select_ptr(dest, Mask::splat(true)) } + } + + /// Conditionally write pointers elementwise into a SIMD vector vector. + /// The mask `enable`s all `true` lanes and disables all `false` lanes. + /// If a lane is disabled, the writing that lane is skipped. + /// + /// # Safety + /// + /// Enabled lanes must satisfy the same conditions as [`core::ptr::write`]. + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, LANES>, enable: Mask) { + // Safety: The caller is responsible for upholding all invariants + unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) } + } } impl Copy for Simd From bef4c41fc0051444034ad9b488b06b2d512bfd17 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 11 Nov 2022 21:31:05 -0500 Subject: [PATCH 121/161] Add test examples --- crates/core_simd/src/vector.rs | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 850a517c799..52ed5490519 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -373,6 +373,19 @@ where /// # Safety /// /// Each read must satisfy the same conditions as [`core::ptr::read`]. 
+ /// + /// # Example + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdConstPtr}; + /// let values = [6, 2, 4, 9]; + /// let offsets = Simd::from_array([1, 0, 0, 3]); + /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); + /// let gathered = unsafe { Simd::gather_ptr(source) }; + /// assert_eq!(gathered, Simd::from_array([2, 6, 6, 9])); + /// ``` #[must_use] #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces @@ -392,6 +405,20 @@ where /// # Safety /// /// Enabled lanes must satisfy the same conditions as [`core::ptr::read`]. + /// + /// # Example + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Mask, Simd, SimdConstPtr}; + /// let values = [6, 2, 4, 9]; + /// let enable = Mask::from_array([true, true, false, true]); + /// let offsets = Simd::from_array([1, 0, 0, 3]); + /// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets); + /// let gathered = unsafe { Simd::gather_select_ptr(source, enable, Simd::splat(0)) }; + /// assert_eq!(gathered, Simd::from_array([2, 6, 0, 9])); + /// ``` #[must_use] #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces @@ -519,6 +546,19 @@ where /// # Safety /// /// Each write must satisfy the same conditions as [`core::ptr::write`]. 
+ /// + /// # Example + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, SimdMutPtr}; + /// let mut values = [0; 4]; + /// let offset = Simd::from_array([3, 2, 1, 0]); + /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); + /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_ptr(ptrs); } + /// assert_eq!(values, [7, 5, 3, 6]); + /// ``` #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, LANES>) { @@ -533,6 +573,20 @@ where /// # Safety /// /// Enabled lanes must satisfy the same conditions as [`core::ptr::write`]. + /// + /// # Example + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Mask, Simd, SimdMutPtr}; + /// let mut values = [0; 4]; + /// let offset = Simd::from_array([3, 2, 1, 0]); + /// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset); + /// let enable = Mask::from_array([true, true, false, false]); + /// unsafe { Simd::from_array([6, 3, 5, 7]).scatter_select_ptr(ptrs, enable); } + /// assert_eq!(values, [0, 0, 3, 6]); + /// ``` #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, LANES>, enable: Mask) { From c247915eb88af33302b2dc393fa7b488ee680a5f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 12 Nov 2022 22:39:54 -0500 Subject: [PATCH 122/161] Update crates/core_simd/src/vector.rs Co-authored-by: Jubilee <46493976+workingjubilee@users.noreply.github.com> --- crates/core_simd/src/vector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 52ed5490519..f25505f7c59 100644 
--- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -568,7 +568,7 @@ where /// Conditionally write pointers elementwise into a SIMD vector vector. /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If a lane is disabled, the writing that lane is skipped. + /// If a lane is disabled, the write to that lane is skipped. /// /// # Safety /// From 7e614f0438324b60af24554699977757228d7acd Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 12 Nov 2022 22:41:44 -0500 Subject: [PATCH 123/161] Fix typo typo --- crates/core_simd/src/vector.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index f25505f7c59..0ddc3e1b395 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -368,7 +368,7 @@ where unsafe { Self::gather_select_ptr(ptrs, enable, or) } } - /// Read pointers elementwise into a SIMD vector vector. + /// Read pointers elementwise into a SIMD vector. /// /// # Safety /// @@ -398,7 +398,7 @@ where unsafe { Self::gather_select_ptr(source, Mask::splat(true), Self::default()) } } - /// Conditionally read pointers elementwise into a SIMD vector vector. + /// Conditionally read pointers elementwise into a SIMD vector. /// The mask `enable`s all `true` lanes and disables all `false` lanes. /// If a lane is disabled, the lane is selected from the `or` vector and no read is performed. /// @@ -541,7 +541,7 @@ where } } - /// Write pointers elementwise into a SIMD vector vector. + /// Write pointers elementwise into a SIMD vector. /// /// # Safety /// @@ -566,7 +566,7 @@ where unsafe { self.scatter_select_ptr(dest, Mask::splat(true)) } } - /// Conditionally write pointers elementwise into a SIMD vector vector. + /// Conditionally write pointers elementwise into a SIMD vector. /// The mask `enable`s all `true` lanes and disables all `false` lanes. 
/// If a lane is disabled, the write to that lane is skipped. /// From db8b23cea5ac9b45fafef65d95702f41cc02d486 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 27 Nov 2022 23:44:20 -0500 Subject: [PATCH 124/161] Remove reexport of simd::* --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/tests/autoderef.rs | 2 +- .../tests/mask_ops_impl/mask_macros.rs | 2 +- crates/core_simd/tests/masks.rs | 59 ++++++++++--------- crates/core_simd/tests/ops_macros.rs | 14 ++--- crates/core_simd/tests/pointers.rs | 2 +- crates/core_simd/tests/round.rs | 2 +- crates/core_simd/tests/swizzle.rs | 2 +- crates/core_simd/tests/to_bytes.rs | 2 +- crates/core_simd/tests/try_from_slice.rs | 2 +- crates/test_helpers/src/lib.rs | 4 +- 11 files changed, 47 insertions(+), 45 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 82873162969..a6359d1e0be 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -21,4 +21,3 @@ #[path = "mod.rs"] mod core_simd; pub use self::core_simd::simd; -pub use simd::*; diff --git a/crates/core_simd/tests/autoderef.rs b/crates/core_simd/tests/autoderef.rs index 9359da16ee5..3181826ef59 100644 --- a/crates/core_simd/tests/autoderef.rs +++ b/crates/core_simd/tests/autoderef.rs @@ -1,6 +1,6 @@ // Test that we handle all our "auto-deref" cases correctly. #![feature(portable_simd)] -use core_simd::f32x4; +use core_simd::simd::f32x4; #[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; diff --git a/crates/core_simd/tests/mask_ops_impl/mask_macros.rs b/crates/core_simd/tests/mask_ops_impl/mask_macros.rs index 795f9e27c44..faafa5fa51f 100644 --- a/crates/core_simd/tests/mask_ops_impl/mask_macros.rs +++ b/crates/core_simd/tests/mask_ops_impl/mask_macros.rs @@ -2,7 +2,7 @@ macro_rules! 
mask_tests { { $vector:ident, $lanes:literal } => { #[cfg(test)] mod $vector { - use core_simd::$vector as Vector; + use core_simd::simd::$vector as Vector; const LANES: usize = $lanes; #[cfg(target_arch = "wasm32")] diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 673d0db93fe..9f8bad1c36c 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -13,11 +13,13 @@ macro_rules! test_mask_api { #[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; + use core_simd::simd::Mask; + #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn set_and_test() { let values = [true, false, false, true, false, false, true, false]; - let mut mask = core_simd::Mask::<$type, 8>::splat(false); + let mut mask = Mask::<$type, 8>::splat(false); for (lane, value) in values.iter().copied().enumerate() { mask.set(lane, value); } @@ -29,7 +31,7 @@ macro_rules! test_mask_api { #[test] #[should_panic] fn set_invalid_lane() { - let mut mask = core_simd::Mask::<$type, 8>::splat(false); + let mut mask = Mask::<$type, 8>::splat(false); mask.set(8, true); let _ = mask; } @@ -37,24 +39,24 @@ macro_rules! 
test_mask_api { #[test] #[should_panic] fn test_invalid_lane() { - let mask = core_simd::Mask::<$type, 8>::splat(false); + let mask = Mask::<$type, 8>::splat(false); let _ = mask.test(8); } #[test] fn any() { - assert!(!core_simd::Mask::<$type, 8>::splat(false).any()); - assert!(core_simd::Mask::<$type, 8>::splat(true).any()); - let mut v = core_simd::Mask::<$type, 8>::splat(false); + assert!(!Mask::<$type, 8>::splat(false).any()); + assert!(Mask::<$type, 8>::splat(true).any()); + let mut v = Mask::<$type, 8>::splat(false); v.set(2, true); assert!(v.any()); } #[test] fn all() { - assert!(!core_simd::Mask::<$type, 8>::splat(false).all()); - assert!(core_simd::Mask::<$type, 8>::splat(true).all()); - let mut v = core_simd::Mask::<$type, 8>::splat(false); + assert!(!Mask::<$type, 8>::splat(false).all()); + assert!(Mask::<$type, 8>::splat(true).all()); + let mut v = Mask::<$type, 8>::splat(false); v.set(2, true); assert!(!v.all()); } @@ -62,57 +64,57 @@ macro_rules! test_mask_api { #[test] fn roundtrip_int_conversion() { let values = [true, false, false, true, false, false, true, false]; - let mask = core_simd::Mask::<$type, 8>::from_array(values); + let mask = Mask::<$type, 8>::from_array(values); let int = mask.to_int(); assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]); - assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask); + assert_eq!(Mask::<$type, 8>::from_int(int), mask); } #[test] fn roundtrip_bitmask_conversion() { - use core_simd::ToBitMask; + use core_simd::simd::ToBitMask; let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, ]; - let mask = core_simd::Mask::<$type, 16>::from_array(values); + let mask = Mask::<$type, 16>::from_array(values); let bitmask = mask.to_bitmask(); assert_eq!(bitmask, 0b1000001101001001); - assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask); + assert_eq!(Mask::<$type, 16>::from_bitmask(bitmask), mask); } #[test] fn 
roundtrip_bitmask_conversion_short() { - use core_simd::ToBitMask; + use core_simd::simd::ToBitMask; let values = [ false, false, false, true, ]; - let mask = core_simd::Mask::<$type, 4>::from_array(values); + let mask = Mask::<$type, 4>::from_array(values); let bitmask = mask.to_bitmask(); assert_eq!(bitmask, 0b1000); - assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask); + assert_eq!(Mask::<$type, 4>::from_bitmask(bitmask), mask); let values = [true, false]; - let mask = core_simd::Mask::<$type, 2>::from_array(values); + let mask = Mask::<$type, 2>::from_array(values); let bitmask = mask.to_bitmask(); assert_eq!(bitmask, 0b01); - assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask); + assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask); } #[test] fn cast() { - fn cast_impl() + fn cast_impl() where - core_simd::Mask<$type, 8>: Into>, + Mask<$type, 8>: Into>, { let values = [true, false, false, true, false, false, true, false]; - let mask = core_simd::Mask::<$type, 8>::from_array(values); + let mask = Mask::<$type, 8>::from_array(values); let cast_mask = mask.cast::(); assert_eq!(values, cast_mask.to_array()); - let into_mask: core_simd::Mask = mask.into(); + let into_mask: Mask = mask.into(); assert_eq!(values, into_mask.to_array()); } @@ -126,15 +128,15 @@ macro_rules! 
test_mask_api { #[cfg(feature = "generic_const_exprs")] #[test] fn roundtrip_bitmask_array_conversion() { - use core_simd::ToBitMaskArray; + use core_simd::simd::ToBitMaskArray; let values = [ true, false, false, true, false, false, true, false, true, true, false, false, false, false, false, true, ]; - let mask = core_simd::Mask::<$type, 16>::from_array(values); + let mask = Mask::<$type, 16>::from_array(values); let bitmask = mask.to_bitmask_array(); assert_eq!(bitmask, [0b01001001, 0b10000011]); - assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask_array(bitmask), mask); + assert_eq!(Mask::<$type, 16>::from_bitmask_array(bitmask), mask); } } } @@ -150,9 +152,10 @@ mod mask_api { #[test] fn convert() { + use core_simd::simd::Mask; let values = [true, false, false, true, false, false, true, false]; assert_eq!( - core_simd::Mask::::from_array(values), - core_simd::Mask::::from_array(values).into() + Mask::::from_array(values), + Mask::::from_array(values).into() ); } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index f759394d075..3a02f3f01e1 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -7,7 +7,7 @@ macro_rules! impl_unary_op_test { test_helpers::test_lanes! { fn $fn() { test_helpers::test_unary_elementwise( - & as core::ops::$trait>::$fn, + & as core::ops::$trait>::$fn, &$scalar_fn, &|_| true, ); @@ -27,7 +27,7 @@ macro_rules! impl_binary_op_test { { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => { mod $fn { use super::*; - use core_simd::Simd; + use core_simd::simd::Simd; test_helpers::test_lanes! { fn normal() { @@ -64,7 +64,7 @@ macro_rules! impl_binary_checked_op_test { { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => { mod $fn { use super::*; - use core_simd::Simd; + use core_simd::simd::Simd; test_helpers::test_lanes! 
{ fn normal() { @@ -173,7 +173,7 @@ macro_rules! impl_signed_tests { { $scalar:tt } => { mod $scalar { use core_simd::simd::SimdInt; - type Vector = core_simd::Simd; + type Vector = core_simd::simd::Simd; type Scalar = $scalar; impl_common_integer_tests! { Vector, Scalar } @@ -314,7 +314,7 @@ macro_rules! impl_unsigned_tests { { $scalar:tt } => { mod $scalar { use core_simd::simd::SimdUint; - type Vector = core_simd::Simd; + type Vector = core_simd::simd::Simd; type Scalar = $scalar; impl_common_integer_tests! { Vector, Scalar } @@ -348,8 +348,8 @@ macro_rules! impl_unsigned_tests { macro_rules! impl_float_tests { { $scalar:tt, $int_scalar:tt } => { mod $scalar { - use core_simd::SimdFloat; - type Vector = core_simd::Simd; + use core_simd::simd::SimdFloat; + type Vector = core_simd::simd::Simd; type Scalar = $scalar; impl_unary_op_test!(Scalar, Neg::neg); diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index 2b0008624ad..0ae8f83b8b9 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -1,6 +1,6 @@ #![feature(portable_simd, strict_provenance)] -use core_simd::{Simd, SimdConstPtr, SimdMutPtr}; +use core_simd::simd::{Simd, SimdConstPtr, SimdMutPtr}; macro_rules! common_tests { { $constness:ident } => { diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 484fd5bf47d..8b9638ad466 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -5,7 +5,7 @@ macro_rules! 
float_rounding_test { mod $scalar { use std_float::StdFloat; - type Vector = core_simd::Simd<$scalar, LANES>; + type Vector = core_simd::simd::Simd<$scalar, LANES>; type Scalar = $scalar; type IntScalar = $int_scalar; diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs index 33a7becb421..8cd7c33e823 100644 --- a/crates/core_simd/tests/swizzle.rs +++ b/crates/core_simd/tests/swizzle.rs @@ -1,5 +1,5 @@ #![feature(portable_simd)] -use core_simd::{Simd, Swizzle}; +use core_simd::simd::{Simd, Swizzle}; #[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs index debb4335e2c..be0ee4349c5 100644 --- a/crates/core_simd/tests/to_bytes.rs +++ b/crates/core_simd/tests/to_bytes.rs @@ -2,7 +2,7 @@ #![allow(incomplete_features)] #![cfg(feature = "generic_const_exprs")] -use core_simd::Simd; +use core_simd::simd::Simd; #[test] fn byte_convert() { diff --git a/crates/core_simd/tests/try_from_slice.rs b/crates/core_simd/tests/try_from_slice.rs index 189c18c6039..859e3b94f2c 100644 --- a/crates/core_simd/tests/try_from_slice.rs +++ b/crates/core_simd/tests/try_from_slice.rs @@ -6,7 +6,7 @@ use wasm_bindgen_test::*; #[cfg(target_arch = "wasm32")] wasm_bindgen_test_configure!(run_in_browser); -use core_simd::i32x4; +use core_simd::simd::i32x4; #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 5f2a928b5e4..b26cdc311a2 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -401,7 +401,7 @@ macro_rules! test_lanes { fn implementation() where - core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount, + core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body #[cfg(target_arch = "wasm32")] @@ -508,7 +508,7 @@ macro_rules! 
test_lanes_panic { fn implementation() where - core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount, + core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body $crate::test_lanes_helper!( From 54b6f6923e281ba68d13269b43faa927c6df83d5 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Mon, 28 Nov 2022 06:03:32 -0800 Subject: [PATCH 125/161] Avoid a scalar loop in `Simd::from_slice` --- crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/vector.rs | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 82873162969..34b79e630a4 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,5 +1,6 @@ #![no_std] #![feature( + const_ptr_read, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index d109087eaa6..51b0d999a81 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -174,13 +174,10 @@ where slice.len() >= LANES, "slice length must be at least the number of lanes" ); - let mut array = [slice[0]; LANES]; - let mut i = 0; - while i < LANES { - array[i] = slice[i]; - i += 1; - } - Self(array) + // Safety: + // - We've checked the length is sufficient. + // - `T` and `Simd` are Copy types. + unsafe { slice.as_ptr().cast::().read_unaligned() } } /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. 
From df3a63906c44b23de7065d60c20bf99e2571ccc8 Mon Sep 17 00:00:00 2001 From: miguel raz Date: Fri, 4 Jun 2021 14:24:47 -0500 Subject: [PATCH 126/161] add dot_product example --- crates/core_simd/examples/dot_product.rs | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 crates/core_simd/examples/dot_product.rs diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs new file mode 100644 index 00000000000..812b0b23eeb --- /dev/null +++ b/crates/core_simd/examples/dot_product.rs @@ -0,0 +1,31 @@ +// Code taken from the `packed_simd` crate +// Run this code with `cargo test --example dot_product` +#![feature(array_chunks)] +use core_simd::*; + +pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + + // TODO handle remainder when a.len() % 4 != 0 + a.array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .map(|(a, b)| (a * b).horizontal_sum()) + .sum() +} + +fn main() { + // Empty main to make cargo happy +} + +#[cfg(test)] +mod tests { + #[test] + fn test() { + use super::*; + let a: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + let b: Vec = vec![-8.0, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0]; + + assert_eq!(0.0, dot_prod(&a, &b)); + } +} From c08a4d1f10473bfbdddf3d2eefc40e1194a633a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Sat, 26 Mar 2022 14:04:37 -0600 Subject: [PATCH 127/161] add more basic dot products and comments, README --- crates/core_simd/examples/README.md | 19 ++++++++++++++++ crates/core_simd/examples/dot_product.rs | 29 +++++++++++++++++++++--- 2 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 crates/core_simd/examples/README.md diff --git a/crates/core_simd/examples/README.md b/crates/core_simd/examples/README.md new file mode 100644 index 00000000000..b37dffa8eaa --- /dev/null +++ b/crates/core_simd/examples/README.md @@ -0,0 
+1,19 @@ +### `stdsimd` examples + +This crate is a port of example uses of `stdsimd`, mostly taken from the `packed_simd` crate. + +The examples contain, as in the case of `dot_product.rs`, multiple ways of solving the problem, in order to show idiomatic uses of SIMD and iteration of performance designs. + +Run the tests with the command + +``` +cargo run --example dot_product +``` + +and the benchmarks via the command + +``` +cargo run --example --benchmark ??? +``` + +and measure the timings on your local system. diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 812b0b23eeb..3e415fc4471 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -3,7 +3,27 @@ #![feature(array_chunks)] use core_simd::*; -pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 { +/// This is your barebones dot product implementation: +/// Take 2 vectors, multiply them element wise and *then* +/// add up the result. In the next example we will see if there +/// is any difference to adding as we go along multiplying. 
+pub fn dot_prod_0(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + + a.iter() + .zip(b.iter()) + .map(|a, b| a * b) + .sum() +} + +pub fn dot_prod_1(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + a.iter() + .zip(b.iter()) + .fold(0.0, |a, b| a * b) +} + +pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 @@ -21,11 +41,14 @@ fn main() { #[cfg(test)] mod tests { #[test] - fn test() { + fn smoke_test() { use super::*; let a: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; let b: Vec = vec![-8.0, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0]; - assert_eq!(0.0, dot_prod(&a, &b)); + assert_eq!(0.0, dot_prod_0(&a, &b)); + assert_eq!(0.0, dot_prod_1(&a, &b)); + assert_eq!(0.0, dot_prod_simd_0(&a, &b)); + assert_eq!(0.0, dot_prod_simd_1(&a, &b)); } } From 4615805ec2ce44c37792df3b5b179a795f57542b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Sat, 26 Mar 2022 16:10:25 -0600 Subject: [PATCH 128/161] add remainder dot_product and cleanup cleanup dot_product and README.md --- crates/core_simd/examples/README.md | 8 +- crates/core_simd/examples/dot_product.rs | 106 ++++++++++++++++++++--- 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/crates/core_simd/examples/README.md b/crates/core_simd/examples/README.md index b37dffa8eaa..82747f1b5a6 100644 --- a/crates/core_simd/examples/README.md +++ b/crates/core_simd/examples/README.md @@ -10,10 +10,4 @@ Run the tests with the command cargo run --example dot_product ``` -and the benchmarks via the command - -``` -cargo run --example --benchmark ??? -``` - -and measure the timings on your local system. +and verify the code for `dot_product.rs` on your machine. 
diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 3e415fc4471..ed210192e2a 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -1,39 +1,113 @@ // Code taken from the `packed_simd` crate // Run this code with `cargo test --example dot_product` +//use std::iter::zip; + #![feature(array_chunks)] +#![feature(slice_as_chunks)] +// Add these imports to use the stdsimd library +#![feature(portable_simd)] use core_simd::*; -/// This is your barebones dot product implementation: -/// Take 2 vectors, multiply them element wise and *then* -/// add up the result. In the next example we will see if there -/// is any difference to adding as we go along multiplying. +// This is your barebones dot product implementation: +// Take 2 vectors, multiply them element wise and *then* +// go along the resulting array and add up the result. +// In the next example we will see if there +// is any difference to adding and multiplying in tandem. pub fn dot_prod_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); - a.iter() - .zip(b.iter()) - .map(|a, b| a * b) - .sum() + a.iter().zip(b.iter()).map(|(a, b)| a * b).sum() } +// When dealing with SIMD, it is very important to think about the amount +// of data movement and when it happens. We're going over simple computation examples here, and yet +// it is not trivial to understand what may or may not contribute to performance +// changes. Eventually, you will need tools to inspect the generated assembly and confirm your +// hypothesis and benchmarks - we will mention them later on. +// With the use of `fold`, we're doing a multiplication, +// and then adding it to the sum, one element from both vectors at a time. 
pub fn dot_prod_1(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); a.iter() - .zip(b.iter()) - .fold(0.0, |a, b| a * b) + .zip(b.iter()) + .fold(0.0, |a, zipped| a + zipped.0 * zipped.1) } +// We now move on to the SIMD implementations: notice the following constructs: +// `array_chunks::<4>`: mapping this over the vector will let use construct SIMD vectors +// `f32x4::from_array`: construct the SIMD vector from a slice +// `(a * b).reduce_sum()`: Multiply both f32x4 vectors together, and then reduce them. +// This approach essentially uses SIMD to produce a vector of length N/4 of all the products, +// and then add those with `sum()`. This is suboptimal. +// TODO: ASCII diagrams pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); - // TODO handle remainder when a.len() % 4 != 0 a.array_chunks::<4>() .map(|&a| f32x4::from_array(a)) .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) - .map(|(a, b)| (a * b).horizontal_sum()) + .map(|(a, b)| (a * b).reduce_sum()) .sum() } +// There's some simple ways to improve the previous code: +// 1. Make a `zero` `f32x4` SIMD vector that we will be accumulating into +// So that there is only one `sum()` reduction when the last `f32x4` has been processed +// 2. Exploit Fused Multiply Add so that the multiplication, addition and sinking into the reduciton +// happen in the same step. +// If the arrays are large, minimizing the data shuffling will lead to great perf. +// If the arrays are small, handling the remainder elements when the length isn't a multiple of 4 +// Can become a problem. 
+pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + // TODO handle remainder when a.len() % 4 != 0 + a.array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .fold(f32x4::splat(0.0), |acc, zipped| acc + zipped.0 * zipped.1) + .reduce_sum() +} + +// A lot of knowledgeable use of SIMD comes from knowing specific instructions that are +// available - let's try to use the `mul_add` instruction, which is the fused-multiply-add we were looking for. +use std_float::StdFloat; +pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + // TODO handle remainder when a.len() % 4 != 0 + let mut res = f32x4::splat(0.0); + a.array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .for_each(|(a, b)| { + res = a.mul_add(b, res); + }); + res.reduce_sum() +} + +// Finally, we will write the same operation but handling the loop remainder. 
+const LANES: usize = 4; +pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + + let (a_extra, a_chunks) = a.as_rchunks(); + let (b_extra, b_chunks) = b.as_rchunks(); + + // These are always true, but for emphasis: + assert_eq!(a_chunks.len(), b_chunks.len()); + assert_eq!(a_extra.len(), b_extra.len()); + + let mut sums = [0.0; LANES]; + for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) { + *d = x * y; + } + + let mut sums = f32x4::from_array(sums); + std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| { + sums += f32x4::from_array(*x) * f32x4::from_array(*y); + }); + + sums.reduce_sum() +} fn main() { // Empty main to make cargo happy } @@ -45,10 +119,18 @@ mod tests { use super::*; let a: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; let b: Vec = vec![-8.0, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0]; + let x: Vec = [0.5; 1003].to_vec(); + let y: Vec = [2.0; 1003].to_vec(); + // Basic check assert_eq!(0.0, dot_prod_0(&a, &b)); assert_eq!(0.0, dot_prod_1(&a, &b)); assert_eq!(0.0, dot_prod_simd_0(&a, &b)); assert_eq!(0.0, dot_prod_simd_1(&a, &b)); + assert_eq!(0.0, dot_prod_simd_2(&a, &b)); + assert_eq!(0.0, dot_prod_simd_3(&a, &b)); + + // We can handle vectors that are non-multiples of 4 + assert_eq!(1003.0, dot_prod_simd_3(&x, &y)); } } From 4ddfd2f3f8c547fa7c42a0f9a5979665262a30c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Tue, 29 Mar 2022 16:52:54 -0600 Subject: [PATCH 129/161] non allocating fold simd allocating fold with std::ops::Add::add --- crates/core_simd/examples/dot_product.rs | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index ed210192e2a..75d628ee392 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -108,6 +108,37 @@ pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 { sums.reduce_sum() } + 
+// Finally, we present an iterator version for handling remainders in a scalar fashion at the end of the loop. +// Unfortunately, this is allocating 1 `XMM` register on the order of `~len(a)` - we'll see how we can get around it in the +// next example. +pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { + let mut sum = a + .array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .map(|(a, b)| a * b) + .fold(f32x4::splat(0.0), std::ops::Add::add) + .reduce_sum(); + let remain = a.len() - (a.len() % 4); + sum += a[remain..] + .iter() + .zip(&b[remain..]) + .map(|(a, b)| a * b) + .sum::(); + sum +} + +// This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that. +// Notice the the use of `mul_add`, which can do a multiply and an add operation ber iteration. +pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 { + a.array_chunks::<4>() + .map(|&a| f32x4::from_array(a)) + .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .fold(f32x4::splat(0.), |acc, (a, b)| acc.mul_add(a, b)) + .reduce_sum() +} + fn main() { // Empty main to make cargo happy } From aeac9ed37339c463a6a155b12135b7f167611e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Tue, 29 Mar 2022 17:36:47 -0600 Subject: [PATCH 130/161] proper mul_add arg order, added tests --- crates/core_simd/examples/dot_product.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 75d628ee392..84824c2e5c4 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -135,7 +135,7 @@ pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 { a.array_chunks::<4>() .map(|&a| f32x4::from_array(a)) .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) - .fold(f32x4::splat(0.), |acc, (a, b)| acc.mul_add(a, b)) + 
.fold(f32x4::splat(0.), |acc, (a, b)| a.mul_add(b, acc)) .reduce_sum() } @@ -160,6 +160,8 @@ mod tests { assert_eq!(0.0, dot_prod_simd_1(&a, &b)); assert_eq!(0.0, dot_prod_simd_2(&a, &b)); assert_eq!(0.0, dot_prod_simd_3(&a, &b)); + assert_eq!(0.0, dot_prod_simd_4(&a, &b)); + assert_eq!(0.0, dot_prod_simd_5(&a, &b)); // We can handle vectors that are non-multiples of 4 assert_eq!(1003.0, dot_prod_simd_3(&x, &y)); From 64247a327d30a2d5fe7ad3d98f527bff1cc8fb85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Raz=20Guzm=C3=A1n=20Macedo?= Date: Wed, 30 Mar 2022 17:45:59 -0600 Subject: [PATCH 131/161] add _scalar names for dot_product examples --- crates/core_simd/examples/dot_product.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 84824c2e5c4..936741a2ceb 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -13,7 +13,7 @@ use core_simd::*; // go along the resulting array and add up the result. // In the next example we will see if there // is any difference to adding and multiplying in tandem. -pub fn dot_prod_0(a: &[f32], b: &[f32]) -> f32 { +pub fn dot_prod_scalar_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); a.iter().zip(b.iter()).map(|(a, b)| a * b).sum() @@ -26,7 +26,7 @@ pub fn dot_prod_0(a: &[f32], b: &[f32]) -> f32 { // hypothesis and benchmarks - we will mention them later on. // With the use of `fold`, we're doing a multiplication, // and then adding it to the sum, one element from both vectors at a time. 
-pub fn dot_prod_1(a: &[f32], b: &[f32]) -> f32 { +pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); a.iter() .zip(b.iter()) @@ -154,8 +154,8 @@ mod tests { let y: Vec = [2.0; 1003].to_vec(); // Basic check - assert_eq!(0.0, dot_prod_0(&a, &b)); - assert_eq!(0.0, dot_prod_1(&a, &b)); + assert_eq!(0.0, dot_prod_scalar_0(&a, &b)); + assert_eq!(0.0, dot_prod_scalar_1(&a, &b)); assert_eq!(0.0, dot_prod_simd_0(&a, &b)); assert_eq!(0.0, dot_prod_simd_1(&a, &b)); assert_eq!(0.0, dot_prod_simd_2(&a, &b)); From da3bd6d3a04f84ebc7fc6314f2e1f8a74e379018 Mon Sep 17 00:00:00 2001 From: The Atelier Date: Sat, 3 Dec 2022 18:40:07 -0800 Subject: [PATCH 132/161] Update dot_product example import --- crates/core_simd/examples/dot_product.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 936741a2ceb..391f08f55a0 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -6,7 +6,7 @@ #![feature(slice_as_chunks)] // Add these imports to use the stdsimd library #![feature(portable_simd)] -use core_simd::*; +use core_simd::simd::*; // This is your barebones dot product implementation: // Take 2 vectors, multiply them element wise and *then* From e3ef226f7b33e7257d0e549046bed44cabfd5585 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Mon, 23 Jan 2023 11:00:35 +0700 Subject: [PATCH 133/161] Fix the typo --- crates/core_simd/src/masks/to_bitmask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs index 46914dfe0d9..fc7d6b781f2 100644 --- a/crates/core_simd/src/masks/to_bitmask.rs +++ b/crates/core_simd/src/masks/to_bitmask.rs @@ -72,7 +72,7 @@ impl_integer_intrinsic! { impl ToBitMask for Mask<_, 64> } -/// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes. 
+/// Returns the minimum number of bytes in a bitmask with `lanes` lanes. #[cfg(feature = "generic_const_exprs")] pub const fn bitmask_len(lanes: usize) -> usize { (lanes + 7) / 8 From 0fd7c8e138db1362e3cba9cdb40403dc7a83364b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 19 Feb 2023 12:21:27 -0500 Subject: [PATCH 134/161] Add copy_to_slice --- crates/core_simd/src/vector.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 51b0d999a81..870c2eefee1 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -159,7 +159,7 @@ where /// /// Panics if the slice's length is less than the vector's `Simd::LANES`. /// - /// # Examples + /// # Example /// /// ``` /// # #![feature(portable_simd)] @@ -180,6 +180,35 @@ where unsafe { slice.as_ptr().cast::().read_unaligned() } } + /// Writes a SIMD vector to the first `LANES` elements of a slice. + /// + /// # Panics + /// + /// Panics if the slice's length is less than the vector's `Simd::LANES`. + /// + /// # Example + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::u32x4; + /// let mut dest = vec![0; 6]; + /// let v = u32x4::from_array([1, 2, 3, 4]); + /// v.copy_to_slice(&mut dest); + /// assert_eq!(&dest, &[1, 2, 3, 4, 0, 0]); + /// ``` + pub fn copy_to_slice(self, slice: &mut [T]) { + assert!( + slice.len() >= LANES, + "slice length must be at least the number of lanes" + ); + // Safety: + // - We've checked the length is sufficient + // - `T` and `Simd` are Copy types. + unsafe { slice.as_mut_ptr().cast::().write_unaligned(self) } + } + /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. 
/// /// This follows the semantics of Rust's `as` conversion for casting From 36829ddca7de02b4d8bad31bdfb0fbc83664017b Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 19 Feb 2023 15:35:36 -0500 Subject: [PATCH 135/161] Check that vectors aren't padded --- crates/core_simd/src/vector.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 870c2eefee1..3e39f1d623c 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -174,6 +174,7 @@ where slice.len() >= LANES, "slice length must be at least the number of lanes" ); + assert!(core::mem::size_of::() == LANES * core::mem::size_of::()); // Safety: // - We've checked the length is sufficient. // - `T` and `Simd` are Copy types. @@ -203,6 +204,7 @@ where slice.len() >= LANES, "slice length must be at least the number of lanes" ); + assert!(core::mem::size_of::() == LANES * core::mem::size_of::()); // Safety: // - We've checked the length is sufficient // - `T` and `Simd` are Copy types. 
From 65b5210bdbb3a7af57e5c39d41424ba260ee3fbc Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 26 Mar 2023 12:56:35 +0200 Subject: [PATCH 136/161] Skip building wasm-bindgen-test on non-wasm targets This reduces compilation time --- crates/core_simd/Cargo.toml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 7435e24edd3..d1a3a515a7e 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -15,11 +15,9 @@ std = [] generic_const_exprs = [] all_lane_counts = [] -[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen] -version = "0.2" - -[dev-dependencies.wasm-bindgen-test] -version = "0.3" +[target.'cfg(target_arch = "wasm32")'.dev-dependencies] +wasm-bindgen = "0.2" +wasm-bindgen-test = "0.3" [dev-dependencies.proptest] version = "0.10" From 90f2af774ae3149ad52ec6bb2d48649b72844a2c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 26 Mar 2023 16:11:05 -0400 Subject: [PATCH 137/161] Fix lint --- crates/test_helpers/src/array.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/test_helpers/src/array.rs b/crates/test_helpers/src/array.rs index 5ffc9226976..984a427320d 100644 --- a/crates/test_helpers/src/array.rs +++ b/crates/test_helpers/src/array.rs @@ -41,6 +41,7 @@ where fn new_tree(&self, runner: &mut TestRunner) -> NewTree { let tree: [S::Tree; LANES] = unsafe { + #[allow(clippy::uninit_assumed_init)] let mut tree: [MaybeUninit; LANES] = MaybeUninit::uninit().assume_init(); for t in tree.iter_mut() { *t = MaybeUninit::new(self.strategy.new_tree(runner)?) 
@@ -60,6 +61,7 @@ impl ValueTree for ArrayValueTree<[T; LANES]> fn current(&self) -> Self::Value { unsafe { + #[allow(clippy::uninit_assumed_init)] let mut value: [MaybeUninit; LANES] = MaybeUninit::uninit().assume_init(); for (tree_elem, value_elem) in self.tree.iter().zip(value.iter_mut()) { *value_elem = MaybeUninit::new(tree_elem.current()); From ceb26115928c5c69b10268fd2f9e500865c142d6 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Sun, 9 Apr 2023 21:26:40 -0700 Subject: [PATCH 138/161] Remove formats `[T; N]` does not impl (rust-lang/portable-simd#337) Remove these extra formatting traits, as they are inconsistent with how arrays and slices format, and it can cause unnecessary code bloat in binaries. We can revisit this if people ever agree on doing these formatters for the other slice-y types. Prefer to dispatch to the `impl `fmt::Debug for [T]`, to reduce the chances of monomorphizing twice. Inlining it seems like a good idea for similar reasons? --- crates/core_simd/src/fmt.rs | 50 ++++++++++++------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs index dbd9839c4bf..b7317969cbb 100644 --- a/crates/core_simd/src/fmt.rs +++ b/crates/core_simd/src/fmt.rs @@ -1,39 +1,21 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::fmt; -macro_rules! 
impl_fmt_trait { - { $($trait:ident,)* } => { - $( - impl fmt::$trait for Simd - where - LaneCount: SupportedLaneCount, - T: SimdElement + fmt::$trait, - { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[repr(transparent)] - struct Wrapper<'a, T: fmt::$trait>(&'a T); - - impl fmt::Debug for Wrapper<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } - } - - f.debug_list() - .entries(self.as_array().iter().map(|x| Wrapper(x))) - .finish() - } - } - )* +impl fmt::Debug for Simd +where + LaneCount: SupportedLaneCount, + T: SimdElement + fmt::Debug, +{ + /// A `Simd` has a debug format like the one for `[T]`: + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd::Simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd::Simd; + /// let floats = Simd::::splat(-1.0); + /// assert_eq!(format!("{:?}", [-1.0; 4]), format!("{:?}", floats)); + /// ``` + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + <[T] as fmt::Debug>::fmt(self.as_array(), f) } } - -impl_fmt_trait! 
{ - Debug, - Binary, - LowerExp, - UpperExp, - Octal, - LowerHex, - UpperHex, -} From afad9c3f644ddbfef3301f617cb9d23ca4e71fe0 Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sat, 22 Apr 2023 21:12:35 +0000 Subject: [PATCH 139/161] Don't use direct field access in `Simd` functions --- crates/core_simd/src/lib.rs | 2 ++ crates/core_simd/src/vector.rs | 26 ++++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 927b1654f8e..a372e2e40c4 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,8 @@ #![no_std] #![feature( const_ptr_read, + const_refs_to_cell, + const_transmute_copy, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 3e39f1d623c..c1af4af5f57 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -135,22 +135,32 @@ where /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); /// ``` pub const fn as_array(&self) -> &[T; LANES] { - &self.0 + // SAFETY: Transmuting between `Simd` and `[T; LANES]` + // is always valid and `Simd` never has a lower alignment + // than `[T; LANES]`. + unsafe { &*(self as *const Self as *const [T; LANES]) } } /// Returns a mutable array reference containing the entire SIMD vector. pub fn as_mut_array(&mut self) -> &mut [T; LANES] { - &mut self.0 + // SAFETY: Transmuting between `Simd` and `[T; LANES]` + // is always valid and `Simd` never has a lower alignment + // than `[T; LANES]`. + unsafe { &mut *(self as *mut Self as *mut [T; LANES]) } } /// Converts an array to a SIMD vector. pub const fn from_array(array: [T; LANES]) -> Self { - Self(array) + // SAFETY: Transmuting between `Simd` and `[T; LANES]` + // is always valid. + unsafe { core::mem::transmute_copy(&array) } } /// Converts a SIMD vector to an array. 
pub const fn to_array(self) -> [T; LANES] { - self.0 + // SAFETY: Transmuting between `Simd` and `[T; LANES]` + // is always valid. + unsafe { core::mem::transmute_copy(&self) } } /// Converts a slice to a SIMD vector containing `slice[..LANES]`. @@ -735,7 +745,7 @@ where { #[inline] fn as_ref(&self) -> &[T; LANES] { - &self.0 + self.as_array() } } @@ -746,7 +756,7 @@ where { #[inline] fn as_mut(&mut self) -> &mut [T; LANES] { - &mut self.0 + self.as_mut_array() } } @@ -758,7 +768,7 @@ where { #[inline] fn as_ref(&self) -> &[T] { - &self.0 + self.as_array() } } @@ -769,7 +779,7 @@ where { #[inline] fn as_mut(&mut self) -> &mut [T] { - &mut self.0 + self.as_mut_array() } } From 52833ccbe88ed98b73d0ccd7299f2a667439bb4b Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sat, 22 Apr 2023 23:02:45 +0000 Subject: [PATCH 140/161] Add notes to avoid direct field accesses --- crates/core_simd/src/vector.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index c1af4af5f57..eee105ff5fc 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -76,6 +76,11 @@ use crate::simd::{ /// [`read`]: pointer::read /// [`write`]: pointer::write /// [as_simd]: slice::as_simd +// +// NOTE: Accessing the inner array directly in any way (e.g. by using the `.0` field syntax) or +// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be +// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also +// causes rustc to emit illegal LLVM IR in some cases. #[repr(simd)] pub struct Simd([T; LANES]) where @@ -138,6 +143,9 @@ where // SAFETY: Transmuting between `Simd` and `[T; LANES]` // is always valid and `Simd` never has a lower alignment // than `[T; LANES]`. + // + // NOTE: This deliberately doesn't just use `&self.0`, see the comment + // on the struct definition for details. 
unsafe { &*(self as *const Self as *const [T; LANES]) } } @@ -146,6 +154,9 @@ where // SAFETY: Transmuting between `Simd` and `[T; LANES]` // is always valid and `Simd` never has a lower alignment // than `[T; LANES]`. + // + // NOTE: This deliberately doesn't just use `&mut self.0`, see the comment + // on the struct definition for details. unsafe { &mut *(self as *mut Self as *mut [T; LANES]) } } @@ -153,6 +164,9 @@ where pub const fn from_array(array: [T; LANES]) -> Self { // SAFETY: Transmuting between `Simd` and `[T; LANES]` // is always valid. + // + // NOTE: This deliberately doesn't just use `Self(array)`, see the comment + // on the struct definition for details. unsafe { core::mem::transmute_copy(&array) } } @@ -160,6 +174,9 @@ where pub const fn to_array(self) -> [T; LANES] { // SAFETY: Transmuting between `Simd` and `[T; LANES]` // is always valid. + // + // NOTE: This deliberately doesn't just use `self.0`, see the comment + // on the struct definition for details. unsafe { core::mem::transmute_copy(&self) } } From f1b86baf8453733c72e196ce2c08b4d85e94d81a Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sat, 22 Apr 2023 23:22:39 +0000 Subject: [PATCH 141/161] Use pointer reads for better codegen in debug mode --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/vector.rs | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a372e2e40c4..e054d483ca5 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -2,7 +2,6 @@ #![feature( const_ptr_read, const_refs_to_cell, - const_transmute_copy, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index eee105ff5fc..a38d701588c 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -163,21 +163,31 @@ where /// Converts an array to a SIMD vector. 
pub const fn from_array(array: [T; LANES]) -> Self { // SAFETY: Transmuting between `Simd` and `[T; LANES]` - // is always valid. + // is always valid. We need to use `read_unaligned` here, since + // the array may have a lower alignment than the vector. + // + // FIXME: We currently use a pointer read instead of `transmute_copy` because + // it results in better codegen with optimizations disabled, but we should + // probably just use `transmute` once that works on const generic types. // // NOTE: This deliberately doesn't just use `Self(array)`, see the comment // on the struct definition for details. - unsafe { core::mem::transmute_copy(&array) } + unsafe { (&array as *const [T; LANES] as *const Self).read_unaligned() } } /// Converts a SIMD vector to an array. pub const fn to_array(self) -> [T; LANES] { // SAFETY: Transmuting between `Simd` and `[T; LANES]` - // is always valid. + // is always valid. No need to use `read_unaligned` here, since + // the vector never has a lower alignment than the array. + // + // FIXME: We currently use a pointer read instead of `transmute_copy` because + // it results in better codegen with optimizations disabled, but we should + // probably just use `transmute` once that works on const generic types. // // NOTE: This deliberately doesn't just use `self.0`, see the comment // on the struct definition for details. - unsafe { core::mem::transmute_copy(&self) } + unsafe { (&self as *const Self as *const [T; LANES]).read() } } /// Converts a slice to a SIMD vector containing `slice[..LANES]`. From 71d4c368509536f7277e9a1cb6e6286ba6de7911 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Fri, 17 Mar 2023 17:56:45 -0700 Subject: [PATCH 142/161] lane -> element for core::simd::Simd A while ago we began saying T, N instead of T, LANES in reference to Simd. At some point that leaked in to us checking in code with const N: usize. 
After a while, we had a discussion and agreed that "lanes", while common, is unnecessary jargon for Rust learners who aren't familiar with SIMD, and is fully interchangeable with terms for arrays like element and index. But we never acted on that. Let's update the main type's docs, at least. The example tweaks also enable removing a slated-for-removal nightly fn. --- crates/core_simd/src/vector.rs | 390 +++++++++++++++++---------------- 1 file changed, 199 insertions(+), 191 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index a38d701588c..154b467752b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -3,48 +3,55 @@ use crate::simd::{ SimdPartialOrd, SupportedLaneCount, Swizzle, }; -/// A SIMD vector of `LANES` elements of type `T`. `Simd` has the same shape as [`[T; N]`](array), but operates like `T`. +/// A SIMD vector with the shape of `[T; N]` but the operations of `T`. /// -/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does. -/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation, -/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping -/// the two arrays together and mapping the operator over each lane. +/// `Simd` supports the operators (+, *, etc.) that `T` does in "elementwise" fashion. +/// These take the element at each index from the left-hand side and right-hand side, +/// perform the operation, then return the result in the same index in a vector of equal size. +/// In other words, an elementwise operation is equivalent to a zip, then map. 
/// /// ```rust -/// # #![feature(array_zip, portable_simd)] +/// # #![feature(portable_simd)] /// # use core::simd::{Simd}; -/// let a0: [i32; 4] = [-2, 0, 2, 4]; -/// let a1 = [10, 9, 8, 7]; -/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs); -/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs); +/// # use core::array; +/// let a: [i32; 4] = [-2, 0, 2, 4]; +/// let b = [10, 9, 8, 7]; +/// let sum = array::from_fn(|i| a[i] + b[i]); +/// let prod = array::from_fn(|i| a[i] * b[i]); /// /// // `Simd` implements `From<[T; N]> -/// let (v0, v1) = (Simd::from(a0), Simd::from(a1)); +/// let (v, w) = (Simd::from(a), Simd::from(b)); /// // Which means arrays implement `Into>`. -/// assert_eq!(v0 + v1, zm_add.into()); -/// assert_eq!(v0 * v1, zm_mul.into()); +/// assert_eq!(v + w, sum.into()); +/// assert_eq!(v * w, prod.into()); /// ``` /// -/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping`]. +/// +/// `Simd` with integer elements treats operators as wrapping, as if `T` was [`Wrapping`]. /// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior. /// This means there is no warning on overflows, even in "debug" builds. /// For most applications where `Simd` is appropriate, it is "not a bug" to wrap, /// and even "debug builds" are unlikely to tolerate the loss of performance. /// You may want to consider using explicitly checked arithmetic if such is required. -/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable. +/// Division by zero on integers still causes a panic, so +/// you may want to consider using `f32` or `f64` if that is unacceptable. /// /// [`Wrapping`]: core::num::Wrapping /// /// # Layout -/// `Simd` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment. +/// `Simd` has a layout similar to `[T; N]` (identical "shapes"), with a greater alignment. 
/// `[T; N]` is aligned to `T`, but `Simd` will have an alignment based on both `T` and `N`. -/// It is thus sound to [`transmute`] `Simd` to `[T; N]`, and will typically optimize to zero cost, -/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide. +/// Thus it is sound to [`transmute`] `Simd` to `[T; N]` and should optimize to "zero cost", +/// but the reverse transmutation may require a copy the compiler cannot simply elide. /// /// # ABI "Features" -/// Due to Rust's safety guarantees, `Simd` is currently passed to and from functions via memory, not SIMD registers, -/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd` or return it. -/// The need for this may be corrected in the future. +/// Due to Rust's safety guarantees, `Simd` is currently passed and returned via memory, +/// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept +/// `Simd` or return it is recommended, at the cost of code generation time, as +/// inlining SIMD-using functions can omit a large function prolog or epilog and thus +/// improve both speed and code size. The need for this may be corrected in the future. +/// +/// Using `#[inline(always)]` still requires additional care. /// /// # Safe SIMD with Unsafe Rust /// @@ -55,18 +62,22 @@ use crate::simd::{ /// Thus, when using `unsafe` Rust to read and write `Simd` through [raw pointers], it is a good idea to first try with /// [`read_unaligned`] and [`write_unaligned`]. 
This is because: /// - [`read`] and [`write`] require full alignment (in this case, `Simd`'s alignment) -/// - the likely source for reading or destination for writing `Simd` is [`[T]`](slice) and similar types, aligned to `T` -/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior** -/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization -/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime +/// - `Simd` is often read from or written to [`[T]`](slice) and other types aligned to `T` +/// - combining these actions violates the `unsafe` contract and explodes the program into +/// a puff of **undefined behavior** +/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned +/// if it sees the optimization +/// - most contemporary processors with "aligned" and "unaligned" read and write instructions +/// exhibit no performance difference if the "unaligned" variant is aligned at runtime /// -/// By imposing less obligations, unaligned functions are less likely to make the program unsound, +/// Less obligations mean unaligned reads and writes are less likely to make the program unsound, /// and may be just as fast as stricter alternatives. -/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd]`, -/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail. -/// If these are not sufficient, then it is most ideal to design data structures to be already aligned -/// to the `Simd` you wish to use before using `unsafe` Rust to read or write. 
-/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first, +/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for +/// converting `[T]` to `[Simd]`, and allows soundly operating on an aligned SIMD body, +/// but it may cost more time when handling the scalar head and tail. +/// If these are not enough, it is most ideal to design data structures to be already aligned +/// to `mem::align_of::>()` before using `unsafe` Rust to read or write. +/// Other ways to compensate for these facts, like materializing `Simd` to or from an array first, /// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`]. /// /// [`transmute`]: core::mem::transmute @@ -82,20 +93,20 @@ use crate::simd::{ // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also // causes rustc to emit illegal LLVM IR in some cases. #[repr(simd)] -pub struct Simd([T; LANES]) +pub struct Simd([T; N]) where - T: SimdElement, - LaneCount: SupportedLaneCount; + LaneCount: SupportedLaneCount, + T: SimdElement; -impl Simd +impl Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { - /// Number of lanes in this vector. - pub const LANES: usize = LANES; + /// Number of elements in this vector. + pub const N: usize = N; - /// Returns the number of lanes in this SIMD vector. + /// Returns the number of elements in this SIMD vector. /// /// # Examples /// @@ -106,10 +117,10 @@ where /// assert_eq!(v.lanes(), 4); /// ``` pub const fn lanes(&self) -> usize { - LANES + Self::N } - /// Constructs a new SIMD vector with all lanes set to the given value. + /// Constructs a new SIMD vector with all elements set to the given value. 
/// /// # Examples /// @@ -120,11 +131,11 @@ where /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` pub fn splat(value: T) -> Self { - // This is preferred over `[value; LANES]`, since it's explicitly a splat: + // This is preferred over `[value; N]`, since it's explicitly a splat: // https://github.com/rust-lang/rust/issues/97804 struct Splat; - impl Swizzle<1, LANES> for Splat { - const INDEX: [usize; LANES] = [0; LANES]; + impl Swizzle<1, N> for Splat { + const INDEX: [usize; N] = [0; N]; } Splat::swizzle(Simd::::from([value])) } @@ -139,30 +150,30 @@ where /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]); /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); /// ``` - pub const fn as_array(&self) -> &[T; LANES] { - // SAFETY: Transmuting between `Simd` and `[T; LANES]` - // is always valid and `Simd` never has a lower alignment - // than `[T; LANES]`. + pub const fn as_array(&self) -> &[T; N] { + // SAFETY: Transmuting between `Simd` and `[T; N]` + // is always valid and `Simd` never has a lower alignment + // than `[T; N]`. // // NOTE: This deliberately doesn't just use `&self.0`, see the comment // on the struct definition for details. - unsafe { &*(self as *const Self as *const [T; LANES]) } + unsafe { &*(self as *const Self as *const [T; N]) } } /// Returns a mutable array reference containing the entire SIMD vector. - pub fn as_mut_array(&mut self) -> &mut [T; LANES] { - // SAFETY: Transmuting between `Simd` and `[T; LANES]` - // is always valid and `Simd` never has a lower alignment - // than `[T; LANES]`. + pub fn as_mut_array(&mut self) -> &mut [T; N] { + // SAFETY: Transmuting between `Simd` and `[T; N]` + // is always valid and `Simd` never has a lower alignment + // than `[T; N]`. // // NOTE: This deliberately doesn't just use `&mut self.0`, see the comment // on the struct definition for details. - unsafe { &mut *(self as *mut Self as *mut [T; LANES]) } + unsafe { &mut *(self as *mut Self as *mut [T; N]) } } /// Converts an array to a SIMD vector. 
- pub const fn from_array(array: [T; LANES]) -> Self { - // SAFETY: Transmuting between `Simd` and `[T; LANES]` + pub const fn from_array(array: [T; N]) -> Self { + // SAFETY: Transmuting between `Simd` and `[T; N]` // is always valid. We need to use `read_unaligned` here, since // the array may have a lower alignment than the vector. // @@ -172,12 +183,12 @@ where // // NOTE: This deliberately doesn't just use `Self(array)`, see the comment // on the struct definition for details. - unsafe { (&array as *const [T; LANES] as *const Self).read_unaligned() } + unsafe { (&array as *const [T; N] as *const Self).read_unaligned() } } /// Converts a SIMD vector to an array. - pub const fn to_array(self) -> [T; LANES] { - // SAFETY: Transmuting between `Simd` and `[T; LANES]` + pub const fn to_array(self) -> [T; N] { + // SAFETY: Transmuting between `Simd` and `[T; N]` // is always valid. No need to use `read_unaligned` here, since // the vector never has a lower alignment than the array. // @@ -187,14 +198,14 @@ where // // NOTE: This deliberately doesn't just use `self.0`, see the comment // on the struct definition for details. - unsafe { (&self as *const Self as *const [T; LANES]).read() } + unsafe { (&self as *const Self as *const [T; N]).read() } } - /// Converts a slice to a SIMD vector containing `slice[..LANES]`. + /// Converts a slice to a SIMD vector containing `slice[..N]`. /// /// # Panics /// - /// Panics if the slice's length is less than the vector's `Simd::LANES`. + /// Panics if the slice's length is less than the vector's `Simd::N`. 
/// /// # Example /// @@ -208,21 +219,21 @@ where #[must_use] pub const fn from_slice(slice: &[T]) -> Self { assert!( - slice.len() >= LANES, - "slice length must be at least the number of lanes" + slice.len() >= Self::N, + "slice length must be at least the number of elements" ); - assert!(core::mem::size_of::() == LANES * core::mem::size_of::()); + assert!(core::mem::size_of::() == Self::N * core::mem::size_of::()); // Safety: // - We've checked the length is sufficient. // - `T` and `Simd` are Copy types. unsafe { slice.as_ptr().cast::().read_unaligned() } } - /// Writes a SIMD vector to the first `LANES` elements of a slice. + /// Writes a SIMD vector to the first `N` elements of a slice. /// /// # Panics /// - /// Panics if the slice's length is less than the vector's `Simd::LANES`. + /// Panics if the slice's length is less than the vector's `Simd::N`. /// /// # Example /// @@ -238,22 +249,22 @@ where /// ``` pub fn copy_to_slice(self, slice: &mut [T]) { assert!( - slice.len() >= LANES, - "slice length must be at least the number of lanes" + slice.len() >= Self::N, + "slice length must be at least the number of elements" ); - assert!(core::mem::size_of::() == LANES * core::mem::size_of::()); + assert!(core::mem::size_of::() == Self::N * core::mem::size_of::()); // Safety: // - We've checked the length is sufficient // - `T` and `Simd` are Copy types. unsafe { slice.as_mut_ptr().cast::().write_unaligned(self) } } - /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type. + /// Performs elementwise conversion of a SIMD vector's elements to another SIMD-valid type. /// - /// This follows the semantics of Rust's `as` conversion for casting - /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`), - /// and from floats to integers (truncating, or saturating at the limits) for each lane, - /// or vice versa. 
+ /// This follows the semantics of Rust's `as` conversion for casting integers between + /// signed and unsigned (interpreting integers as 2s complement, so `-1` to `U::MAX` and + /// `1 << (U::BITS -1)` becoming `I::MIN` ), and from floats to integers (truncating, + /// or saturating at the limits) for each element. /// /// # Examples /// ``` @@ -274,7 +285,7 @@ where #[must_use] #[inline] #[cfg(not(bootstrap))] - pub fn cast(self) -> Simd + pub fn cast(self) -> Simd where T: SimdCast, { @@ -282,10 +293,10 @@ where unsafe { intrinsics::simd_as(self) } } - /// Lanewise casts pointers to another pointer type. + /// Casts a vector of pointers to another pointer type. #[must_use] #[inline] - pub fn cast_ptr(self) -> Simd + pub fn cast_ptr(self) -> Simd where T: SimdCastPtr, U: SimdElement, @@ -310,7 +321,7 @@ where /// [cast]: Simd::cast #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn to_int_unchecked(self) -> Simd + pub unsafe fn to_int_unchecked(self) -> Simd where T: core::convert::FloatToInt + SimdCast, I: SimdCast, @@ -320,79 +331,79 @@ where } /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. - /// If an index is out-of-bounds, the lane is instead selected from the `or` vector. + /// If an index is out-of-bounds, the element is instead selected from the `or` vector. /// /// # Examples /// ``` /// # #![feature(portable_simd)] /// # use core::simd::Simd; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds /// let alt = Simd::from_array([-5, -4, -3, -2]); /// - /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds. 
+ /// let result = Simd::gather_or(&vec, idxs, alt); /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15])); /// ``` #[must_use] #[inline] - pub fn gather_or(slice: &[T], idxs: Simd, or: Self) -> Self { + pub fn gather_or(slice: &[T], idxs: Simd, or: Self) -> Self { Self::gather_select(slice, Mask::splat(true), idxs, or) } - /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. - /// If an index is out-of-bounds, the lane is set to the default value for the type. + /// Reads from indices in `slice` to construct a SIMD vector. + /// If an index is out-of-bounds, the element is set to the default given by `T: Default`. /// /// # Examples /// ``` /// # #![feature(portable_simd)] /// # use core::simd::Simd; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let idxs = Simd::from_array([9, 3, 0, 5]); // Note the index that is out-of-bounds /// - /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds. + /// let result = Simd::gather_or_default(&vec, idxs); /// assert_eq!(result, Simd::from_array([0, 13, 10, 15])); /// ``` #[must_use] #[inline] - pub fn gather_or_default(slice: &[T], idxs: Simd) -> Self + pub fn gather_or_default(slice: &[T], idxs: Simd) -> Self where T: Default, { Self::gather_or(slice, idxs, Self::splat(T::default())) } - /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If an index is disabled or is out-of-bounds, the lane is selected from the `or` vector. + /// Reads from indices in `slice` to construct a SIMD vector. + /// The mask `enable`s all `true` indices and disables all `false` indices. + /// If an index is disabled or is out-of-bounds, the element is selected from the `or` vector. 
/// /// # Examples /// ``` /// # #![feature(portable_simd)] /// # use core::simd::{Simd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index /// let alt = Simd::from_array([-5, -4, -3, -2]); - /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element /// - /// let result = Simd::gather_select(&vec, enable, idxs, alt); // Note the lane that is out-of-bounds. + /// let result = Simd::gather_select(&vec, enable, idxs, alt); /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); /// ``` #[must_use] #[inline] pub fn gather_select( slice: &[T], - enable: Mask, - idxs: Simd, + enable: Mask, + idxs: Simd, or: Self, ) -> Self { - let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); - // Safety: We have masked-off out-of-bounds lanes. + let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); + // Safety: We have masked-off out-of-bounds indices. unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) } } - /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If an index is disabled, the lane is selected from the `or` vector. + /// Reads from indices in `slice` to construct a SIMD vector. + /// The mask `enable`s all `true` indices and disables all `false` indices. + /// If an index is disabled, the element is selected from the `or` vector. 
/// /// # Safety /// @@ -406,13 +417,13 @@ where /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::{Simd, SimdPartialOrd, Mask}; /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 5]); + /// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index /// let alt = Simd::from_array([-5, -4, -3, -2]); - /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane. + /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element /// // If this mask was used to gather, it would be unsound. Let's fix that. /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); /// - /// // We have masked the OOB lane, so it's safe to gather now. + /// // The out-of-bounds index has been masked, so it's safe to gather now. /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) }; /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2])); /// ``` @@ -422,18 +433,18 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn gather_select_unchecked( slice: &[T], - enable: Mask, - idxs: Simd, + enable: Mask, + idxs: Simd, or: Self, ) -> Self { - let base_ptr = Simd::<*const T, LANES>::splat(slice.as_ptr()); + let base_ptr = Simd::<*const T, N>::splat(slice.as_ptr()); // Ferris forgive me, I have done pointer arithmetic here. let ptrs = base_ptr.wrapping_add(idxs); // Safety: The caller is responsible for determining the indices are okay to read unsafe { Self::gather_select_ptr(ptrs, enable, or) } } - /// Read pointers elementwise into a SIMD vector. + /// Read elementwise from pointers into a SIMD vector. 
/// /// # Safety /// @@ -454,7 +465,7 @@ where #[must_use] #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn gather_ptr(source: Simd<*const T, LANES>) -> Self + pub unsafe fn gather_ptr(source: Simd<*const T, N>) -> Self where T: Default, { @@ -463,13 +474,14 @@ where unsafe { Self::gather_select_ptr(source, Mask::splat(true), Self::default()) } } - /// Conditionally read pointers elementwise into a SIMD vector. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If a lane is disabled, the lane is selected from the `or` vector and no read is performed. + /// Conditionally read elementwise from pointers into a SIMD vector. + /// The mask `enable`s all `true` pointers and disables all `false` pointers. + /// If a pointer is disabled, the element is selected from the `or` vector, + /// and no read is performed. /// /// # Safety /// - /// Enabled lanes must satisfy the same conditions as [`core::ptr::read`]. + /// Enabled elements must satisfy the same conditions as [`core::ptr::read`]. /// /// # Example /// ``` @@ -488,8 +500,8 @@ where #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn gather_select_ptr( - source: Simd<*const T, LANES>, - enable: Mask, + source: Simd<*const T, N>, + enable: Mask, or: Self, ) -> Self { // Safety: The caller is responsible for upholding all invariants @@ -497,30 +509,31 @@ where } /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. - /// If two lanes in the scattered vector would write to the same index - /// only the last lane is guaranteed to actually be written. + /// If an index is out-of-bounds, the write is suppressed without panicking. + /// If two elements in the scattered vector would write to the same index + /// only the last element is guaranteed to actually be written. 
/// /// # Examples /// ``` /// # #![feature(portable_simd)] /// # use core::simd::Simd; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 0]); + /// let idxs = Simd::from_array([9, 3, 0, 0]); // Note the duplicate index. /// let vals = Simd::from_array([-27, 82, -41, 124]); /// - /// vals.scatter(&mut vec, idxs); // index 0 receives two writes. + /// vals.scatter(&mut vec, idxs); // two logical writes means the last wins. /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]); /// ``` #[inline] - pub fn scatter(self, slice: &mut [T], idxs: Simd) { + pub fn scatter(self, slice: &mut [T], idxs: Simd) { self.scatter_select(slice, Mask::splat(true), idxs) } - /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If an enabled index is out-of-bounds, the lane is not written. - /// If two enabled lanes in the scattered vector would write to the same index, - /// only the last lane is guaranteed to actually be written. + /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. + /// The mask `enable`s all `true` indices and disables all `false` indices. + /// If an enabled index is out-of-bounds, the write is suppressed without panicking. + /// If two enabled elements in the scattered vector would write to the same index, + /// only the last element is guaranteed to actually be written. 
/// /// # Examples /// ``` @@ -529,29 +542,24 @@ where /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::{Simd, Mask}; /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let idxs = Simd::from_array([9, 3, 0, 0]); + /// let idxs = Simd::from_array([9, 3, 0, 0]); // Includes an out-of-bounds index /// let vals = Simd::from_array([-27, 82, -41, 124]); - /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// let enable = Mask::from_array([true, true, true, false]); // Includes a masked element /// - /// vals.scatter_select(&mut vec, enable, idxs); // index 0's second write is masked, thus omitted. + /// vals.scatter_select(&mut vec, enable, idxs); // The last write is masked, thus omitted. /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); /// ``` #[inline] - pub fn scatter_select( - self, - slice: &mut [T], - enable: Mask, - idxs: Simd, - ) { - let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); - // Safety: We have masked-off out-of-bounds lanes. + pub fn scatter_select(self, slice: &mut [T], enable: Mask, idxs: Simd) { + let enable: Mask = enable & idxs.simd_lt(Simd::splat(slice.len())); + // Safety: We have masked-off out-of-bounds indices. unsafe { self.scatter_select_unchecked(slice, enable, idxs) } } - /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If two enabled lanes in the scattered vector would write to the same index, - /// only the last lane is guaranteed to actually be written. + /// Writes values from a SIMD vector to multiple potentially discontiguous indices in `slice`. + /// The mask `enable`s all `true` indices and disables all `false` indices. + /// If two enabled elements in the scattered vector would write to the same index, + /// only the last element is guaranteed to actually be written. 
/// /// # Safety /// @@ -567,13 +575,13 @@ where /// let mut vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; /// let idxs = Simd::from_array([9, 3, 0, 0]); /// let vals = Simd::from_array([-27, 82, -41, 124]); - /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane. + /// let enable = Mask::from_array([true, true, true, false]); // Masks the final index /// // If this mask was used to scatter, it would be unsound. Let's fix that. /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len())); /// - /// // We have masked the OOB lane, so it's safe to scatter now. + /// // We have masked the OOB index, so it's safe to scatter now. /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); } - /// // index 0's second write is masked, thus was omitted. + /// // The second write to index 0 was masked, thus omitted. /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); /// ``` /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html @@ -582,8 +590,8 @@ where pub unsafe fn scatter_select_unchecked( self, slice: &mut [T], - enable: Mask, - idxs: Simd, + enable: Mask, + idxs: Simd, ) { // Safety: This block works with *mut T derived from &mut 'a [T], // which means it is delicate in Rust's borrowing model, circa 2021: @@ -597,7 +605,7 @@ where // 3. &mut [T] which will become our base ptr. unsafe { // Now Entering ☢️ *mut T Zone - let base_ptr = Simd::<*mut T, LANES>::splat(slice.as_mut_ptr()); + let base_ptr = Simd::<*mut T, N>::splat(slice.as_mut_ptr()); // Ferris forgive me, I have done pointer arithmetic here. 
let ptrs = base_ptr.wrapping_add(idxs); // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah @@ -626,18 +634,18 @@ where /// ``` #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, LANES>) { + pub unsafe fn scatter_ptr(self, dest: Simd<*mut T, N>) { // Safety: The caller is responsible for upholding all invariants unsafe { self.scatter_select_ptr(dest, Mask::splat(true)) } } /// Conditionally write pointers elementwise into a SIMD vector. - /// The mask `enable`s all `true` lanes and disables all `false` lanes. - /// If a lane is disabled, the write to that lane is skipped. + /// The mask `enable`s all `true` pointers and disables all `false` pointers. + /// If a pointer is disabled, the write to its pointee is skipped. /// /// # Safety /// - /// Enabled lanes must satisfy the same conditions as [`core::ptr::write`]. + /// Enabled pointers must satisfy the same conditions as [`core::ptr::write`]. 
/// /// # Example /// ``` @@ -654,32 +662,32 @@ where /// ``` #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, LANES>, enable: Mask) { + pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask) { // Safety: The caller is responsible for upholding all invariants unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) } } } -impl Copy for Simd +impl Copy for Simd where + LaneCount: SupportedLaneCount, T: SimdElement, - LaneCount: SupportedLaneCount, { } -impl Clone for Simd +impl Clone for Simd where + LaneCount: SupportedLaneCount, T: SimdElement, - LaneCount: SupportedLaneCount, { fn clone(&self) -> Self { *self } } -impl Default for Simd +impl Default for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + Default, { #[inline] @@ -688,20 +696,20 @@ where } } -impl PartialEq for Simd +impl PartialEq for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + PartialEq, { #[inline] fn eq(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. let mask = unsafe { - let tfvec: Simd<::Mask, LANES> = intrinsics::simd_eq(*self, *other); + let tfvec: Simd<::Mask, N> = intrinsics::simd_eq(*self, *other); Mask::from_int_unchecked(tfvec) }; - // Two vectors are equal if all lanes tested true for vertical equality. + // Two vectors are equal if they are elementwise equal mask.all() } @@ -710,18 +718,18 @@ where fn ne(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. 
let mask = unsafe { - let tfvec: Simd<::Mask, LANES> = intrinsics::simd_ne(*self, *other); + let tfvec: Simd<::Mask, N> = intrinsics::simd_ne(*self, *other); Mask::from_int_unchecked(tfvec) }; - // Two vectors are non-equal if any lane tested true for vertical non-equality. + // Two vectors are non-equal if they are elementwise non-equal mask.any() } } -impl PartialOrd for Simd +impl PartialOrd for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + PartialOrd, { #[inline] @@ -731,16 +739,16 @@ where } } -impl Eq for Simd +impl Eq for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + Eq, { } -impl Ord for Simd +impl Ord for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + Ord, { #[inline] @@ -750,9 +758,9 @@ where } } -impl core::hash::Hash for Simd +impl core::hash::Hash for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement + core::hash::Hash, { #[inline] @@ -765,32 +773,32 @@ where } // array references -impl AsRef<[T; LANES]> for Simd +impl AsRef<[T; N]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] - fn as_ref(&self) -> &[T; LANES] { + fn as_ref(&self) -> &[T; N] { self.as_array() } } -impl AsMut<[T; LANES]> for Simd +impl AsMut<[T; N]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] - fn as_mut(&mut self) -> &mut [T; LANES] { + fn as_mut(&mut self) -> &mut [T; N] { self.as_mut_array() } } // slice references -impl AsRef<[T]> for Simd +impl AsRef<[T]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -799,9 +807,9 @@ where } } -impl AsMut<[T]> for Simd +impl AsMut<[T]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -811,29 +819,29 @@ where } // 
vector/array conversion -impl From<[T; LANES]> for Simd +impl From<[T; N]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { - fn from(array: [T; LANES]) -> Self { + fn from(array: [T; N]) -> Self { Self(array) } } -impl From> for [T; LANES] +impl From> for [T; N] where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { - fn from(vector: Simd) -> Self { + fn from(vector: Simd) -> Self { vector.to_array() } } -impl TryFrom<&[T]> for Simd +impl TryFrom<&[T]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; @@ -843,9 +851,9 @@ where } } -impl TryFrom<&mut [T]> for Simd +impl TryFrom<&mut [T]> for Simd where - LaneCount: SupportedLaneCount, + LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; From 92259a4a6c20b02e87e0589a286bef7b71cd95a9 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Mon, 10 Apr 2023 00:11:37 -0700 Subject: [PATCH 143/161] Clarify elementwise cmp reduces Saying "elementwise (non-)equal" may suggest it returns a vector. The comments should be clear that it instead reduces to a scalar. 
Co-authored-by: Jacob Lifshay --- crates/core_simd/src/vector.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 154b467752b..b7b5e0b002f 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -709,7 +709,7 @@ where Mask::from_int_unchecked(tfvec) }; - // Two vectors are equal if they are elementwise equal + // Two vectors are equal if all elements are equal when compared elementwise mask.all() } @@ -722,7 +722,7 @@ where Mask::from_int_unchecked(tfvec) }; - // Two vectors are non-equal if they are elementwise non-equal + // Two vectors are non-equal if any elements are non-equal when compared elementwise mask.any() } } From 4064678dafd3907253353a1efc01bc0ada78c1bc Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Mon, 10 Apr 2023 22:06:01 -0700 Subject: [PATCH 144/161] Explain why to use Simd early --- crates/core_simd/src/vector.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index b7b5e0b002f..ef67fcfeee6 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -8,7 +8,12 @@ use crate::simd::{ /// `Simd` supports the operators (+, *, etc.) that `T` does in "elementwise" fashion. /// These take the element at each index from the left-hand side and right-hand side, /// perform the operation, then return the result in the same index in a vector of equal size. -/// In other words, an elementwise operation is equivalent to a zip, then map. 
+/// However, `Simd` differs from normal iteration and normal arrays: +/// - `Simd` executes `N` operations in a single step with no `break`s +/// - `Simd` can have an alignment greater than `T`, for better mechanical sympathy +/// +/// By always imposing these constraints on `Simd`, it is easier to compile elementwise operations +/// into machine instructions that can themselves be executed in parallel. /// /// ```rust /// # #![feature(portable_simd)] From 2b32732d0f64a27560c9c4ca15e89bc454c482da Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 22 Apr 2023 18:22:04 -0700 Subject: [PATCH 145/161] Do not construct Simd --- crates/core_simd/src/vector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index ef67fcfeee6..106f1965959 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -830,7 +830,7 @@ where T: SimdElement, { fn from(array: [T; N]) -> Self { - Self(array) + Self::from_array(array) } } From 4f0d8225fa6e503ba785da93b0ab900d597af133 Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Sat, 22 Apr 2023 18:27:52 -0700 Subject: [PATCH 146/161] Implement dynamic byte-swizzle prototype (rust-lang/portable-simd#334) This is meant to be an example that is used to test a Rust intrinsic against, which will replace it. The interface is fairly direct and doesn't address more nuanced or interesting permutations one can do, nevermind on types other than bytes. The ultimate goal is for direct LLVM support for this. 
--- crates/core_simd/src/mod.rs | 2 + crates/core_simd/src/swizzle_dyn.rs | 155 ++++++++++++++++++++++++++ crates/core_simd/tests/swizzle_dyn.rs | 74 ++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 crates/core_simd/src/swizzle_dyn.rs create mode 100644 crates/core_simd/tests/swizzle_dyn.rs diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index ece026a448b..35c659b7a42 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -17,6 +17,7 @@ mod masks; mod ops; mod ord; mod select; +mod swizzle_dyn; mod vector; mod vendor; @@ -32,5 +33,6 @@ pub mod simd { pub use crate::core_simd::masks::*; pub use crate::core_simd::ord::*; pub use crate::core_simd::swizzle::*; + pub use crate::core_simd::swizzle_dyn::*; pub use crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs new file mode 100644 index 00000000000..5c3a2c1824f --- /dev/null +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -0,0 +1,155 @@ +use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use core::mem; + +impl Simd +where + LaneCount: SupportedLaneCount, +{ + /// Swizzle a vector of bytes according to the index vector. + /// Indices within range select the appropriate byte. + /// Indices "out of bounds" instead select 0. + /// + /// Note that the current implementation is selected during build-time + /// of the standard library, so `cargo build -Zbuild-std` may be necessary + /// to unlock better performance, especially for larger vectors. + /// A planned compiler improvement will enable using `#[target_feature]` instead. 
+ #[inline] + pub fn swizzle_dyn(self, idxs: Simd) -> Self { + #![allow(unused_imports, unused_unsafe)] + #[cfg(target_arch = "aarch64")] + use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8}; + #[cfg(all(target_arch = "arm", target_feature = "v7"))] + use core::arch::arm::{uint8x8_t, vtbl1_u8}; + #[cfg(target_arch = "wasm32")] + use core::arch::wasm32 as wasm; + #[cfg(target_arch = "x86")] + use core::arch::x86; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64 as x86; + // SAFETY: Intrinsics covered by cfg + unsafe { + match N { + #[cfg(target_feature = "neon")] + 8 => transize(vtbl1_u8, self, idxs), + #[cfg(target_feature = "ssse3")] + 16 => transize(x86::_mm_shuffle_epi8, self, idxs), + #[cfg(target_feature = "simd128")] + 16 => transize(wasm::i8x16_swizzle, self, idxs), + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + 16 => transize(vqtbl1q_u8, self, idxs), + #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] + 32 => transize_raw(avx2_pshufb, self, idxs), + #[cfg(target_feature = "avx512vl,avx512vbmi")] + 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), + // Notable absence: avx512bw shuffle + // If avx512bw is available, odds of avx512vbmi are good + #[cfg(target_feature = "avx512vbmi")] + 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs), + _ => { + let mut array = [0; N]; + for (i, k) in idxs.to_array().into_iter().enumerate() { + if (k as usize) < N { + array[i] = self[k as usize]; + }; + } + array.into() + } + } + } + } +} + +/// "vpshufb like it was meant to be" on AVX2 +/// +/// # Safety +/// This requires AVX2 to work +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx2")] +#[allow(unused)] +#[inline] +unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { + use crate::simd::SimdPartialOrd; + #[cfg(target_arch = "x86")] + use core::arch::x86; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64 as x86; + use x86::_mm256_permute2x128_si256 
as avx2_cross_shuffle; + use x86::_mm256_shuffle_epi8 as avx2_half_pshufb; + let mid = Simd::splat(16u8); + let high = mid + mid; + // SAFETY: Caller promised AVX2 + unsafe { + // This is ordering sensitive, and LLVM will order these how you put them. + // Most AVX2 impls use ~5 "ports", and only 1 or 2 are capable of permutes. + // But the "compose" step will lower to ops that can also use at least 1 other port. + // So this tries to break up permutes so composition flows through "open" ports. + // Comparative benches should be done on multiple AVX2 CPUs before reordering this + + let hihi = avx2_cross_shuffle::<0x11>(bytes.into(), bytes.into()); + let hi_shuf = Simd::from(avx2_half_pshufb( + hihi, // duplicate the vector's top half + idxs.into(), // so that using only 4 bits of an index still picks bytes 16-31 + )); + // A zero-fill during the compose step gives the "all-Neon-like" OOB-is-0 semantics + let compose = idxs.simd_lt(high).select(hi_shuf, Simd::splat(0)); + let lolo = avx2_cross_shuffle::<0x00>(bytes.into(), bytes.into()); + let lo_shuf = Simd::from(avx2_half_pshufb(lolo, idxs.into())); + // Repeat, then pick indices < 16, overwriting indices 0-15 from previous compose step + let compose = idxs.simd_lt(mid).select(lo_shuf, compose); + compose + } +} + +/// This sets up a call to an architecture-specific function, and in doing so +/// it persuades rustc that everything is the correct size. Which it is. +/// This would not be needed if one could convince Rust that, by matching on N, +/// N is that value, and thus it would be valid to substitute e.g. 16. +/// +/// # Safety +/// The correctness of this function hinges on the sizes agreeing in actuality. +#[allow(dead_code)] +#[inline(always)] +unsafe fn transize( + f: unsafe fn(T, T) -> T, + bytes: Simd, + idxs: Simd, +) -> Simd +where + LaneCount: SupportedLaneCount, +{ + let idxs = zeroing_idxs(idxs); + // SAFETY: Same obligation to use this function as to use mem::transmute_copy. 
+ unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } +} + +/// Make indices that yield 0 for this architecture +#[inline(always)] +fn zeroing_idxs(idxs: Simd) -> Simd +where + LaneCount: SupportedLaneCount, +{ + // On x86, make sure the top bit is set. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + let idxs = { + use crate::simd::SimdPartialOrd; + idxs.simd_lt(Simd::splat(N as u8)) + .select(idxs, Simd::splat(u8::MAX)) + }; + // Simply do nothing on most architectures. + idxs +} + +/// As transize but no implicit call to `zeroing_idxs`. +#[allow(dead_code)] +#[inline(always)] +unsafe fn transize_raw( + f: unsafe fn(T, T) -> T, + bytes: Simd, + idxs: Simd, +) -> Simd +where + LaneCount: SupportedLaneCount, +{ + // SAFETY: Same obligation to use this function as to use mem::transmute_copy. + unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } +} diff --git a/crates/core_simd/tests/swizzle_dyn.rs b/crates/core_simd/tests/swizzle_dyn.rs new file mode 100644 index 00000000000..646cd5f3383 --- /dev/null +++ b/crates/core_simd/tests/swizzle_dyn.rs @@ -0,0 +1,74 @@ +#![feature(portable_simd)] +use core::{fmt, ops::RangeInclusive}; +use proptest; +use test_helpers::{self, biteq, make_runner, prop_assert_biteq}; + +fn swizzle_dyn_scalar_ver(values: [u8; N], idxs: [u8; N]) -> [u8; N] { + let mut array = [0; N]; + for (i, k) in idxs.into_iter().enumerate() { + if (k as usize) < N { + array[i] = values[k as usize]; + }; + } + array +} + +test_helpers::test_lanes! 
{ + fn swizzle_dyn() { + match_simd_with_fallback( + &core_simd::simd::Simd::::swizzle_dyn, + &swizzle_dyn_scalar_ver, + &|_, _| true, + ); + } +} + +fn match_simd_with_fallback( + fv: &dyn Fn(Vector, Vector) -> VectorResult, + fs: &dyn Fn([Scalar; N], [Scalar; N]) -> [ScalarResult; N], + check: &dyn Fn([Scalar; N], [Scalar; N]) -> bool, +) where + Scalar: Copy + fmt::Debug + SwizzleStrategy, + ScalarResult: Copy + biteq::BitEq + fmt::Debug + SwizzleStrategy, + Vector: Into<[Scalar; N]> + From<[Scalar; N]> + Copy, + VectorResult: Into<[ScalarResult; N]> + From<[ScalarResult; N]> + Copy, +{ + test_swizzles_2(&|x: [Scalar; N], y: [Scalar; N]| { + proptest::prop_assume!(check(x, y)); + let result_v: [ScalarResult; N] = fv(x.into(), y.into()).into(); + let result_s: [ScalarResult; N] = fs(x, y); + crate::prop_assert_biteq!(result_v, result_s); + Ok(()) + }); +} + +fn test_swizzles_2( + f: &dyn Fn(A, B) -> proptest::test_runner::TestCaseResult, +) { + let mut runner = make_runner(); + runner + .run( + &(A::swizzled_strategy(), B::swizzled_strategy()), + |(a, b)| f(a, b), + ) + .unwrap(); +} + +pub trait SwizzleStrategy { + type Strategy: proptest::strategy::Strategy; + fn swizzled_strategy() -> Self::Strategy; +} + +impl SwizzleStrategy for u8 { + type Strategy = RangeInclusive; + fn swizzled_strategy() -> Self::Strategy { + 0..=64 + } +} + +impl SwizzleStrategy for [T; N] { + type Strategy = test_helpers::array::UniformArrayStrategy; + fn swizzled_strategy() -> Self::Strategy { + Self::Strategy::new(T::swizzled_strategy()) + } +} From 394a8845c699b5c6b47c6a17e2926a549f8801be Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 23 Apr 2023 14:52:38 -0400 Subject: [PATCH 147/161] Fix {to,from}_array UB when repr(simd) produces padding --- crates/core_simd/src/lib.rs | 2 ++ crates/core_simd/src/vector.rs | 56 +++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs 
index e054d483ca5..31e7a3617bc 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -2,6 +2,8 @@ #![feature( const_ptr_read, const_refs_to_cell, + const_maybe_uninit_as_mut_ptr, + const_mut_refs, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 106f1965959..8c6c7036081 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -176,34 +176,62 @@ where unsafe { &mut *(self as *mut Self as *mut [T; N]) } } + /// Load a vector from an array of `T`. + /// + /// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing). + /// With padding, `read_unaligned` will read past the end of an array of N elements. + /// + /// # Safety + /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. + const unsafe fn load(ptr: *const [T; N]) -> Self { + let mut tmp = core::mem::MaybeUninit::uninit(); + // SAFETY: `Simd` always contains `N` elements of type `T`. It may have padding + // which does not need to be initialized. The safety of reading `ptr` is ensured by the + // caller. + unsafe { + core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr() as *mut _, 1); + tmp.assume_init() + } + } + + /// Store a vector to an array of `T`. + /// + /// See `load` as to why this function is necessary. + /// + /// # Safety + /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. + const unsafe fn store(self, ptr: *mut [T; N]) { + // SAFETY: `Simd` always contains `N` elements of type `T`. The safety of writing + // `ptr` is ensured by the caller. + unsafe { core::ptr::copy_nonoverlapping(self.as_array(), ptr, 1) } + } + /// Converts an array to a SIMD vector. pub const fn from_array(array: [T; N]) -> Self { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid. 
We need to use `read_unaligned` here, since - // the array may have a lower alignment than the vector. + // SAFETY: `&array` is safe to read. // - // FIXME: We currently use a pointer read instead of `transmute_copy` because - // it results in better codegen with optimizations disabled, but we should - // probably just use `transmute` once that works on const generic types. + // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)` + // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). // // NOTE: This deliberately doesn't just use `Self(array)`, see the comment // on the struct definition for details. - unsafe { (&array as *const [T; N] as *const Self).read_unaligned() } + unsafe { Self::load(&array) } } /// Converts a SIMD vector to an array. pub const fn to_array(self) -> [T; N] { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid. No need to use `read_unaligned` here, since - // the vector never has a lower alignment than the array. + let mut tmp = core::mem::MaybeUninit::uninit(); + // SAFETY: writing to `tmp` is safe and initializes it. // - // FIXME: We currently use a pointer read instead of `transmute_copy` because - // it results in better codegen with optimizations disabled, but we should - // probably just use `transmute` once that works on const generic types. + // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)` + // results in padding for non-power-of-2 vectors (so vectors are larger than arrays). // // NOTE: This deliberately doesn't just use `self.0`, see the comment // on the struct definition for details. - unsafe { (&self as *const Self as *const [T; N]).read() } + unsafe { + self.store(tmp.as_mut_ptr()); + tmp.assume_init() + } } /// Converts a slice to a SIMD vector containing `slice[..N]`. 
From c504f01abeba606a5fa7d081ed8aec25d118a486 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 25 Apr 2023 21:37:04 -0400 Subject: [PATCH 148/161] Use cast and improve comments --- crates/core_simd/src/vector.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 8c6c7036081..92984f55e45 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -184,12 +184,15 @@ where /// # Safety /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. const unsafe fn load(ptr: *const [T; N]) -> Self { - let mut tmp = core::mem::MaybeUninit::uninit(); + // There are potentially simpler ways to write this function, but this should result in + // LLVM `load ` + + let mut tmp = core::mem::MaybeUninit::::uninit(); // SAFETY: `Simd` always contains `N` elements of type `T`. It may have padding // which does not need to be initialized. The safety of reading `ptr` is ensured by the // caller. unsafe { - core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr() as *mut _, 1); + core::ptr::copy_nonoverlapping(ptr, tmp.as_mut_ptr().cast(), 1); tmp.assume_init() } } @@ -201,9 +204,14 @@ where /// # Safety /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. const unsafe fn store(self, ptr: *mut [T; N]) { + // There are potentially simpler ways to write this function, but this should result in + // LLVM `store ` + + // Creating a temporary helps LLVM turn the memcpy into a store. + let tmp = self; // SAFETY: `Simd` always contains `N` elements of type `T`. The safety of writing // `ptr` is ensured by the caller. - unsafe { core::ptr::copy_nonoverlapping(self.as_array(), ptr, 1) } + unsafe { core::ptr::copy_nonoverlapping(tmp.as_array(), ptr, 1) } } /// Converts an array to a SIMD vector. 
From 4967f25f6bf930a5f79d5c66f2ffc53159d43c4a Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sun, 7 May 2023 00:11:56 +0000 Subject: [PATCH 149/161] Use the new `load`/`store` functions in `{from,to}_slice` --- crates/core_simd/src/vector.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 92984f55e45..a793ae9e391 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -263,11 +263,9 @@ where slice.len() >= Self::N, "slice length must be at least the number of elements" ); - assert!(core::mem::size_of::() == Self::N * core::mem::size_of::()); - // Safety: - // - We've checked the length is sufficient. - // - `T` and `Simd` are Copy types. - unsafe { slice.as_ptr().cast::().read_unaligned() } + // SAFETY: We just checked that the slice contains + // at least `N` elements. + unsafe { Self::load(slice.as_ptr().cast()) } } /// Writes a SIMD vector to the first `N` elements of a slice. @@ -293,11 +291,9 @@ where slice.len() >= Self::N, "slice length must be at least the number of elements" ); - assert!(core::mem::size_of::() == Self::N * core::mem::size_of::()); - // Safety: - // - We've checked the length is sufficient - // - `T` and `Simd` are Copy types. - unsafe { slice.as_mut_ptr().cast::().write_unaligned(self) } + // SAFETY: We just checked that the slice contains + // at least `N` elements. + unsafe { self.store(slice.as_mut_ptr().cast()) } } /// Performs elementwise conversion of a SIMD vector's elements to another SIMD-valid type. 
From b246e454387ef2d80078db36975d2df5d957f9fa Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sun, 7 May 2023 00:15:18 +0000 Subject: [PATCH 150/161] Fix inaccurate safety comments --- crates/core_simd/src/vector.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 92984f55e45..ff761fc900f 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -156,9 +156,9 @@ where /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); /// ``` pub const fn as_array(&self) -> &[T; N] { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid and `Simd` never has a lower alignment - // than `[T; N]`. + // SAFETY: `Simd` is just an overaligned `[T; N]` with + // potential padding at the end, so pointer casting to a + // `&[T; N]` is safe. // // NOTE: This deliberately doesn't just use `&self.0`, see the comment // on the struct definition for details. @@ -167,9 +167,9 @@ where /// Returns a mutable array reference containing the entire SIMD vector. pub fn as_mut_array(&mut self) -> &mut [T; N] { - // SAFETY: Transmuting between `Simd` and `[T; N]` - // is always valid and `Simd` never has a lower alignment - // than `[T; N]`. + // SAFETY: `Simd` is just an overaligned `[T; N]` with + // potential padding at the end, so pointer casting to a + // `&mut [T; N]` is safe. // // NOTE: This deliberately doesn't just use `&mut self.0`, see the comment // on the struct definition for details. 
From 8f50a17c37a214632c2f5cf5b8f2833a7286883b Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 22 Apr 2023 19:27:22 -0700 Subject: [PATCH 151/161] Fixups for sync - Fix LANES over-replace - Bring in traits - Use less inference-heavy types --- crates/core_simd/src/vector.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 0253f122c98..3323b92e37b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -2,6 +2,7 @@ use crate::simd::{ intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdCastPtr, SimdConstPtr, SimdMutPtr, SimdPartialOrd, SupportedLaneCount, Swizzle, }; +use core::convert::{TryFrom, TryInto}; /// A SIMD vector with the shape of `[T; N]` but the operations of `T`. /// @@ -109,7 +110,7 @@ where T: SimdElement, { /// Number of elements in this vector. - pub const N: usize = N; + pub const LANES: usize = N; /// Returns the number of elements in this SIMD vector. /// @@ -122,7 +123,7 @@ where /// assert_eq!(v.lanes(), 4); /// ``` pub const fn lanes(&self) -> usize { - Self::N + Self::LANES } /// Constructs a new SIMD vector with all elements set to the given value. 
@@ -260,7 +261,7 @@ where #[must_use] pub const fn from_slice(slice: &[T]) -> Self { assert!( - slice.len() >= Self::N, + slice.len() >= Self::LANES, "slice length must be at least the number of elements" ); // SAFETY: We just checked that the slice contains @@ -288,7 +289,7 @@ where /// ``` pub fn copy_to_slice(self, slice: &mut [T]) { assert!( - slice.len() >= Self::N, + slice.len() >= Self::LANES, "slice length must be at least the number of elements" ); // SAFETY: We just checked that the slice contains @@ -883,7 +884,7 @@ where { type Error = core::array::TryFromSliceError; - fn try_from(slice: &[T]) -> Result { + fn try_from(slice: &[T]) -> Result { Ok(Self::from_array(slice.try_into()?)) } } @@ -895,7 +896,7 @@ where { type Error = core::array::TryFromSliceError; - fn try_from(slice: &mut [T]) -> Result { + fn try_from(slice: &mut [T]) -> Result { Ok(Self::from_array(slice.try_into()?)) } } From d361e4335f7e37d2409820510e059744d1c96457 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 10 May 2023 05:36:16 -0700 Subject: [PATCH 152/161] Drop const_ptr_read feature gate --- crates/core_simd/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 31e7a3617bc..e5307de2155 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,5 @@ #![no_std] #![feature( - const_ptr_read, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, const_mut_refs, From 852762563aa890286eda2f668b8af30f8aa84216 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 10 May 2023 05:45:24 -0700 Subject: [PATCH 153/161] Temp fix for swizzle_dyn - disable the AVX512 variant for now (flaky) - tell Clippy to knock it off --- crates/core_simd/src/swizzle_dyn.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index 5c3a2c1824f..3eb80d5dca1 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ 
b/crates/core_simd/src/swizzle_dyn.rs @@ -43,8 +43,9 @@ where 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), // Notable absence: avx512bw shuffle // If avx512bw is available, odds of avx512vbmi are good - #[cfg(target_feature = "avx512vbmi")] - 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs), + // FIXME: initial AVX512VBMI variant didn't actually pass muster + // #[cfg(target_feature = "avx512vbmi")] + // 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs), _ => { let mut array = [0; N]; for (i, k) in idxs.to_array().into_iter().enumerate() { @@ -67,6 +68,7 @@ where #[target_feature(enable = "avx2")] #[allow(unused)] #[inline] +#[allow(clippy::let_and_return)] unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { use crate::simd::SimdPartialOrd; #[cfg(target_arch = "x86")] From b3b5cfca660d8926b5dd6e69c82c263381acab5e Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 2 May 2023 21:17:22 -0400 Subject: [PATCH 154/161] Add a prelude --- crates/core_simd/src/mod.rs | 2 + crates/core_simd/src/simd/prelude.rs | 79 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 crates/core_simd/src/simd/prelude.rs diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 35c659b7a42..f9891a3b7c1 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -23,6 +23,8 @@ mod vendor; #[doc = include_str!("core_simd_docs.md")] pub mod simd { + pub mod prelude; + pub(crate) use crate::core_simd::intrinsics; pub use crate::core_simd::alias::*; diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs new file mode 100644 index 00000000000..7d4f0798afa --- /dev/null +++ b/crates/core_simd/src/simd/prelude.rs @@ -0,0 +1,79 @@ +//! The portable SIMD prelude. +//! +//! Includes important traits and types to be imported with a glob: +//! ``` +//! use std::simd::prelude::*; +//! 
``` + +#[doc(no_inline)] +pub use super::{ + SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq, SimdPartialOrd, SimdUint, +}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{f32x1, f32x2, f32x4, f32x8, f32x16, f32x32, f32x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{f64x1, f64x2, f64x4, f64x8, f64x16, f64x32, f64x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{i8x1, i8x2, i8x4, i8x8, i8x16, i8x32, i8x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{i16x1, i16x2, i16x4, i16x8, i16x16, i16x32, i16x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{i32x1, i32x2, i32x4, i32x8, i32x16, i32x32, i32x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{i64x1, i64x2, i64x4, i64x8, i64x16, i64x32, i64x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{isizex1, isizex2, isizex4, isizex8, isizex16, isizex32, isizex64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{u8x1, u8x2, u8x4, u8x8, u8x16, u8x32, u8x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{u16x1, u16x2, u16x4, u16x8, u16x16, u16x32, u16x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{u32x1, u32x2, u32x4, u32x8, u32x16, u32x32, u32x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{u64x1, u64x2, u64x4, u64x8, u64x16, u64x32, u64x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{usizex1, usizex2, usizex4, usizex8, usizex16, usizex32, usizex64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{mask8x1, mask8x2, mask8x4, mask8x8, mask8x16, mask8x32, mask8x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{mask16x1, mask16x2, mask16x4, mask16x8, mask16x16, mask16x32, mask16x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{mask32x1, mask32x2, mask32x4, mask32x8, mask32x16, mask32x32, mask32x64}; + +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{mask64x1, mask64x2, mask64x4, mask64x8, mask64x16, mask64x32, mask64x64}; + +#[rustfmt::skip] 
+#[doc(no_inline)] +pub use super::{masksizex1, masksizex2, masksizex4, masksizex8, masksizex16, masksizex32, masksizex64}; From 9e818d62b3b2c12bf516229555990ecb85d249f5 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 2 May 2023 22:32:20 -0400 Subject: [PATCH 155/161] Ignore doctest --- crates/core_simd/src/simd/prelude.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index 7d4f0798afa..bdaed3ba067 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -1,7 +1,7 @@ //! The portable SIMD prelude. //! //! Includes important traits and types to be imported with a glob: -//! ``` +//! ```ignore //! use std::simd::prelude::*; //! ``` From c55e19cb00f65580a81a59a6f8d31ee29c59ea7e Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 2 May 2023 22:33:01 -0400 Subject: [PATCH 156/161] Add Simd, Mask, simd_swizzle to prelude Co-authored-by: Jacob Lifshay --- crates/core_simd/src/simd/prelude.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index bdaed3ba067..e8fdc932d49 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -7,7 +7,8 @@ #[doc(no_inline)] pub use super::{ - SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq, SimdPartialOrd, SimdUint, + simd_swizzle, Mask, Simd, SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq, + SimdPartialOrd, SimdUint, }; #[rustfmt::skip] From 6626cd824936e10e492957537c26f28d0739b567 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 20 May 2023 17:17:56 -0400 Subject: [PATCH 157/161] Remove cast_ptr in favor of cast which acts like pointer::cast. Move number casts to number traits. 
--- crates/core_simd/src/cast.rs | 74 ++++++++++----------- crates/core_simd/src/elements/const_ptr.rs | 30 +++++++-- crates/core_simd/src/elements/float.rs | 67 ++++++++++++++++++- crates/core_simd/src/elements/int.rs | 19 +++++- crates/core_simd/src/elements/mut_ptr.rs | 30 +++++++-- crates/core_simd/src/elements/uint.rs | 19 +++++- crates/core_simd/src/vector.rs | 75 +--------------------- crates/core_simd/tests/cast.rs | 3 +- crates/core_simd/tests/round.rs | 1 + 9 files changed, 194 insertions(+), 124 deletions(-) diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index 65a3f845ffc..1c3592f8075 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -1,55 +1,51 @@ use crate::simd::SimdElement; +mod sealed { + /// Cast vector elements to other types. + /// + /// # Safety + /// Implementing this trait asserts that the type is a valid vector element for the `simd_cast` + /// or `simd_as` intrinsics. + pub unsafe trait Sealed {} +} +use sealed::Sealed; + /// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly. -/// -/// # Safety -/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast` or -/// `simd_as` intrinsics. 
-pub unsafe trait SimdCast: SimdElement {} +pub trait SimdCast: Sealed + SimdElement {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for i8 {} +unsafe impl Sealed for i8 {} +impl SimdCast for i8 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for i16 {} +unsafe impl Sealed for i16 {} +impl SimdCast for i16 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for i32 {} +unsafe impl Sealed for i32 {} +impl SimdCast for i32 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for i64 {} +unsafe impl Sealed for i64 {} +impl SimdCast for i64 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for isize {} +unsafe impl Sealed for isize {} +impl SimdCast for isize {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for u8 {} +unsafe impl Sealed for u8 {} +impl SimdCast for u8 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for u16 {} +unsafe impl Sealed for u16 {} +impl SimdCast for u16 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for u32 {} +unsafe impl Sealed for u32 {} +impl SimdCast for u32 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for u64 {} +unsafe impl Sealed for u64 {} +impl SimdCast for u64 {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for usize {} +unsafe impl Sealed for usize {} +impl SimdCast for usize {} // Safety: primitive number types can be cast to other primitive number types -unsafe impl SimdCast for f32 {} +unsafe impl Sealed for f32 {} +impl SimdCast for f32 {} // Safety: primitive number types can be cast to other 
primitive number types -unsafe impl SimdCast for f64 {} - -/// Supporting trait for `Simd::cast_ptr`. Typically doesn't need to be used directly. -/// -/// # Safety -/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast_ptr` -/// intrinsic. -pub unsafe trait SimdCastPtr {} - -// Safety: pointers can be cast to other pointer types -unsafe impl SimdCastPtr for *const U -where - U: core::ptr::Pointee, - T: core::ptr::Pointee, -{ -} -// Safety: pointers can be cast to other pointer types -unsafe impl SimdCastPtr for *mut U -where - U: core::ptr::Pointee, - T: core::ptr::Pointee, -{ -} +unsafe impl Sealed for f64 {} +impl SimdCast for f64 {} diff --git a/crates/core_simd/src/elements/const_ptr.rs b/crates/core_simd/src/elements/const_ptr.rs index 0ef9802b5e2..f215f9a61d0 100644 --- a/crates/core_simd/src/elements/const_ptr.rs +++ b/crates/core_simd/src/elements/const_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { @@ -9,6 +9,9 @@ pub trait SimdConstPtr: Copy + Sealed { /// Vector of `isize` with the same number of lanes. type Isize; + /// Vector of const pointers with the same number of lanes. + type CastPtr; + /// Vector of mutable pointers to the same type. type MutPtr; @@ -18,6 +21,11 @@ pub trait SimdConstPtr: Copy + Sealed { /// Returns `true` for each lane that is null. fn is_null(self) -> Self::Mask; + /// Casts to a pointer of another type. + /// + /// Equivalent to calling [`pointer::cast`] on each lane. + fn cast(self) -> Self::CastPtr; + /// Changes constness without changing the type. /// /// Equivalent to calling [`pointer::cast_mut`] on each lane. 
@@ -78,6 +86,7 @@ where { type Usize = Simd; type Isize = Simd; + type CastPtr = Simd<*const U, LANES>; type MutPtr = Simd<*mut T, LANES>; type Mask = Mask; @@ -86,9 +95,22 @@ where Simd::splat(core::ptr::null()).simd_eq(self) } + #[inline] + fn cast(self) -> Self::CastPtr { + // SimdElement currently requires zero-sized metadata, so this should never fail. + // If this ever changes, `simd_cast_ptr` should produce a post-mono error. + use core::{mem::size_of, ptr::Pointee}; + assert_eq!(size_of::<::Metadata>(), 0); + assert_eq!(size_of::<::Metadata>(), 0); + + // Safety: pointers can be cast + unsafe { intrinsics::simd_cast_ptr(self) } + } + #[inline] fn cast_mut(self) -> Self::MutPtr { - self.cast_ptr() + // Safety: pointers can be cast + unsafe { intrinsics::simd_cast_ptr(self) } } #[inline] @@ -106,9 +128,9 @@ where // In the mean-time, this operation is defined to be "as if" it was // a wrapping_offset, so we can emulate it as such. This should properly // restore pointer provenance even under today's compiler. - self.cast_ptr::<*const u8>() + self.cast::() .wrapping_offset(addr.cast::() - self.addr().cast::()) - .cast_ptr() + .cast() } #[inline] diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index d6022327055..f6f6f51de53 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd, + intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialEq, SimdPartialOrd, SupportedLaneCount, }; @@ -15,6 +15,52 @@ pub trait SimdFloat: Copy + Sealed { /// Bit representation of this SIMD vector type. type Bits; + /// A SIMD vector with a different element type. + type Cast; + + /// Performs elementwise conversion of this vector's elements to another SIMD-valid type. 
+ /// + /// This follows the semantics of Rust's `as` conversion for floats (truncating or saturating + /// at the limits) for each element. + /// + /// # Example + /// ``` + /// # #![feature(portable_simd)] + /// # use core::simd::Simd; + /// let floats: Simd = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); + /// let ints = floats.cast::(); + /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); + /// + /// // Formally equivalent, but `Simd::cast` can optimize better. + /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32))); + /// + /// // The float conversion does not round-trip. + /// let floats_again = ints.cast(); + /// assert_ne!(floats, floats_again); + /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0])); + /// ``` + #[must_use] + fn cast(self) -> Self::Cast; + + /// Rounds toward zero and converts to the same-width integer type, assuming that + /// the value is finite and fits in that type. + /// + /// # Safety + /// The value must: + /// + /// * Not be NaN + /// * Not be infinite + /// * Be representable in the return type, after truncating off its fractional part + /// + /// If these requirements are infeasible or costly, consider using the safe function [cast], + /// which saturates on conversion. + /// + /// [cast]: Simd::cast + unsafe fn to_int_unchecked(self) -> Self::Cast + where + Self::Scalar: core::convert::FloatToInt + SimdCast, + I: SimdCast; + /// Raw transmutation to an unsigned integer vector type with the /// same size and number of lanes. #[must_use = "method returns a new vector and does not mutate the original value"] @@ -206,6 +252,25 @@ macro_rules! 
impl_trait { type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>; type Scalar = $ty; type Bits = Simd<$bits_ty, LANES>; + type Cast = Simd; + + #[inline] + fn cast(self) -> Self::Cast + { + // Safety: supported types are guaranteed by SimdCast + unsafe { intrinsics::simd_as(self) } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn to_int_unchecked(self) -> Self::Cast + where + Self::Scalar: core::convert::FloatToInt + SimdCast, + I: SimdCast, + { + // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants + unsafe { intrinsics::simd_cast(self) } + } #[inline] fn to_bits(self) -> Simd<$bits_ty, LANES> { diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs index 9b8c37ed466..6db89ff9a65 100644 --- a/crates/core_simd/src/elements/int.rs +++ b/crates/core_simd/src/elements/int.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount, + intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SupportedLaneCount, }; /// Operations on SIMD vectors of signed integers. @@ -11,6 +11,16 @@ pub trait SimdInt: Copy + Sealed { /// Scalar type contained by this SIMD vector type. type Scalar; + /// A SIMD vector with a different element type. + type Cast; + + /// Performs elementwise conversion of this vector's elements to another SIMD-valid type. + /// + /// This follows the semantics of Rust's `as` conversion for casting integers (wrapping to + /// other integer types, and saturating to float types). + #[must_use] + fn cast(self) -> Self::Cast; + /// Lanewise saturating add. /// /// # Examples @@ -198,6 +208,13 @@ macro_rules! 
impl_trait { { type Mask = Mask<<$ty as SimdElement>::Mask, LANES>; type Scalar = $ty; + type Cast = Simd; + + #[inline] + fn cast(self) -> Self::Cast { + // Safety: supported types are guaranteed by SimdCast + unsafe { intrinsics::simd_as(self) } + } #[inline] fn saturating_add(self, second: Self) -> Self { diff --git a/crates/core_simd/src/elements/mut_ptr.rs b/crates/core_simd/src/elements/mut_ptr.rs index d87986b4a09..4bdc6a14ce4 100644 --- a/crates/core_simd/src/elements/mut_ptr.rs +++ b/crates/core_simd/src/elements/mut_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -9,6 +9,9 @@ pub trait SimdMutPtr: Copy + Sealed { /// Vector of `isize` with the same number of lanes. type Isize; + /// Vector of mutable pointers with the same number of lanes. + type CastPtr; + /// Vector of constant pointers to the same type. type ConstPtr; @@ -18,6 +21,11 @@ pub trait SimdMutPtr: Copy + Sealed { /// Returns `true` for each lane that is null. fn is_null(self) -> Self::Mask; + /// Casts to a pointer of another type. + /// + /// Equivalent to calling [`pointer::cast`] on each lane. + fn cast(self) -> Self::CastPtr; + /// Changes constness without changing the type. /// /// Equivalent to calling [`pointer::cast_const`] on each lane. @@ -73,6 +81,7 @@ where { type Usize = Simd; type Isize = Simd; + type CastPtr = Simd<*mut U, LANES>; type ConstPtr = Simd<*const T, LANES>; type Mask = Mask; @@ -81,9 +90,22 @@ where Simd::splat(core::ptr::null_mut()).simd_eq(self) } + #[inline] + fn cast(self) -> Self::CastPtr { + // SimdElement currently requires zero-sized metadata, so this should never fail. + // If this ever changes, `simd_cast_ptr` should produce a post-mono error.
+ use core::{mem::size_of, ptr::Pointee}; + assert_eq!(size_of::<::Metadata>(), 0); + assert_eq!(size_of::<::Metadata>(), 0); + + // Safety: pointers can be cast + unsafe { intrinsics::simd_cast_ptr(self) } + } + #[inline] fn cast_const(self) -> Self::ConstPtr { - self.cast_ptr() + // Safety: pointers can be cast + unsafe { intrinsics::simd_cast_ptr(self) } } #[inline] @@ -101,9 +123,9 @@ where // In the mean-time, this operation is defined to be "as if" it was // a wrapping_offset, so we can emulate it as such. This should properly // restore pointer provenance even under today's compiler. - self.cast_ptr::<*mut u8>() + self.cast::() .wrapping_offset(addr.cast::() - self.addr().cast::()) - .cast_ptr() + .cast() } #[inline] diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs index 21e7e76eb3d..3926c395ec9 100644 --- a/crates/core_simd/src/elements/uint.rs +++ b/crates/core_simd/src/elements/uint.rs @@ -1,11 +1,21 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount}; +use crate::simd::{intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { /// Scalar type contained by this SIMD vector type. type Scalar; + /// A SIMD vector with a different element type. + type Cast; + + /// Performs elementwise conversion of this vector's elements to another SIMD-valid type. + /// + /// This follows the semantics of Rust's `as` conversion for casting integers (wrapping to + /// other integer types, and saturating to float types). + #[must_use] + fn cast(self) -> Self::Cast; + /// Lanewise saturating add. /// /// # Examples @@ -77,6 +87,13 @@ macro_rules! 
impl_trait { LaneCount: SupportedLaneCount, { type Scalar = $ty; + type Cast = Simd; + + #[inline] + fn cast(self) -> Self::Cast { + // Safety: supported types are guaranteed by SimdCast + unsafe { intrinsics::simd_as(self) } + } #[inline] fn saturating_add(self, second: Self) -> Self { diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 3323b92e37b..10a4c8e86f0 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,6 +1,6 @@ use crate::simd::{ - intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdCastPtr, SimdConstPtr, SimdMutPtr, - SimdPartialOrd, SupportedLaneCount, Swizzle, + intrinsics, LaneCount, Mask, MaskElement, SimdConstPtr, SimdMutPtr, SimdPartialOrd, + SupportedLaneCount, Swizzle, }; use core::convert::{TryFrom, TryInto}; @@ -297,77 +297,6 @@ where unsafe { self.store(slice.as_mut_ptr().cast()) } } - /// Performs elementwise conversion of a SIMD vector's elements to another SIMD-valid type. - /// - /// This follows the semantics of Rust's `as` conversion for casting integers between - /// signed and unsigned (interpreting integers as 2s complement, so `-1` to `U::MAX` and - /// `1 << (U::BITS -1)` becoming `I::MIN` ), and from floats to integers (truncating, - /// or saturating at the limits) for each element. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Simd; - /// let floats: Simd = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]); - /// let ints = floats.cast::(); - /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0])); - /// - /// // Formally equivalent, but `Simd::cast` can optimize better. - /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32))); - /// - /// // The float conversion does not round-trip. 
- /// let floats_again = ints.cast(); - /// assert_ne!(floats, floats_again); - /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0])); - /// ``` - #[must_use] - #[inline] - #[cfg(not(bootstrap))] - pub fn cast(self) -> Simd - where - T: SimdCast, - { - // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } - } - - /// Casts a vector of pointers to another pointer type. - #[must_use] - #[inline] - pub fn cast_ptr(self) -> Simd - where - T: SimdCastPtr, - U: SimdElement, - { - // Safety: supported types are guaranteed by SimdCastPtr - unsafe { intrinsics::simd_cast_ptr(self) } - } - - /// Rounds toward zero and converts to the same-width integer type, assuming that - /// the value is finite and fits in that type. - /// - /// # Safety - /// The value must: - /// - /// * Not be NaN - /// * Not be infinite - /// * Be representable in the return type, after truncating off its fractional part - /// - /// If these requirements are infeasible or costly, consider using the safe function [cast], - /// which saturates on conversion. - /// - /// [cast]: Simd::cast - #[inline] - #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn to_int_unchecked(self) -> Simd - where - T: core::convert::FloatToInt + SimdCast, - I: SimdCast, - { - // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants - unsafe { intrinsics::simd_cast(self) } - } - /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. /// If an index is out-of-bounds, the element is instead selected from the `or` vector. /// diff --git a/crates/core_simd/tests/cast.rs b/crates/core_simd/tests/cast.rs index ab5650f0713..00545936ea2 100644 --- a/crates/core_simd/tests/cast.rs +++ b/crates/core_simd/tests/cast.rs @@ -2,7 +2,8 @@ macro_rules! 
cast_types { ($start:ident, $($target:ident),*) => { mod $start { - use core_simd::simd::Simd; + #[allow(unused)] + use core_simd::simd::{Simd, SimdInt, SimdUint, SimdFloat}; type Vector = Simd<$start, N>; $( mod $target { diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 8b9638ad466..aacf7bd3bcc 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -53,6 +53,7 @@ macro_rules! float_rounding_test { test_helpers::test_lanes! { fn to_int_unchecked() { + use core_simd::simd::SimdFloat; // The maximum integer that can be represented by the equivalently sized float has // all of the mantissa digits set to 1, pushed up to the MSB. const ALL_MANTISSA_BITS: IntScalar = ((1 << ::MANTISSA_DIGITS) - 1); From f4ee1ab71174329e04af88370948e08a11c668a5 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 20 May 2023 17:28:05 -0400 Subject: [PATCH 158/161] Simplify to_int_unchecked --- crates/core_simd/src/elements/float.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index f6f6f51de53..70b18eee0c0 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -56,10 +56,9 @@ pub trait SimdFloat: Copy + Sealed { /// which saturates on conversion. /// /// [cast]: Simd::cast - unsafe fn to_int_unchecked(self) -> Self::Cast + unsafe fn to_int_unchecked(self) -> Self::Cast where - Self::Scalar: core::convert::FloatToInt + SimdCast, - I: SimdCast; + Self::Scalar: core::convert::FloatToInt; /// Raw transmutation to an unsigned integer vector type with the /// same size and number of lanes. @@ -263,10 +262,9 @@ macro_rules! 
impl_trait { #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - unsafe fn to_int_unchecked(self) -> Self::Cast + unsafe fn to_int_unchecked(self) -> Self::Cast where - Self::Scalar: core::convert::FloatToInt + SimdCast, - I: SimdCast, + Self::Scalar: core::convert::FloatToInt, { // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants unsafe { intrinsics::simd_cast(self) } From c0b7df522ff64773b783e702719537a61f55288a Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Sun, 7 May 2023 07:30:44 -0700 Subject: [PATCH 159/161] Add `#[inline]` to functions which were missing it, and `#[track_caller]` to ones with runtime panics from user input --- crates/core_simd/src/iter.rs | 4 ++++ crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/masks.rs | 4 ++++ crates/core_simd/src/ops.rs | 8 +++++++- crates/core_simd/src/ord.rs | 4 ++++ crates/core_simd/src/vector.rs | 18 +++++++++++++++++- 6 files changed, 37 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs index 3275b4db8e4..328c995b81d 100644 --- a/crates/core_simd/src/iter.rs +++ b/crates/core_simd/src/iter.rs @@ -10,6 +10,7 @@ macro_rules! impl_traits { where LaneCount: SupportedLaneCount, { + #[inline] fn sum>(iter: I) -> Self { iter.fold(Simd::splat(0 as $type), Add::add) } @@ -19,6 +20,7 @@ macro_rules! impl_traits { where LaneCount: SupportedLaneCount, { + #[inline] fn product>(iter: I) -> Self { iter.fold(Simd::splat(1 as $type), Mul::mul) } @@ -28,6 +30,7 @@ macro_rules! impl_traits { where LaneCount: SupportedLaneCount, { + #[inline] fn sum>(iter: I) -> Self { iter.fold(Simd::splat(0 as $type), Add::add) } @@ -37,6 +40,7 @@ macro_rules! 
impl_traits { where LaneCount: SupportedLaneCount, { + #[inline] fn product>(iter: I) -> Self { iter.fold(Simd::splat(1 as $type), Mul::mul) } diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index e5307de2155..fde406bda70 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -16,7 +16,7 @@ )] #![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))] #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))] -#![warn(missing_docs)] +#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e58df80fca8..e04448a50be 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -179,6 +179,7 @@ where /// Panics if any lane is not 0 or -1. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] + #[track_caller] pub fn from_int(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); // Safety: the validity has been checked @@ -217,6 +218,7 @@ where /// Panics if `lane` is greater than or equal to the number of lanes in the vector. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] + #[track_caller] pub fn test(&self, lane: usize) -> bool { assert!(lane < LANES, "lane index out of range"); // Safety: the lane index has been checked @@ -240,6 +242,7 @@ where /// # Panics /// Panics if `lane` is greater than or equal to the number of lanes in the vector. 
#[inline] + #[track_caller] pub fn set(&mut self, lane: usize, value: bool) { assert!(lane < LANES, "lane index out of range"); // Safety: the lane index has been checked @@ -327,6 +330,7 @@ where T: MaskElement + fmt::Debug, LaneCount: SupportedLaneCount, { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() .entries((0..LANES).map(|lane| self.test(lane))) diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 5a077a469d8..6e32eb11daf 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -15,6 +15,7 @@ where I: core::slice::SliceIndex<[T]>, { type Output = I::Output; + #[inline] fn index(&self, index: I) -> &Self::Output { &self.as_array()[index] } @@ -26,6 +27,7 @@ where LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { + #[inline] fn index_mut(&mut self, index: I) -> &mut Self::Output { &mut self.as_mut_array()[index] } @@ -118,10 +120,14 @@ macro_rules! for_base_types { #[inline] #[must_use = "operator returns a new vector without mutating the inputs"] + // TODO: only useful for int Div::div, but we hope that this + // will essentially always get inlined anyway. + #[track_caller] fn $call(self, rhs: Self) -> Self::Output { $macro_impl!(self, rhs, $inner, $scalar) } - })* + } + )* } } diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs index 1ae9cd061fb..b2455190e82 100644 --- a/crates/core_simd/src/ord.rs +++ b/crates/core_simd/src/ord.rs @@ -94,6 +94,7 @@ macro_rules! impl_integer { } #[inline] + #[track_caller] fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), @@ -200,6 +201,7 @@ macro_rules!
impl_mask { } #[inline] + #[track_caller] fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), @@ -254,6 +256,7 @@ where } #[inline] + #[track_caller] fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), @@ -303,6 +306,7 @@ where } #[inline] + #[track_caller] fn simd_clamp(self, min: Self, max: Self) -> Self { assert!( min.simd_le(max).all(), diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 3323b92e37b..475b7533a8b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -122,6 +122,7 @@ where /// let v = u32x4::splat(0); /// assert_eq!(v.lanes(), 4); /// ``` + #[inline] pub const fn lanes(&self) -> usize { Self::LANES } @@ -136,6 +137,7 @@ where /// let v = u32x4::splat(8); /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` + #[inline] pub fn splat(value: T) -> Self { // This is preferred over `[value; N]`, since it's explicitly a splat: // https://github.com/rust-lang/rust/issues/97804 @@ -156,6 +158,7 @@ where /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]); /// assert_eq!(v.as_array(), &[0, 1, 2, 3]); /// ``` + #[inline] pub const fn as_array(&self) -> &[T; N] { // SAFETY: `Simd` is just an overaligned `[T; N]` with // potential padding at the end, so pointer casting to a @@ -167,6 +170,7 @@ where } /// Returns a mutable array reference containing the entire SIMD vector. + #[inline] pub fn as_mut_array(&mut self) -> &mut [T; N] { // SAFETY: `Simd` is just an overaligned `[T; N]` with // potential padding at the end, so pointer casting to a @@ -184,6 +188,7 @@ where /// /// # Safety /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. 
+ #[inline] const unsafe fn load(ptr: *const [T; N]) -> Self { // There are potentially simpler ways to write this function, but this should result in // LLVM `load ` @@ -204,6 +209,7 @@ where /// /// # Safety /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. + #[inline] const unsafe fn store(self, ptr: *mut [T; N]) { // There are potentially simpler ways to write this function, but this should result in // LLVM `store ` @@ -216,6 +222,7 @@ where } /// Converts an array to a SIMD vector. + #[inline] pub const fn from_array(array: [T; N]) -> Self { // SAFETY: `&array` is safe to read. // @@ -228,6 +235,7 @@ where } /// Converts a SIMD vector to an array. + #[inline] pub const fn to_array(self) -> [T; N] { let mut tmp = core::mem::MaybeUninit::uninit(); // SAFETY: writing to `tmp` is safe and initializes it. @@ -258,7 +266,8 @@ where /// let v = u32x4::from_slice(&source); /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); /// ``` - #[must_use] + #[inline] + #[track_caller] pub const fn from_slice(slice: &[T]) -> Self { assert!( slice.len() >= Self::LANES, @@ -287,6 +296,8 @@ where /// v.copy_to_slice(&mut dest); /// assert_eq!(&dest, &[1, 2, 3, 4, 0, 0]); /// ``` + #[inline] + #[track_caller] pub fn copy_to_slice(self, slice: &mut [T]) { assert!( slice.len() >= Self::LANES, @@ -718,6 +729,7 @@ where LaneCount: SupportedLaneCount, T: SimdElement, { + #[inline] fn clone(&self) -> Self { *self } @@ -862,6 +874,7 @@ where LaneCount: SupportedLaneCount, T: SimdElement, { + #[inline] fn from(array: [T; N]) -> Self { Self::from_array(array) } @@ -872,6 +885,7 @@ where LaneCount: SupportedLaneCount, T: SimdElement, { + #[inline] fn from(vector: Simd) -> Self { vector.to_array() } @@ -884,6 +898,7 @@ where { type Error = core::array::TryFromSliceError; + #[inline] fn try_from(slice: &[T]) -> Result { Ok(Self::from_array(slice.try_into()?)) } @@ -896,6 +911,7 @@ where { type Error = core::array::TryFromSliceError; + #[inline] fn try_from(slice: &mut [T]) 
-> Result { Ok(Self::from_array(slice.try_into()?)) } From 0315db3cc183994545a29faeff8dbe944257fc91 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 21 May 2023 14:00:48 -0400 Subject: [PATCH 160/161] Re-add missing #[must_use] --- crates/core_simd/src/vector.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 475b7533a8b..b1be2a2a114 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -266,6 +266,7 @@ where /// let v = u32x4::from_slice(&source); /// assert_eq!(v.as_array(), &[1, 2, 3, 4]); /// ``` + #[must_use] #[inline] #[track_caller] pub const fn from_slice(slice: &[T]) -> Self { From 1af32f0a3a2c34ab9822f96d199d0d9bad7c5b66 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 30 May 2023 23:10:05 +0900 Subject: [PATCH 161/161] Fix build error on big endian arm/aarch64 --- crates/core_simd/src/swizzle_dyn.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index 3eb80d5dca1..a4da461d546 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -16,9 +16,9 @@ where #[inline] pub fn swizzle_dyn(self, idxs: Simd) -> Self { #![allow(unused_imports, unused_unsafe)] - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8}; - #[cfg(all(target_arch = "arm", target_feature = "v7"))] + #[cfg(all(target_arch = "arm", target_feature = "v7", target_endian = "little"))] use core::arch::arm::{uint8x8_t, vtbl1_u8}; #[cfg(target_arch = "wasm32")] use core::arch::wasm32 as wasm; @@ -29,13 +29,24 @@ where // SAFETY: Intrinsics covered by cfg unsafe { match N { - #[cfg(target_feature = "neon")] + #[cfg(all( + any( + target_arch = "aarch64", + all(target_arch = "arm", target_feature = "v7") + ), + target_feature = "neon", + 
target_endian = "little" + ))] 8 => transize(vtbl1_u8, self, idxs), #[cfg(target_feature = "ssse3")] 16 => transize(x86::_mm_shuffle_epi8, self, idxs), #[cfg(target_feature = "simd128")] 16 => transize(wasm::i8x16_swizzle, self, idxs), - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + #[cfg(all( + target_arch = "aarch64", + target_feature = "neon", + target_endian = "little" + ))] 16 => transize(vqtbl1q_u8, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] 32 => transize_raw(avx2_pshufb, self, idxs),