Add new swizzle API
Expand swizzle API and migrate existing functions. Add rotate_left, rotate_right. Hide implementation details. Add simd_shuffle macro.
This commit is contained in:
parent
a16b481a08
commit
10168fb7c4
@ -2,6 +2,7 @@
|
||||
// Code ported from the `packed_simd` crate
|
||||
// Run this code with `cargo test --example matrix_inversion`
|
||||
#![feature(array_chunks, portable_simd)]
|
||||
use core_simd::Which::*;
|
||||
use core_simd::*;
|
||||
|
||||
// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
|
||||
@ -163,86 +164,81 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
|
||||
let m_2 = f32x4::from_array(m[2]);
|
||||
let m_3 = f32x4::from_array(m[3]);
|
||||
|
||||
// 2 argument shuffle, returns an f32x4
|
||||
// the first f32x4 is indexes 0..=3
|
||||
// the second f32x4 is indexed 4..=7
|
||||
let tmp1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_0, m_1);
|
||||
let row1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_2, m_3);
|
||||
const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
|
||||
const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
|
||||
const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
|
||||
const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
|
||||
|
||||
let row0 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row1);
|
||||
let row1 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row1, tmp1);
|
||||
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE01);
|
||||
let row1 = simd_shuffle!(m_2, m_3, SHUFFLE01);
|
||||
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_0, m_1);
|
||||
let row3 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_2, m_3);
|
||||
let row2 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row3);
|
||||
let row3 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row3, tmp1);
|
||||
let row0 = simd_shuffle!(tmp, row1, SHUFFLE02);
|
||||
let row1 = simd_shuffle!(row1, tmp, SHUFFLE13);
|
||||
|
||||
let tmp1 = row2 * row3;
|
||||
// there's no syntax for a 1 arg shuffle yet,
|
||||
// so we just pass the same f32x4 twice
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE23);
|
||||
let row3 = simd_shuffle!(m_2, m_3, SHUFFLE23);
|
||||
let row2 = simd_shuffle!(tmp, row3, SHUFFLE02);
|
||||
let row3 = simd_shuffle!(row3, tmp, SHUFFLE13);
|
||||
|
||||
let minor0 = row1 * tmp1;
|
||||
let minor1 = row0 * tmp1;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let minor0 = (row1 * tmp1) - minor0;
|
||||
let minor1 = (row0 * tmp1) - minor1;
|
||||
let minor1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor1, minor1);
|
||||
let tmp = (row2 * row3).reverse().rotate_right::<2>();
|
||||
let minor0 = row1 * tmp;
|
||||
let minor1 = row0 * tmp;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
let minor0 = (row1 * tmp) - minor0;
|
||||
let minor1 = (row0 * tmp) - minor1;
|
||||
let minor1 = minor1.rotate_right::<2>();
|
||||
|
||||
let tmp1 = row1 * row2;
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let minor0 = (row3 * tmp1) + minor0;
|
||||
let minor3 = row0 * tmp1;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let tmp = (row1 * row2).reverse().rotate_right::<2>();
|
||||
let minor0 = (row3 * tmp) + minor0;
|
||||
let minor3 = row0 * tmp;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
|
||||
let minor0 = minor0 - row3 * tmp1;
|
||||
let minor3 = row0 * tmp1 - minor3;
|
||||
let minor3 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor3, minor3);
|
||||
let minor0 = minor0 - row3 * tmp;
|
||||
let minor3 = row0 * tmp - minor3;
|
||||
let minor3 = minor3.rotate_right::<2>();
|
||||
|
||||
let tmp1 = row3 * f32x4::shuffle::<{ [2, 3, 0, 1] }>(row1, row1);
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let row2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(row2, row2);
|
||||
let minor0 = row2 * tmp1 + minor0;
|
||||
let minor2 = row0 * tmp1;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let minor0 = minor0 - row2 * tmp1;
|
||||
let minor2 = row0 * tmp1 - minor2;
|
||||
let minor2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor2, minor2);
|
||||
let tmp = (row3 * row1.rotate_right::<2>())
|
||||
.reverse()
|
||||
.rotate_right::<2>();
|
||||
let row2 = row2.rotate_right::<2>();
|
||||
let minor0 = row2 * tmp + minor0;
|
||||
let minor2 = row0 * tmp;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
let minor0 = minor0 - row2 * tmp;
|
||||
let minor2 = row0 * tmp - minor2;
|
||||
let minor2 = minor2.rotate_right::<2>();
|
||||
|
||||
let tmp1 = row0 * row1;
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let minor2 = minor2 + row3 * tmp1;
|
||||
let minor3 = row2 * tmp1 - minor3;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let minor2 = row3 * tmp1 - minor2;
|
||||
let minor3 = minor3 - row2 * tmp1;
|
||||
let tmp = (row0 * row1).reverse().rotate_right::<2>();
|
||||
let minor2 = minor2 + row3 * tmp;
|
||||
let minor3 = row2 * tmp - minor3;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
let minor2 = row3 * tmp - minor2;
|
||||
let minor3 = minor3 - row2 * tmp;
|
||||
|
||||
let tmp1 = row0 * row3;
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let minor1 = minor1 - row2 * tmp1;
|
||||
let minor2 = row1 * tmp1 + minor2;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let minor1 = row2 * tmp1 + minor1;
|
||||
let minor2 = minor2 - row1 * tmp1;
|
||||
let tmp = (row0 * row3).reverse().rotate_right::<2>();
|
||||
let minor1 = minor1 - row2 * tmp;
|
||||
let minor2 = row1 * tmp + minor2;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
let minor1 = row2 * tmp + minor1;
|
||||
let minor2 = minor2 - row1 * tmp;
|
||||
|
||||
let tmp1 = row0 * row2;
|
||||
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
|
||||
let minor1 = row3 * tmp1 + minor1;
|
||||
let minor3 = minor3 - row1 * tmp1;
|
||||
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
|
||||
let minor1 = minor1 - row3 * tmp1;
|
||||
let minor3 = row1 * tmp1 + minor3;
|
||||
let tmp = (row0 * row2).reverse().rotate_right::<2>();
|
||||
let minor1 = row3 * tmp + minor1;
|
||||
let minor3 = minor3 - row1 * tmp;
|
||||
let tmp = tmp.rotate_right::<2>();
|
||||
let minor1 = minor1 - row3 * tmp;
|
||||
let minor3 = row1 * tmp + minor3;
|
||||
|
||||
let det = row0 * minor0;
|
||||
let det = f32x4::shuffle::<{ [2, 3, 0, 1] }>(det, det) + det;
|
||||
let det = f32x4::shuffle::<{ [1, 0, 3, 2] }>(det, det) + det;
|
||||
let det = det.rotate_right::<2>() + det;
|
||||
let det = det.reverse().rotate_right::<2>() + det;
|
||||
|
||||
if det.horizontal_sum() == 0. {
|
||||
return None;
|
||||
}
|
||||
// calculate the reciprocal
|
||||
let tmp1 = f32x4::splat(1.0) / det;
|
||||
let det = tmp1 + tmp1 - det * tmp1 * tmp1;
|
||||
let tmp = f32x4::splat(1.0) / det;
|
||||
let det = tmp + tmp - det * tmp * tmp;
|
||||
|
||||
let res0 = minor0 * det;
|
||||
let res1 = minor1 * det;
|
||||
|
@ -54,11 +54,7 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
// shufflevector
|
||||
pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
|
||||
pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
|
||||
pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
|
||||
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
|
||||
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
|
||||
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
|
||||
|
||||
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
|
||||
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#![feature(
|
||||
adt_const_params,
|
||||
const_fn_trait_bound,
|
||||
const_panic,
|
||||
platform_intrinsics,
|
||||
repr_simd,
|
||||
simd_ffi,
|
||||
|
@ -1,8 +1,9 @@
|
||||
#[macro_use]
|
||||
mod permute;
|
||||
#[macro_use]
|
||||
mod reduction;
|
||||
|
||||
#[macro_use]
|
||||
mod swizzle;
|
||||
|
||||
pub(crate) mod intrinsics;
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
@ -27,5 +28,6 @@ pub mod simd {
|
||||
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
|
||||
pub use crate::core_simd::masks::*;
|
||||
pub use crate::core_simd::select::Select;
|
||||
pub use crate::core_simd::swizzle::*;
|
||||
pub use crate::core_simd::vector::*;
|
||||
}
|
||||
|
@ -1,154 +0,0 @@
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{Simd, SimdElement};
|
||||
|
||||
macro_rules! impl_shuffle_lane {
|
||||
{ $fn:ident, $n:literal } => {
|
||||
impl<T> Simd<T, $n>
|
||||
where
|
||||
T: SimdElement,
|
||||
{
|
||||
/// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using
|
||||
/// the indices in the const parameter. The first or "self" vector will have its lanes
|
||||
/// indexed from 0, and the second vector will have its first lane indexed at $n.
|
||||
/// Indices must be in-bounds of either vector at compile time.
|
||||
///
|
||||
/// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data
|
||||
/// into the desired patterns in advance is preferred, but shuffles are still faster
|
||||
/// than storing and reloading from memory.
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::Simd;
|
||||
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
|
||||
/// let a = Simd::from_array([1.0, 2.0, 3.0, 4.0]);
|
||||
/// let b = Simd::from_array([5.0, 6.0, 7.0, 8.0]);
|
||||
/// const IDXS: [u32; 4] = [4,0,3,7];
|
||||
/// let c = Simd::<_, 4>::shuffle::<IDXS>(a,b);
|
||||
/// assert_eq!(Simd::from_array([5.0, 1.0, 4.0, 8.0]), c);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
|
||||
unsafe { intrinsics::$fn(self, second, IDX) }
|
||||
}
|
||||
|
||||
/// Reverse the order of the lanes in the vector.
|
||||
#[inline]
|
||||
pub fn reverse(self) -> Self {
|
||||
const fn idx() -> [u32; $n] {
|
||||
let mut idx = [0u32; $n];
|
||||
let mut i = 0;
|
||||
while i < $n {
|
||||
idx[i] = ($n - i - 1) as u32;
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
self.shuffle::<{ idx() }>(self)
|
||||
}
|
||||
|
||||
/// Interleave two vectors.
|
||||
///
|
||||
/// Produces two vectors with lanes taken alternately from `self` and `other`.
|
||||
///
|
||||
/// The first result contains the first `LANES / 2` lanes from `self` and `other`,
|
||||
/// alternating, starting with the first lane of `self`.
|
||||
///
|
||||
/// The second result contains the last `LANES / 2` lanes from `self` and `other`,
|
||||
/// alternating, starting with the lane `LANES / 2` from the start of `self`.
|
||||
///
|
||||
/// This particular permutation is efficient on many architectures.
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::Simd;
|
||||
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
|
||||
/// let a = Simd::from_array([0, 1, 2, 3]);
|
||||
/// let b = Simd::from_array([4, 5, 6, 7]);
|
||||
/// let (x, y) = a.interleave(b);
|
||||
/// assert_eq!(x.to_array(), [0, 4, 1, 5]);
|
||||
/// assert_eq!(y.to_array(), [2, 6, 3, 7]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn interleave(self, other: Self) -> (Self, Self) {
|
||||
const fn lo() -> [u32; $n] {
|
||||
let mut idx = [0u32; $n];
|
||||
let mut i = 0;
|
||||
while i < $n {
|
||||
let offset = i / 2;
|
||||
idx[i] = if i % 2 == 0 {
|
||||
offset
|
||||
} else {
|
||||
$n + offset
|
||||
} as u32;
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
const fn hi() -> [u32; $n] {
|
||||
let mut idx = [0u32; $n];
|
||||
let mut i = 0;
|
||||
while i < $n {
|
||||
let offset = ($n + i) / 2;
|
||||
idx[i] = if i % 2 == 0 {
|
||||
offset
|
||||
} else {
|
||||
$n + offset
|
||||
} as u32;
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
(self.shuffle::<{ lo() }>(other), self.shuffle::<{ hi() }>(other))
|
||||
}
|
||||
|
||||
/// Deinterleave two vectors.
|
||||
///
|
||||
/// The first result takes every other lane of `self` and then `other`, starting with
|
||||
/// the first lane.
|
||||
///
|
||||
/// The second result takes every other lane of `self` and then `other`, starting with
|
||||
/// the second lane.
|
||||
///
|
||||
/// This particular permutation is efficient on many architectures.
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::Simd;
|
||||
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
|
||||
/// let a = Simd::from_array([0, 4, 1, 5]);
|
||||
/// let b = Simd::from_array([2, 6, 3, 7]);
|
||||
/// let (x, y) = a.deinterleave(b);
|
||||
/// assert_eq!(x.to_array(), [0, 1, 2, 3]);
|
||||
/// assert_eq!(y.to_array(), [4, 5, 6, 7]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn deinterleave(self, other: Self) -> (Self, Self) {
|
||||
const fn even() -> [u32; $n] {
|
||||
let mut idx = [0u32; $n];
|
||||
let mut i = 0;
|
||||
while i < $n {
|
||||
idx[i] = 2 * i as u32;
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
const fn odd() -> [u32; $n] {
|
||||
let mut idx = [0u32; $n];
|
||||
let mut i = 0;
|
||||
while i < $n {
|
||||
idx[i] = 1 + 2 * i as u32;
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
(self.shuffle::<{ even() }>(other), self.shuffle::<{ odd() }>(other))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_shuffle_lane! { simd_shuffle2, 2 }
|
||||
impl_shuffle_lane! { simd_shuffle4, 4 }
|
||||
impl_shuffle_lane! { simd_shuffle8, 8 }
|
||||
impl_shuffle_lane! { simd_shuffle16, 16 }
|
||||
impl_shuffle_lane! { simd_shuffle32, 32 }
|
364
crates/core_simd/src/swizzle.rs
Normal file
364
crates/core_simd/src/swizzle.rs
Normal file
@ -0,0 +1,364 @@
|
||||
use crate::simd::intrinsics;
|
||||
use crate::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
|
||||
/// Rearrange vector elements.
///
/// A new vector is constructed by specifying the lanes of the source vector or vectors to use.
///
/// When shuffling one vector, the indices of the result vector are indicated by a `const` array
/// of `usize`, like [`Swizzle`].
/// When shuffling two vectors, the indices are indicated by a `const` array of [`Which`], like
/// [`Swizzle2`].
///
/// The index expression is used in `const` position and its length is used for both the input
/// and the output lane count, so this macro only produces vectors with the same number of lanes
/// as its sources.
///
/// # Examples
/// ## One source vector
/// ```
/// # #![feature(portable_simd)]
/// # use core_simd::{Simd, simd_shuffle};
/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
/// let v = simd_shuffle!(v, [3, 0, 1, 2]);
/// assert_eq!(v.to_array(), [3., 0., 1., 2.]);
/// ```
///
/// ## Two source vectors
/// ```
/// # #![feature(portable_simd)]
/// # use core_simd::{Simd, simd_shuffle, Which};
/// use Which::*;
/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
/// let v = simd_shuffle!(a, b, [First(0), First(1), Second(2), Second(3)]);
/// assert_eq!(v.to_array(), [0., 1., 6., 7.]);
/// ```
#[macro_export]
macro_rules! simd_shuffle {
    // One source vector: `$index` is a const `[usize; N]`.
    {
        $vector:expr, $index:expr $(,)?
    } => {
        {
            // FIXME this won't work when we are in `core`!
            use $crate::Swizzle;
            // A throwaway type whose only job is to carry `$index` as an associated const.
            struct Shuffle;
            impl Swizzle<{$index.len()}, {$index.len()}> for Shuffle {
                const INDEX: [usize; {$index.len()}] = $index;
            }
            Shuffle::swizzle($vector)
        }
    };
    // Two source vectors: `$index` is a const `[Which; N]`.
    {
        $first:expr, $second:expr, $index:expr $(,)?
    } => {
        {
            // FIXME this won't work when we are in `core`!
            use $crate::{Which, Swizzle2};
            // A throwaway type whose only job is to carry `$index` as an associated const.
            struct Shuffle;
            impl Swizzle2<{$index.len()}, {$index.len()}> for Shuffle {
                const INDEX: [Which; {$index.len()}] = $index;
            }
            Shuffle::swizzle2($first, $second)
        }
    }
}
|
||||
|
||||
/// An index into one of two vectors.
///
/// Used as the element type of [`Swizzle2::INDEX`] to say, for each output lane, which of the
/// two source vectors to read from and which lane of that vector to take.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Which {
    /// Indexes the first vector.
    First(usize),
    /// Indexes the second vector.
    Second(usize),
}
|
||||
|
||||
/// Create a vector from the elements of another vector.
|
||||
pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
/// Map from the lanes of the input vector to the output vector.
|
||||
const INDEX: [usize; OUTPUT_LANES];
|
||||
|
||||
/// Create a new vector from the lanes of `vector`.
|
||||
///
|
||||
/// Lane `i` of the output is `vector[Self::INDEX[i]]`.
|
||||
fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
|
||||
where
|
||||
T: SimdElement,
|
||||
LaneCount<INPUT_LANES>: SupportedLaneCount,
|
||||
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
|
||||
{
|
||||
unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a vector from the elements of two other vectors.
|
||||
pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
/// Map from the lanes of the input vectors to the output vector
|
||||
const INDEX: [Which; OUTPUT_LANES];
|
||||
|
||||
/// Create a new vector from the lanes of `first` and `second`.
|
||||
///
|
||||
/// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
|
||||
/// `Second(j)`.
|
||||
fn swizzle2<T>(
|
||||
first: Simd<T, INPUT_LANES>,
|
||||
second: Simd<T, INPUT_LANES>,
|
||||
) -> Simd<T, OUTPUT_LANES>
|
||||
where
|
||||
T: SimdElement,
|
||||
LaneCount<INPUT_LANES>: SupportedLaneCount,
|
||||
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
|
||||
{
|
||||
unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
|
||||
}
|
||||
}
|
||||
|
||||
/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
|
||||
trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
const INDEX_IMPL: [u32; OUTPUT_LANES];
|
||||
}
|
||||
|
||||
impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
|
||||
for T
|
||||
where
|
||||
T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
|
||||
{
|
||||
const INDEX_IMPL: [u32; OUTPUT_LANES] = {
|
||||
let mut output = [0; OUTPUT_LANES];
|
||||
let mut i = 0;
|
||||
while i < OUTPUT_LANES {
|
||||
let index = Self::INDEX[i];
|
||||
assert!(index as u32 as usize == index);
|
||||
assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
|
||||
output[i] = index as u32;
|
||||
i += 1;
|
||||
}
|
||||
output
|
||||
};
|
||||
}
|
||||
|
||||
/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
|
||||
trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
const INDEX_IMPL: [u32; OUTPUT_LANES];
|
||||
}
|
||||
|
||||
impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
|
||||
for T
|
||||
where
|
||||
T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
|
||||
{
|
||||
const INDEX_IMPL: [u32; OUTPUT_LANES] = {
|
||||
let mut output = [0; OUTPUT_LANES];
|
||||
let mut i = 0;
|
||||
while i < OUTPUT_LANES {
|
||||
let (offset, index) = match Self::INDEX[i] {
|
||||
Which::First(index) => (false, index),
|
||||
Which::Second(index) => (true, index),
|
||||
};
|
||||
assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
|
||||
|
||||
// lanes are indexed by the first vector, then second vector
|
||||
let index = if offset { index + INPUT_LANES } else { index };
|
||||
assert!(index as u32 as usize == index);
|
||||
output[i] = index as u32;
|
||||
i += 1;
|
||||
}
|
||||
output
|
||||
};
|
||||
}
|
||||
|
||||
impl<T, const LANES: usize> Simd<T, LANES>
|
||||
where
|
||||
T: SimdElement,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
/// Reverse the order of the lanes in the vector.
|
||||
#[inline]
|
||||
pub fn reverse(self) -> Self {
|
||||
const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
|
||||
let mut index = [0; LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
index[i] = LANES - i - 1;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
}
|
||||
|
||||
struct Reverse;
|
||||
|
||||
impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
|
||||
const INDEX: [usize; LANES] = reverse_index::<LANES>();
|
||||
}
|
||||
|
||||
Reverse::swizzle(self)
|
||||
}
|
||||
|
||||
/// Rotates the vector such that the first `OFFSET` elements of the slice move to the end
|
||||
/// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_left`, the
|
||||
/// element previously in lane `OFFSET` will become the first element in the slice.
|
||||
#[inline]
|
||||
pub fn rotate_left<const OFFSET: usize>(self) -> Self {
|
||||
const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
|
||||
let offset = OFFSET % LANES;
|
||||
let mut index = [0; LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
index[i] = (i + offset) % LANES;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
}
|
||||
|
||||
struct Rotate<const OFFSET: usize>;
|
||||
|
||||
impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
|
||||
const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
|
||||
}
|
||||
|
||||
Rotate::<OFFSET>::swizzle(self)
|
||||
}
|
||||
|
||||
/// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
|
||||
/// the end while the last `OFFSET` elements move to the front. After calling `rotate_right`, the
|
||||
/// element previously at index `LANES - OFFSET` will become the first element in the slice.
|
||||
#[inline]
|
||||
pub fn rotate_right<const OFFSET: usize>(self) -> Self {
|
||||
const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
|
||||
let offset = LANES - OFFSET % LANES;
|
||||
let mut index = [0; LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
index[i] = (i + offset) % LANES;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
}
|
||||
|
||||
struct Rotate<const OFFSET: usize>;
|
||||
|
||||
impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
|
||||
const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
|
||||
}
|
||||
|
||||
Rotate::<OFFSET>::swizzle(self)
|
||||
}
|
||||
|
||||
/// Interleave two vectors.
|
||||
///
|
||||
/// Produces two vectors with lanes taken alternately from `self` and `other`.
|
||||
///
|
||||
/// The first result contains the first `LANES / 2` lanes from `self` and `other`,
|
||||
/// alternating, starting with the first lane of `self`.
|
||||
///
|
||||
/// The second result contains the last `LANES / 2` lanes from `self` and `other`,
|
||||
/// alternating, starting with the lane `LANES / 2` from the start of `self`.
|
||||
///
|
||||
/// This particular permutation is efficient on many architectures.
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(portable_simd)]
|
||||
/// # use core_simd::Simd;
|
||||
/// let a = Simd::from_array([0, 1, 2, 3]);
|
||||
/// let b = Simd::from_array([4, 5, 6, 7]);
|
||||
/// let (x, y) = a.interleave(b);
|
||||
/// assert_eq!(x.to_array(), [0, 4, 1, 5]);
|
||||
/// assert_eq!(y.to_array(), [2, 6, 3, 7]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn interleave(self, other: Self) -> (Self, Self) {
|
||||
const fn lo<const LANES: usize>() -> [Which; LANES] {
|
||||
let mut idx = [Which::First(0); LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
let offset = i / 2;
|
||||
idx[i] = if i % 2 == 0 {
|
||||
Which::First(offset)
|
||||
} else {
|
||||
Which::Second(offset)
|
||||
};
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
const fn hi<const LANES: usize>() -> [Which; LANES] {
|
||||
let mut idx = [Which::First(0); LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
let offset = (LANES + i) / 2;
|
||||
idx[i] = if i % 2 == 0 {
|
||||
Which::First(offset)
|
||||
} else {
|
||||
Which::Second(offset)
|
||||
};
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
|
||||
struct Lo;
|
||||
struct Hi;
|
||||
|
||||
impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
|
||||
const INDEX: [Which; LANES] = lo::<LANES>();
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
|
||||
const INDEX: [Which; LANES] = hi::<LANES>();
|
||||
}
|
||||
|
||||
(Lo::swizzle2(self, other), Hi::swizzle2(self, other))
|
||||
}
|
||||
|
||||
/// Deinterleave two vectors.
|
||||
///
|
||||
/// The first result takes every other lane of `self` and then `other`, starting with
|
||||
/// the first lane.
|
||||
///
|
||||
/// The second result takes every other lane of `self` and then `other`, starting with
|
||||
/// the second lane.
|
||||
///
|
||||
/// This particular permutation is efficient on many architectures.
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(portable_simd)]
|
||||
/// # use core_simd::Simd;
|
||||
/// let a = Simd::from_array([0, 4, 1, 5]);
|
||||
/// let b = Simd::from_array([2, 6, 3, 7]);
|
||||
/// let (x, y) = a.deinterleave(b);
|
||||
/// assert_eq!(x.to_array(), [0, 1, 2, 3]);
|
||||
/// assert_eq!(y.to_array(), [4, 5, 6, 7]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn deinterleave(self, other: Self) -> (Self, Self) {
|
||||
const fn even<const LANES: usize>() -> [Which; LANES] {
|
||||
let mut idx = [Which::First(0); LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES / 2 {
|
||||
idx[i] = Which::First(2 * i);
|
||||
idx[i + LANES / 2] = Which::Second(2 * i);
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
const fn odd<const LANES: usize>() -> [Which; LANES] {
|
||||
let mut idx = [Which::First(0); LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES / 2 {
|
||||
idx[i] = Which::First(2 * i + 1);
|
||||
idx[i + LANES / 2] = Which::Second(2 * i + 1);
|
||||
i += 1;
|
||||
}
|
||||
idx
|
||||
}
|
||||
|
||||
struct Even;
|
||||
struct Odd;
|
||||
|
||||
impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
|
||||
const INDEX: [Which; LANES] = even::<LANES>();
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
|
||||
const INDEX: [Which; LANES] = odd::<LANES>();
|
||||
}
|
||||
|
||||
(Even::swizzle2(self, other), Odd::swizzle2(self, other))
|
||||
}
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
#![feature(portable_simd)]
|
||||
|
||||
use core_simd::Simd;
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
use wasm_bindgen_test::*;
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
wasm_bindgen_test_configure!(run_in_browser);
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn simple_shuffle() {
|
||||
let a = Simd::from_array([2, 4, 1, 9]);
|
||||
let b = a;
|
||||
assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn reverse() {
|
||||
let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
assert_eq!(a.reverse().to_array(), [7, 6, 5, 4, 3, 2, 1, 0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn interleave() {
|
||||
let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let (lo, hi) = a.interleave(b);
|
||||
assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
|
||||
assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);
|
||||
let (even, odd) = lo.deinterleave(hi);
|
||||
assert_eq!(even, a);
|
||||
assert_eq!(odd, b);
|
||||
}
|
62
crates/core_simd/tests/swizzle.rs
Normal file
62
crates/core_simd/tests/swizzle.rs
Normal file
@ -0,0 +1,62 @@
|
||||
#![feature(portable_simd)]
|
||||
use core_simd::{Simd, Swizzle};
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
use wasm_bindgen_test::*;
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
wasm_bindgen_test_configure!(run_in_browser);
|
||||
|
||||
// Exercises a user-defined `Swizzle` with two impls on the same marker type: a full 4 -> 4
// permutation and a 4 -> 2 swizzle that repeats one lane.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn swizzle() {
    struct Index;
    impl Swizzle<4, 4> for Index {
        const INDEX: [usize; 4] = [2, 1, 3, 0];
    }
    impl Swizzle<4, 2> for Index {
        const INDEX: [usize; 2] = [1, 1];
    }

    let vector = Simd::from_array([2, 4, 1, 9]);
    // Name the impl explicitly rather than relying on the expected array length to pick it.
    assert_eq!(
        <Index as Swizzle<4, 4>>::swizzle(vector).to_array(),
        [1, 4, 9, 2]
    );
    assert_eq!(
        <Index as Swizzle<4, 2>>::swizzle(vector).to_array(),
        [4, 4]
    );
}
|
||||
|
||||
// `reverse` must flip the lane order end to end.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn reverse() {
    let a = Simd::from_array([1, 2, 3, 4]);
    assert_eq!(a.reverse().to_array(), [4, 3, 2, 1]);
}
|
||||
|
||||
// Covers every offset from 0 through `LANES + 1` in both directions, so the identity case,
// the wrap-around at `LANES`, and offsets larger than `LANES` are all checked.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn rotate() {
    let a = Simd::from_array([1, 2, 3, 4]);

    assert_eq!(a.rotate_left::<0>().to_array(), [1, 2, 3, 4]);
    assert_eq!(a.rotate_left::<1>().to_array(), [2, 3, 4, 1]);
    assert_eq!(a.rotate_left::<2>().to_array(), [3, 4, 1, 2]);
    assert_eq!(a.rotate_left::<3>().to_array(), [4, 1, 2, 3]);
    assert_eq!(a.rotate_left::<4>().to_array(), [1, 2, 3, 4]);
    assert_eq!(a.rotate_left::<5>().to_array(), [2, 3, 4, 1]);

    assert_eq!(a.rotate_right::<0>().to_array(), [1, 2, 3, 4]);
    assert_eq!(a.rotate_right::<1>().to_array(), [4, 1, 2, 3]);
    assert_eq!(a.rotate_right::<2>().to_array(), [3, 4, 1, 2]);
    assert_eq!(a.rotate_right::<3>().to_array(), [2, 3, 4, 1]);
    assert_eq!(a.rotate_right::<4>().to_array(), [1, 2, 3, 4]);
    assert_eq!(a.rotate_right::<5>().to_array(), [4, 1, 2, 3]);
}
|
||||
|
||||
// Checks the exact lane layout produced by `interleave`, then that `deinterleave` undoes it.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn interleave() {
    let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
    let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);

    let (lo, hi) = a.interleave(b);
    assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
    assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);

    // Round trip: deinterleaving the two halves must give back the original inputs.
    let (even, odd) = lo.deinterleave(hi);
    assert_eq!(even, a);
    assert_eq!(odd, b);
}
|
Loading…
x
Reference in New Issue
Block a user