Add new swizzle API

Expand swizzle API and migrate existing functions. Add rotate_left, rotate_right.

Hide implementation details

Add simd_shuffle macro
This commit is contained in:
Caleb Zulawski 2021-09-15 04:59:03 +00:00 committed by Jubilee
parent a16b481a08
commit 10168fb7c4
8 changed files with 491 additions and 261 deletions

View File

@ -2,6 +2,7 @@
// Code ported from the `packed_simd` crate
// Run this code with `cargo test --example matrix_inversion`
#![feature(array_chunks, portable_simd)]
use core_simd::Which::*;
use core_simd::*;
// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
@ -163,86 +164,81 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
let m_2 = f32x4::from_array(m[2]);
let m_3 = f32x4::from_array(m[3]);
// 2 argument shuffle, returns an f32x4
// the first f32x4 is indexes 0..=3
// the second f32x4 is indexed 4..=7
let tmp1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_0, m_1);
let row1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_2, m_3);
const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
let row0 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row1);
let row1 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row1, tmp1);
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE01);
let row1 = simd_shuffle!(m_2, m_3, SHUFFLE01);
let tmp1 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_0, m_1);
let row3 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_2, m_3);
let row2 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row3);
let row3 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row3, tmp1);
let row0 = simd_shuffle!(tmp, row1, SHUFFLE02);
let row1 = simd_shuffle!(row1, tmp, SHUFFLE13);
let tmp1 = row2 * row3;
// there's no syntax for a 1 arg shuffle yet,
// so we just pass the same f32x4 twice
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let tmp = simd_shuffle!(m_0, m_1, SHUFFLE23);
let row3 = simd_shuffle!(m_2, m_3, SHUFFLE23);
let row2 = simd_shuffle!(tmp, row3, SHUFFLE02);
let row3 = simd_shuffle!(row3, tmp, SHUFFLE13);
let minor0 = row1 * tmp1;
let minor1 = row0 * tmp1;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let minor0 = (row1 * tmp1) - minor0;
let minor1 = (row0 * tmp1) - minor1;
let minor1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor1, minor1);
let tmp = (row2 * row3).reverse().rotate_right::<2>();
let minor0 = row1 * tmp;
let minor1 = row0 * tmp;
let tmp = tmp.rotate_right::<2>();
let minor0 = (row1 * tmp) - minor0;
let minor1 = (row0 * tmp) - minor1;
let minor1 = minor1.rotate_right::<2>();
let tmp1 = row1 * row2;
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let minor0 = (row3 * tmp1) + minor0;
let minor3 = row0 * tmp1;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let tmp = (row1 * row2).reverse().rotate_right::<2>();
let minor0 = (row3 * tmp) + minor0;
let minor3 = row0 * tmp;
let tmp = tmp.rotate_right::<2>();
let minor0 = minor0 - row3 * tmp1;
let minor3 = row0 * tmp1 - minor3;
let minor3 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor3, minor3);
let minor0 = minor0 - row3 * tmp;
let minor3 = row0 * tmp - minor3;
let minor3 = minor3.rotate_right::<2>();
let tmp1 = row3 * f32x4::shuffle::<{ [2, 3, 0, 1] }>(row1, row1);
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let row2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(row2, row2);
let minor0 = row2 * tmp1 + minor0;
let minor2 = row0 * tmp1;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let minor0 = minor0 - row2 * tmp1;
let minor2 = row0 * tmp1 - minor2;
let minor2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor2, minor2);
let tmp = (row3 * row1.rotate_right::<2>())
.reverse()
.rotate_right::<2>();
let row2 = row2.rotate_right::<2>();
let minor0 = row2 * tmp + minor0;
let minor2 = row0 * tmp;
let tmp = tmp.rotate_right::<2>();
let minor0 = minor0 - row2 * tmp;
let minor2 = row0 * tmp - minor2;
let minor2 = minor2.rotate_right::<2>();
let tmp1 = row0 * row1;
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let minor2 = minor2 + row3 * tmp1;
let minor3 = row2 * tmp1 - minor3;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let minor2 = row3 * tmp1 - minor2;
let minor3 = minor3 - row2 * tmp1;
let tmp = (row0 * row1).reverse().rotate_right::<2>();
let minor2 = minor2 + row3 * tmp;
let minor3 = row2 * tmp - minor3;
let tmp = tmp.rotate_right::<2>();
let minor2 = row3 * tmp - minor2;
let minor3 = minor3 - row2 * tmp;
let tmp1 = row0 * row3;
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let minor1 = minor1 - row2 * tmp1;
let minor2 = row1 * tmp1 + minor2;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let minor1 = row2 * tmp1 + minor1;
let minor2 = minor2 - row1 * tmp1;
let tmp = (row0 * row3).reverse().rotate_right::<2>();
let minor1 = minor1 - row2 * tmp;
let minor2 = row1 * tmp + minor2;
let tmp = tmp.rotate_right::<2>();
let minor1 = row2 * tmp + minor1;
let minor2 = minor2 - row1 * tmp;
let tmp1 = row0 * row2;
let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1);
let minor1 = row3 * tmp1 + minor1;
let minor3 = minor3 - row1 * tmp1;
let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1);
let minor1 = minor1 - row3 * tmp1;
let minor3 = row1 * tmp1 + minor3;
let tmp = (row0 * row2).reverse().rotate_right::<2>();
let minor1 = row3 * tmp + minor1;
let minor3 = minor3 - row1 * tmp;
let tmp = tmp.rotate_right::<2>();
let minor1 = minor1 - row3 * tmp;
let minor3 = row1 * tmp + minor3;
let det = row0 * minor0;
let det = f32x4::shuffle::<{ [2, 3, 0, 1] }>(det, det) + det;
let det = f32x4::shuffle::<{ [1, 0, 3, 2] }>(det, det) + det;
let det = det.rotate_right::<2>() + det;
let det = det.reverse().rotate_right::<2>() + det;
if det.horizontal_sum() == 0. {
return None;
}
// calculate the reciprocal
let tmp1 = f32x4::splat(1.0) / det;
let det = tmp1 + tmp1 - det * tmp1 * tmp1;
let tmp = f32x4::splat(1.0) / det;
let det = tmp + tmp - det * tmp * tmp;
let res0 = minor0 * det;
let res1 = minor1 * det;

View File

@ -54,11 +54,7 @@ extern "platform-intrinsic" {
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
// shufflevector
pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

View File

@ -3,6 +3,7 @@
#![feature(
adt_const_params,
const_fn_trait_bound,
const_panic,
platform_intrinsics,
repr_simd,
simd_ffi,

View File

@ -1,8 +1,9 @@
#[macro_use]
mod permute;
#[macro_use]
mod reduction;
#[macro_use]
mod swizzle;
pub(crate) mod intrinsics;
#[cfg(feature = "generic_const_exprs")]
@ -27,5 +28,6 @@ pub mod simd {
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
pub use crate::core_simd::masks::*;
pub use crate::core_simd::select::Select;
pub use crate::core_simd::swizzle::*;
pub use crate::core_simd::vector::*;
}

View File

@ -1,154 +0,0 @@
use crate::simd::intrinsics;
use crate::simd::{Simd, SimdElement};
// Generates the shuffle-based inherent methods (`shuffle`, `reverse`, `interleave`,
// `deinterleave`) on `Simd<T, $n>` for one fixed lane count.
//
// Arguments:
// - `$fn`: identifier of the function in `intrinsics` that `shuffle` forwards to.
// - `$n`:  literal lane count; also the length of every index array built below.
macro_rules! impl_shuffle_lane {
{ $fn:ident, $n:literal } => {
impl<T> Simd<T, $n>
where
T: SimdElement,
{
/// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using
/// the indices in the const parameter. The first or "self" vector will have its lanes
/// indexed from 0, and the second vector will have its first lane indexed at $n.
/// Indices must be in-bounds of either vector at compile time.
///
/// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data
/// into the desired patterns in advance is preferred, but shuffles are still faster
/// than storing and reloading from memory.
///
/// ```
/// #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::Simd;
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
/// let a = Simd::from_array([1.0, 2.0, 3.0, 4.0]);
/// let b = Simd::from_array([5.0, 6.0, 7.0, 8.0]);
/// const IDXS: [u32; 4] = [4,0,3,7];
/// let c = Simd::<_, 4>::shuffle::<IDXS>(a,b);
/// assert_eq!(Simd::from_array([5.0, 1.0, 4.0, 8.0]), c);
/// ```
#[inline]
pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
// NOTE(review): no bounds check on `IDX` is visible here -- presumably the intrinsic
// rejects out-of-range indices at compile time, as the doc comment states; confirm.
unsafe { intrinsics::$fn(self, second, IDX) }
}
/// Reverse the order of the lanes in the vector.
#[inline]
pub fn reverse(self) -> Self {
// Builds [$n - 1, $n - 2, ..., 0].
const fn idx() -> [u32; $n] {
let mut idx = [0u32; $n];
let mut i = 0;
while i < $n {
idx[i] = ($n - i - 1) as u32;
i += 1;
}
idx
}
// Every index is below $n, so only the first operand is ever read; `self` is passed
// twice simply because `shuffle` takes two vectors.
self.shuffle::<{ idx() }>(self)
}
/// Interleave two vectors.
///
/// Produces two vectors with lanes taken alternately from `self` and `other`.
///
/// The first result contains the first `LANES / 2` lanes from `self` and `other`,
/// alternating, starting with the first lane of `self`.
///
/// The second result contains the last `LANES / 2` lanes from `self` and `other`,
/// alternating, starting with the lane `LANES / 2` from the start of `self`.
///
/// This particular permutation is efficient on many architectures.
///
/// ```
/// #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::Simd;
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
/// let a = Simd::from_array([0, 1, 2, 3]);
/// let b = Simd::from_array([4, 5, 6, 7]);
/// let (x, y) = a.interleave(b);
/// assert_eq!(x.to_array(), [0, 4, 1, 5]);
/// assert_eq!(y.to_array(), [2, 6, 3, 7]);
/// ```
#[inline]
pub fn interleave(self, other: Self) -> (Self, Self) {
// Indices for the first result: even output lanes read `self[i / 2]`, odd output
// lanes read `other[i / 2]` (the `$n +` selects the second operand).
const fn lo() -> [u32; $n] {
let mut idx = [0u32; $n];
let mut i = 0;
while i < $n {
let offset = i / 2;
idx[i] = if i % 2 == 0 {
offset
} else {
$n + offset
} as u32;
i += 1;
}
idx
}
// Indices for the second result: same pattern as `lo`, but the source lane is
// `($n + i) / 2`, i.e. it starts at lane `$n / 2`.
const fn hi() -> [u32; $n] {
let mut idx = [0u32; $n];
let mut i = 0;
while i < $n {
let offset = ($n + i) / 2;
idx[i] = if i % 2 == 0 {
offset
} else {
$n + offset
} as u32;
i += 1;
}
idx
}
(self.shuffle::<{ lo() }>(other), self.shuffle::<{ hi() }>(other))
}
/// Deinterleave two vectors.
///
/// The first result takes every other lane of `self` and then `other`, starting with
/// the first lane.
///
/// The second result takes every other lane of `self` and then `other`, starting with
/// the second lane.
///
/// This particular permutation is efficient on many architectures.
///
/// ```
/// #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::Simd;
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
/// let a = Simd::from_array([0, 4, 1, 5]);
/// let b = Simd::from_array([2, 6, 3, 7]);
/// let (x, y) = a.deinterleave(b);
/// assert_eq!(x.to_array(), [0, 1, 2, 3]);
/// assert_eq!(y.to_array(), [4, 5, 6, 7]);
/// ```
#[inline]
pub fn deinterleave(self, other: Self) -> (Self, Self) {
// Builds [0, 2, 4, ...]: indices run across both operands (0..2 * $n), so the upper
// half of the result reads from `other`.
const fn even() -> [u32; $n] {
let mut idx = [0u32; $n];
let mut i = 0;
while i < $n {
idx[i] = 2 * i as u32;
i += 1;
}
idx
}
// Builds [1, 3, 5, ...]: same as `even`, shifted by one lane.
const fn odd() -> [u32; $n] {
let mut idx = [0u32; $n];
let mut i = 0;
while i < $n {
idx[i] = 1 + 2 * i as u32;
i += 1;
}
idx
}
(self.shuffle::<{ even() }>(other), self.shuffle::<{ odd() }>(other))
}
}
}
}
// Instantiate the shuffle methods for each lane count handled here, pairing every count with
// the intrinsic of the matching width.
impl_shuffle_lane! { simd_shuffle2, 2 }
impl_shuffle_lane! { simd_shuffle4, 4 }
impl_shuffle_lane! { simd_shuffle8, 8 }
impl_shuffle_lane! { simd_shuffle16, 16 }
impl_shuffle_lane! { simd_shuffle32, 32 }

View File

@ -0,0 +1,364 @@
use crate::simd::intrinsics;
use crate::{LaneCount, Simd, SimdElement, SupportedLaneCount};
/// Build a new vector by picking lanes out of one or two source vectors.
///
/// The index array has one entry per output lane; each entry names the source lane whose
/// value is placed in that position. The macro expands to a swizzle whose input and output
/// lane counts both equal the length of the index array, so the source vector(s) must have
/// exactly as many lanes as the index array has entries.
///
/// With one source vector the index is a `const` array of `usize`, as in [`Swizzle`].
/// With two source vectors the index is a `const` array of [`Which`], as in [`Swizzle2`].
///
/// # Examples
/// ## One source vector
/// ```
/// # #![feature(portable_simd)]
/// # use core_simd::{Simd, simd_shuffle};
/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
/// let v = simd_shuffle!(v, [3, 0, 1, 2]);
/// assert_eq!(v.to_array(), [3., 0., 1., 2.]);
/// ```
///
/// ## Two source vectors
/// ```
/// # #![feature(portable_simd)]
/// # use core_simd::{Simd, simd_shuffle, Which};
/// use Which::*;
/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
/// let v = simd_shuffle!(a, b, [First(0), First(1), Second(2), Second(3)]);
/// assert_eq!(v.to_array(), [0., 1., 6., 7.]);
/// ```
#[macro_export]
macro_rules! simd_shuffle {
    // One source vector: the index entries are plain lane numbers.
    ($source:expr, $lanes:expr $(,)?) => {{
        // FIXME this won't work when we are in `core`!
        use $crate::Swizzle;
        // Throwaway marker type whose only job is to carry the index array.
        struct Shuffle;
        impl Swizzle<{ $lanes.len() }, { $lanes.len() }> for Shuffle {
            const INDEX: [usize; { $lanes.len() }] = $lanes;
        }
        Shuffle::swizzle($source)
    }};
    // Two source vectors: every index entry also says which vector it reads from.
    ($lhs:expr, $rhs:expr, $lanes:expr $(,)?) => {{
        // FIXME this won't work when we are in `core`!
        use $crate::{Swizzle2, Which};
        // Throwaway marker type whose only job is to carry the index array.
        struct Shuffle;
        impl Swizzle2<{ $lanes.len() }, { $lanes.len() }> for Shuffle {
            const INDEX: [Which; { $lanes.len() }] = $lanes;
        }
        Shuffle::swizzle2($lhs, $rhs)
    }};
}
/// Selects a lane from one of the two inputs of a two-vector swizzle.
///
/// The payload is the lane number within the chosen vector, counted from zero.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Which {
    /// Take the lane from the first input vector.
    First(usize),
    /// Take the lane from the second input vector.
    Second(usize),
}
/// Create a vector from the elements of another vector.
///
/// Implementors only provide [`Swizzle::INDEX`]; the `swizzle` method has a default body.
/// `INPUT_LANES` is the lane count of the source vector and `OUTPUT_LANES` the lane count of
/// the result, so the two may differ.
pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
/// Map from the lanes of the input vector to the output vector.
///
/// Every entry has to be smaller than `INPUT_LANES`; the conversion in `SwizzleImpl`
/// asserts this while the constant is evaluated.
const INDEX: [usize; OUTPUT_LANES];
/// Create a new vector from the lanes of `vector`.
///
/// Lane `i` of the output is `vector[Self::INDEX[i]]`.
fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
where
T: SimdElement,
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// `INDEX_IMPL` is the `u32` version of `Self::INDEX` from the blanket `SwizzleImpl` impl,
// which has already asserted that each entry is below `INPUT_LANES`. With that bound no
// index can refer to the second operand, so `vector` is passed twice only to satisfy the
// two-operand signature of the intrinsic.
// NOTE(review): presumably in-range indices are all the intrinsic needs to be sound -- confirm.
unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
}
}
/// Create a vector from the elements of two other vectors.
///
/// Implementors only provide [`Swizzle2::INDEX`]; the `swizzle2` method has a default body.
/// Both inputs have `INPUT_LANES` lanes; the result has `OUTPUT_LANES` lanes.
pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
/// Map from the lanes of the input vectors to the output vector
///
/// The lane number inside each [`Which`] has to be smaller than `INPUT_LANES`; the
/// conversion in `Swizzle2Impl` asserts this while the constant is evaluated.
const INDEX: [Which; OUTPUT_LANES];
/// Create a new vector from the lanes of `first` and `second`.
///
/// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
/// `Second(j)`.
fn swizzle2<T>(
first: Simd<T, INPUT_LANES>,
second: Simd<T, INPUT_LANES>,
) -> Simd<T, OUTPUT_LANES>
where
T: SimdElement,
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// `INDEX_IMPL` comes from the blanket `Swizzle2Impl` impl: it checks each lane number
// against `INPUT_LANES` and flattens `Second(j)` to `j + INPUT_LANES`, i.e. lanes of
// `first` are numbered before lanes of `second`.
// NOTE(review): presumably in-range indices are all the intrinsic needs to be sound -- confirm.
unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
}
}
/// Private companion of [`Swizzle`]: validates the `usize` lane indices and narrows them to
/// the `u32` values handed to the `simd_shuffle` intrinsic.
trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
    /// `Swizzle::INDEX`, checked and converted to `u32`.
    const INDEX_IMPL: [u32; OUTPUT_LANES];
}

impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
    for T
where
    T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
{
    const INDEX_IMPL: [u32; OUTPUT_LANES] = {
        let mut converted = [0u32; OUTPUT_LANES];
        let mut lane = 0;
        loop {
            if lane >= OUTPUT_LANES {
                break converted;
            }
            let index = Self::INDEX[lane];
            // The value must survive the round trip through `u32` unchanged...
            assert!(index as u32 as usize == index);
            // ...and must name a lane that exists in the input vector.
            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
            converted[lane] = index as u32;
            lane += 1;
        }
    };
}
/// Private companion of [`Swizzle2`]: validates the [`Which`] entries and flattens them to
/// the `u32` values handed to the `simd_shuffle` intrinsic.
trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
    /// `Swizzle2::INDEX`, checked and converted to flat `u32` lane numbers.
    const INDEX_IMPL: [u32; OUTPUT_LANES];
}

impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
    for T
where
    T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
{
    const INDEX_IMPL: [u32; OUTPUT_LANES] = {
        let mut converted = [0u32; OUTPUT_LANES];
        let mut lane = 0;
        loop {
            if lane >= OUTPUT_LANES {
                break converted;
            }
            // `base` is the flat lane number at which the selected vector starts.
            let (base, index) = match Self::INDEX[lane] {
                Which::First(index) => (0, index),
                Which::Second(index) => (INPUT_LANES, index),
            };
            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
            // lanes are indexed by the first vector, then second vector
            let index = base + index;
            assert!(index as u32 as usize == index);
            converted[lane] = index as u32;
            lane += 1;
        }
    };
}
impl<T, const LANES: usize> Simd<T, LANES>
where
    T: SimdElement,
    LaneCount<LANES>: SupportedLaneCount,
{
    /// Returns the vector with its lanes in the opposite order.
    #[inline]
    pub fn reverse(self) -> Self {
        // Input lane `src` lands in output lane `LANES - 1 - src`.
        const fn mirrored<const LANES: usize>() -> [usize; LANES] {
            let mut lanes = [0; LANES];
            let mut src = 0;
            while src < LANES {
                lanes[LANES - 1 - src] = src;
                src += 1;
            }
            lanes
        }

        struct Mirror;

        impl<const LANES: usize> Swizzle<LANES, LANES> for Mirror {
            const INDEX: [usize; LANES] = mirrored::<LANES>();
        }

        Mirror::swizzle(self)
    }

    /// Rotates the lanes towards the front: the first `OFFSET` lanes of the vector move to
    /// the end while the remaining `LANES - OFFSET` lanes move to the front. Afterwards, the
    /// value previously in lane `OFFSET` is in the first lane.
    ///
    /// `OFFSET` is reduced modulo `LANES`.
    #[inline]
    pub fn rotate_left<const OFFSET: usize>(self) -> Self {
        // Output lane `dst` reads the input lane `shift` places further along, wrapping.
        const fn shifted<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
            let shift = OFFSET % LANES;
            let mut lanes = [0; LANES];
            let mut dst = 0;
            while dst < LANES {
                lanes[dst] = (dst + shift) % LANES;
                dst += 1;
            }
            lanes
        }

        struct RotateLeft<const OFFSET: usize>;

        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for RotateLeft<OFFSET> {
            const INDEX: [usize; LANES] = shifted::<OFFSET, LANES>();
        }

        RotateLeft::<OFFSET>::swizzle(self)
    }

    /// Rotates the lanes towards the back: the first `LANES - OFFSET` lanes of the vector
    /// move to the end while the last `OFFSET` lanes move to the front. Afterwards, the value
    /// previously in lane `LANES - OFFSET` is in the first lane.
    ///
    /// `OFFSET` is reduced modulo `LANES`.
    #[inline]
    pub fn rotate_right<const OFFSET: usize>(self) -> Self {
        // Input lane `src` lands `shift` places further along in the output, wrapping.
        const fn shifted<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
            let shift = OFFSET % LANES;
            let mut lanes = [0; LANES];
            let mut src = 0;
            while src < LANES {
                lanes[(src + shift) % LANES] = src;
                src += 1;
            }
            lanes
        }

        struct RotateRight<const OFFSET: usize>;

        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for RotateRight<OFFSET> {
            const INDEX: [usize; LANES] = shifted::<OFFSET, LANES>();
        }

        RotateRight::<OFFSET>::swizzle(self)
    }

    /// Interleave two vectors.
    ///
    /// Returns two vectors whose lanes alternate between `self` and `other`.
    ///
    /// The first result is built from the first `LANES / 2` lanes of `self` and of `other`,
    /// alternating and beginning with the first lane of `self`.
    ///
    /// The second result is built from the last `LANES / 2` lanes of `self` and of `other`,
    /// alternating and beginning with lane `LANES / 2` of `self`.
    ///
    /// This particular permutation is efficient on many architectures.
    ///
    /// ```
    /// #![feature(portable_simd)]
    /// # use core_simd::Simd;
    /// let a = Simd::from_array([0, 1, 2, 3]);
    /// let b = Simd::from_array([4, 5, 6, 7]);
    /// let (x, y) = a.interleave(b);
    /// assert_eq!(x.to_array(), [0, 4, 1, 5]);
    /// assert_eq!(y.to_array(), [2, 6, 3, 7]);
    /// ```
    #[inline]
    pub fn interleave(self, other: Self) -> (Self, Self) {
        // Even output lanes read from the first vector, odd ones from the second; the source
        // lane is `(start + dst) / 2`, so `start == 0` covers the low halves of the inputs
        // and `start == LANES` the high halves.
        const fn zipped<const LANES: usize>(start: usize) -> [Which; LANES] {
            let mut lanes = [Which::First(0); LANES];
            let mut dst = 0;
            while dst < LANES {
                let src = (start + dst) / 2;
                lanes[dst] = match dst % 2 {
                    0 => Which::First(src),
                    _ => Which::Second(src),
                };
                dst += 1;
            }
            lanes
        }

        struct LowHalves;
        struct HighHalves;

        impl<const LANES: usize> Swizzle2<LANES, LANES> for LowHalves {
            const INDEX: [Which; LANES] = zipped::<LANES>(0);
        }

        impl<const LANES: usize> Swizzle2<LANES, LANES> for HighHalves {
            const INDEX: [Which; LANES] = zipped::<LANES>(LANES);
        }

        (
            LowHalves::swizzle2(self, other),
            HighHalves::swizzle2(self, other),
        )
    }

    /// Deinterleave two vectors.
    ///
    /// The first result collects every other lane of `self` followed by every other lane of
    /// `other`, beginning with the first lane of each.
    ///
    /// The second result does the same, beginning with the second lane of each.
    ///
    /// This particular permutation is efficient on many architectures.
    ///
    /// ```
    /// #![feature(portable_simd)]
    /// # use core_simd::Simd;
    /// let a = Simd::from_array([0, 4, 1, 5]);
    /// let b = Simd::from_array([2, 6, 3, 7]);
    /// let (x, y) = a.deinterleave(b);
    /// assert_eq!(x.to_array(), [0, 1, 2, 3]);
    /// assert_eq!(y.to_array(), [4, 5, 6, 7]);
    /// ```
    #[inline]
    pub fn deinterleave(self, other: Self) -> (Self, Self) {
        // The front half of the output takes lanes `phase`, `phase + 2`, ... of the first
        // vector; the back half takes the same lanes of the second vector.
        const fn strided<const LANES: usize>(phase: usize) -> [Which; LANES] {
            let half = LANES / 2;
            let mut lanes = [Which::First(0); LANES];
            let mut pair = 0;
            while pair < half {
                let src = 2 * pair + phase;
                lanes[pair] = Which::First(src);
                lanes[half + pair] = Which::Second(src);
                pair += 1;
            }
            lanes
        }

        struct EvenLanes;
        struct OddLanes;

        impl<const LANES: usize> Swizzle2<LANES, LANES> for EvenLanes {
            const INDEX: [Which; LANES] = strided::<LANES>(0);
        }

        impl<const LANES: usize> Swizzle2<LANES, LANES> for OddLanes {
            const INDEX: [Which; LANES] = strided::<LANES>(1);
        }

        (
            EvenLanes::swizzle2(self, other),
            OddLanes::swizzle2(self, other),
        )
    }
}

View File

@ -1,37 +0,0 @@
#![feature(portable_simd)]
use core_simd::Simd;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_test::*;
#[cfg(target_arch = "wasm32")]
wasm_bindgen_test_configure!(run_in_browser);
/// A two-operand shuffle picks lanes by flat index: 0..=3 from the first operand, 4..=7 from
/// the second.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn simple_shuffle() {
    let first = Simd::from_array([2, 4, 1, 9]);
    // Both operands hold the same values, so index 4 reads 2 and index 6 reads 1.
    let second = first;
    let shuffled = first.shuffle::<{ [3, 1, 4, 6] }>(second);
    assert_eq!(shuffled.to_array(), [9, 4, 2, 1]);
}
/// `reverse` flips the lane order of an eight-lane vector.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn reverse() {
    let ascending = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
    let flipped = ascending.reverse();
    assert_eq!(flipped.to_array(), [7, 6, 5, 4, 3, 2, 1, 0]);
}
/// `interleave` zips two vectors lane by lane and `deinterleave` undoes it.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn interleave() {
    let left = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
    let right = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);

    let (lo, hi) = left.interleave(right);
    assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
    assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);

    // Round trip: splitting the interleaved halves recovers the original inputs.
    let (even, odd) = lo.deinterleave(hi);
    assert_eq!(even, left);
    assert_eq!(odd, right);
}

View File

@ -0,0 +1,62 @@
#![feature(portable_simd)]
use core_simd::{Simd, Swizzle};
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_test::*;
#[cfg(target_arch = "wasm32")]
wasm_bindgen_test_configure!(run_in_browser);
/// One marker type can carry several swizzles; the requested output lane count picks the
/// implementation.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn swizzle() {
    struct Index;

    // Four lanes in, two lanes out: lane 1 is copied into both outputs.
    impl Swizzle<4, 2> for Index {
        const INDEX: [usize; 2] = [1, 1];
    }

    // Four lanes in, four lanes out: a permutation.
    impl Swizzle<4, 4> for Index {
        const INDEX: [usize; 4] = [2, 1, 3, 0];
    }

    let source = Simd::from_array([2, 4, 1, 9]);

    let permuted: [_; 4] = Index::swizzle(source).to_array();
    assert_eq!(permuted, [1, 4, 9, 2]);

    let duplicated: [_; 2] = Index::swizzle(source).to_array();
    assert_eq!(duplicated, [4, 4]);
}
/// `reverse` flips the lane order.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn reverse() {
    let ascending = Simd::from_array([1, 2, 3, 4]);
    let flipped = ascending.reverse();
    assert_eq!(flipped.to_array(), [4, 3, 2, 1]);
}
/// Rotations in both directions, including offsets equal to and larger than the lane count
/// (which wrap around).
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn rotate() {
    let v = Simd::from_array([1, 2, 3, 4]);

    // Left: lanes move towards the front.
    assert_eq!(v.rotate_left::<0>().to_array(), [1, 2, 3, 4]);
    assert_eq!(v.rotate_left::<1>().to_array(), [2, 3, 4, 1]);
    assert_eq!(v.rotate_left::<2>().to_array(), [3, 4, 1, 2]);
    assert_eq!(v.rotate_left::<3>().to_array(), [4, 1, 2, 3]);
    assert_eq!(v.rotate_left::<4>().to_array(), [1, 2, 3, 4]);
    assert_eq!(v.rotate_left::<5>().to_array(), [2, 3, 4, 1]);

    // Right: lanes move towards the back.
    assert_eq!(v.rotate_right::<0>().to_array(), [1, 2, 3, 4]);
    assert_eq!(v.rotate_right::<1>().to_array(), [4, 1, 2, 3]);
    assert_eq!(v.rotate_right::<2>().to_array(), [3, 4, 1, 2]);
    assert_eq!(v.rotate_right::<3>().to_array(), [2, 3, 4, 1]);
    assert_eq!(v.rotate_right::<4>().to_array(), [1, 2, 3, 4]);
    assert_eq!(v.rotate_right::<5>().to_array(), [4, 1, 2, 3]);
}
/// `interleave` zips two vectors lane by lane and `deinterleave` undoes it.
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn interleave() {
    let left = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
    let right = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);

    let (lo, hi) = left.interleave(right);
    assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
    assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);

    // Round trip: splitting the interleaved halves recovers the original inputs.
    let (even, odd) = lo.deinterleave(hi);
    assert_eq!(even, left);
    assert_eq!(odd, right);
}