Add SimdArray::gather_{or,or_default,select}

This commit is contained in:
Jubilee Young 2021-06-15 14:38:34 -07:00
parent 2f99cc80d8
commit 128b6f5e22
2 changed files with 69 additions and 1 deletions

View File

@ -1,4 +1,6 @@
use crate::intrinsics;
use crate::masks::*; use crate::masks::*;
use crate::vector::ptr::SimdConstPtr;
use crate::vector::*; use crate::vector::*;
/// A representation of a vector as an "array" with indices, implementing /// A representation of a vector as an "array" with indices, implementing
@ -17,6 +19,70 @@ where
/// Generates a SIMD vector with the same value in every lane. /// Generates a SIMD vector with the same value in every lane.
#[must_use] #[must_use]
fn splat(val: Self::Scalar) -> Self; fn splat(val: Self::Scalar) -> Self;
/// SIMD gather: builds a vector by loading one element of `slice` per lane, at the
/// (possibly discontiguous) positions listed in `idxs`.
/// A lane whose index lies outside `slice` takes the corresponding lane of `or` instead.
/// ```
/// # use core_simd::*;
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
///
/// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
/// ```
#[must_use]
#[inline]
fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self {
    // No lane is excluded up front; the all-true mask leaves lane rejection
    // entirely to `gather_select`.
    let every_lane = MaskSize::splat(true);
    Self::gather_select(slice, every_lane, idxs, or)
}
/// SIMD gather: builds a vector by loading one element of `slice` per lane, at the
/// (possibly discontiguous) positions listed in `idxs`.
/// A lane whose index lies outside `slice` is filled with `Self::Scalar::default()`
/// (`0` in the example below).
/// ```
/// # use core_simd::*;
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
///
/// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
/// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
/// ```
#[must_use]
#[inline]
fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self
where
    Self::Scalar: Default,
{
    // Broadcast the scalar default to every lane and use it as the fallback vector.
    let fallback = Self::splat(<Self::Scalar as Default>::default());
    Self::gather_or(slice, idxs, fallback)
}
/// SIMD gather: builds a vector by loading one element of `slice` per lane, at the
/// (possibly discontiguous) positions listed in `idxs`.
/// A lane that is disabled in `mask`, or whose index lies outside `slice`, takes the
/// corresponding lane of `or` instead.
/// ```
/// # use core_simd::*;
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
/// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
///
/// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
/// ```
#[must_use]
#[inline]
fn gather_select(
    slice: &[Self::Scalar],
    mask: MaskSize<LANES>,
    idxs: SimdUsize<LANES>,
    or: Self,
) -> Self {
    // A lane stays enabled only if the caller asked for it AND its index is inside `slice`.
    let in_bounds = idxs.lanes_lt(SimdUsize::splat(slice.len()));
    let enabled = (mask & in_bounds).to_int();
    // One address per lane: the slice's base pointer offset by that lane's index.
    // `wrapping_add` is used because some indices may be out of bounds; those lanes
    // are already switched off in `enabled`.
    let lane_ptrs = SimdConstPtr::splat(slice.as_ptr()).wrapping_add(idxs);
    // SAFETY: every lane still set in `enabled` has an index below `slice.len()`, so its
    // pointer addresses an element of `slice`. Lanes cleared in `enabled` are expected to
    // take their value from `or` rather than being read through their pointer.
    unsafe { intrinsics::simd_gather(or, lane_ptrs, enabled) }
}
} }
macro_rules! impl_simdarray_for { macro_rules! impl_simdarray_for {

View File

@ -45,7 +45,7 @@ extern "platform-intrinsic" {
/// fabs /// fabs
pub(crate) fn simd_fabs<T>(x: T) -> T; pub(crate) fn simd_fabs<T>(x: T) -> T;
/// fsqrt /// fsqrt
pub(crate) fn simd_fsqrt<T>(x: T) -> T; pub(crate) fn simd_fsqrt<T>(x: T) -> T;
@ -63,6 +63,8 @@ extern "platform-intrinsic" {
pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U; pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U; pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
// gather: takes a vector of values (`val`), a vector of pointers (`ptr`) and a lane mask
// (`mask`), and returns a vector of the same type as `val`.
// NOTE(review): presumably masked-off lanes keep the `val` lane and enabled lanes are
// loaded through `ptr` (LLVM masked gather) — confirm against the intrinsic's definition.
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
// {s,u}add.sat // {s,u}add.sat
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T; pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;