This commit is contained in:
commit
cde7bdc678
@ -2,5 +2,6 @@
|
||||
|
||||
members = [
|
||||
"crates/core_simd",
|
||||
"crates/std_float",
|
||||
"crates/test_helpers",
|
||||
]
|
||||
|
@ -26,3 +26,6 @@ features = ["alloc"]
|
||||
|
||||
[dev-dependencies.test_helpers]
|
||||
path = "../test_helpers"
|
||||
|
||||
[dev-dependencies]
|
||||
std_float = { path = "../std_float/", features = ["as_crate"] }
|
||||
|
@ -1,11 +1,13 @@
|
||||
#![cfg_attr(feature = "std", feature(portable_simd))]
|
||||
#![feature(portable_simd)]
|
||||
extern crate std_float;
|
||||
|
||||
/// Benchmarks game nbody code
|
||||
/// Taken from the `packed_simd` crate
|
||||
/// Run this benchmark with `cargo test --example nbody`
|
||||
#[cfg(feature = "std")]
|
||||
mod nbody {
|
||||
use core_simd::*;
|
||||
use core_simd::simd::*;
|
||||
#[allow(unused)] // False positive?
|
||||
use std_float::StdFloat;
|
||||
|
||||
use std::f64::consts::PI;
|
||||
const SOLAR_MASS: f64 = 4.0 * PI * PI;
|
||||
@ -167,7 +169,6 @@ pub fn run(n: usize) -> (f64, f64) {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// Good enough for demonstration purposes, not going for strictness here.
|
||||
@ -184,7 +185,6 @@ fn test() {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "std")]
|
||||
{
|
||||
let (energy_before, energy_after) = nbody::run(1000);
|
||||
println!("Energy before: {}", energy_before);
|
||||
|
@ -39,6 +39,10 @@
|
||||
|
||||
/// fptoui/fptosi/uitofp/sitofp
|
||||
pub(crate) fn simd_cast<T, U>(x: T) -> U;
|
||||
/// follows Rust's `T as U` semantics, including saturating float casts
|
||||
/// which amounts to the same as `simd_cast` for many cases
|
||||
#[cfg(not(bootstrap))]
|
||||
pub(crate) fn simd_as<T, U>(x: T) -> U;
|
||||
|
||||
/// neg/fneg
|
||||
pub(crate) fn simd_neg<T>(x: T) -> T;
|
||||
@ -46,6 +50,10 @@
|
||||
/// fabs
|
||||
pub(crate) fn simd_fabs<T>(x: T) -> T;
|
||||
|
||||
// minnum/maxnum
|
||||
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
|
||||
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
|
||||
|
||||
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
@ -87,29 +95,3 @@
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
mod std {
|
||||
extern "platform-intrinsic" {
|
||||
// ceil
|
||||
pub(crate) fn simd_ceil<T>(x: T) -> T;
|
||||
|
||||
// floor
|
||||
pub(crate) fn simd_floor<T>(x: T) -> T;
|
||||
|
||||
// round
|
||||
pub(crate) fn simd_round<T>(x: T) -> T;
|
||||
|
||||
// trunc
|
||||
pub(crate) fn simd_trunc<T>(x: T) -> T;
|
||||
|
||||
// fsqrt
|
||||
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
|
||||
|
||||
// fma
|
||||
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub(crate) use crate::simd::intrinsics::std::*;
|
||||
|
@ -12,9 +12,10 @@
|
||||
)]
|
||||
mod mask_impl;
|
||||
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt;
|
||||
use core::{fmt, mem};
|
||||
|
||||
mod sealed {
|
||||
use super::*;
|
||||
@ -105,22 +106,39 @@ pub fn splat(value: bool) -> Self {
|
||||
Self(mask_impl::Mask::splat(value))
|
||||
}
|
||||
|
||||
/// Converts an array to a SIMD vector.
|
||||
/// Converts an array of bools to a SIMD mask.
|
||||
pub fn from_array(array: [bool; LANES]) -> Self {
|
||||
let mut vector = Self::splat(false);
|
||||
for (i, v) in array.iter().enumerate() {
|
||||
vector.set(i, *v);
|
||||
// SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
|
||||
// true: 0b_0000_0001
|
||||
// false: 0b_0000_0000
|
||||
// Thus, an array of bools is also a valid array of bytes: [u8; N]
|
||||
// This would be hypothetically valid as an "in-place" transmute,
|
||||
// but these are "dependently-sized" types, so copy elision it is!
|
||||
unsafe {
|
||||
let bytes: [u8; LANES] = mem::transmute_copy(&array);
|
||||
let bools: Simd<i8, LANES> =
|
||||
intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
|
||||
Mask::from_int_unchecked(intrinsics::simd_cast(bools))
|
||||
}
|
||||
vector
|
||||
}
|
||||
|
||||
/// Converts a SIMD vector to an array.
|
||||
/// Converts a SIMD mask to an array of bools.
|
||||
pub fn to_array(self) -> [bool; LANES] {
|
||||
let mut array = [false; LANES];
|
||||
for (i, v) in array.iter_mut().enumerate() {
|
||||
*v = self.test(i);
|
||||
// This follows mostly the same logic as from_array.
|
||||
// SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
|
||||
// true: 0b_0000_0001
|
||||
// false: 0b_0000_0000
|
||||
// Thus, an array of bools is also a valid array of bytes: [u8; N]
|
||||
// Since our masks are equal to integers where all bits are set,
|
||||
// we can simply convert them to i8s, and then bitand them by the
|
||||
// bitpattern for Rust's "true" bool.
|
||||
// This would be hypothetically valid as an "in-place" transmute,
|
||||
// but these are "dependently-sized" types, so copy elision it is!
|
||||
unsafe {
|
||||
let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
|
||||
bytes &= Simd::splat(1i8);
|
||||
mem::transmute_copy(&bytes)
|
||||
}
|
||||
array
|
||||
}
|
||||
|
||||
/// Converts a vector of integers to a mask, where 0 represents `false` and -1
|
||||
@ -516,7 +534,7 @@ fn bitxor_assign(&mut self, rhs: bool) {
|
||||
pub type mask16x16 = Mask<i16, 16>;
|
||||
|
||||
/// Vector of 32 16-bit masks
|
||||
pub type mask16x32 = Mask<i32, 32>;
|
||||
pub type mask16x32 = Mask<i16, 32>;
|
||||
|
||||
/// Vector of two 32-bit masks
|
||||
pub type mask32x2 = Mask<i32, 2>;
|
||||
|
@ -1,4 +1,3 @@
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
use core::ops::{Add, Mul};
|
||||
use core::ops::{BitAnd, BitOr, BitXor};
|
||||
@ -32,232 +31,211 @@ fn index_mut(&mut self, index: I) -> &mut Self::Output {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
|
||||
fn invalid_shift_rhs<T>(rhs: T) -> bool
|
||||
where
|
||||
T: Default + PartialOrd + core::convert::TryFrom<usize>,
|
||||
<T as core::convert::TryFrom<usize>>::Error: core::fmt::Debug,
|
||||
{
|
||||
let bits_in_type = T::try_from(8 * core::mem::size_of::<T>()).unwrap();
|
||||
rhs < T::default() || rhs >= bits_in_type
|
||||
macro_rules! unsafe_base {
|
||||
($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
|
||||
};
|
||||
}
|
||||
|
||||
/// Automatically implements operators over references in addition to the provided operator.
|
||||
macro_rules! impl_ref_ops {
|
||||
// binary op
|
||||
{
|
||||
impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
|
||||
where
|
||||
LaneCount<$lanes2:ident>: SupportedLaneCount,
|
||||
/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
|
||||
/// It handles performing a bitand in addition to calling the shift operator, so that the result
|
||||
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if rhs >= <Int>::BITS
|
||||
/// At worst, this will maybe add another instruction and cycle,
|
||||
/// at best, it may open up more optimization opportunities,
|
||||
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
|
||||
///
|
||||
// FIXME: Consider implementing this in cg_llvm instead?
|
||||
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
|
||||
macro_rules! wrap_bitshift {
|
||||
($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
|
||||
unsafe {
|
||||
$crate::simd::intrinsics::$simd_call(
|
||||
$lhs,
|
||||
$rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
|
||||
)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Division by zero is poison, according to LLVM.
|
||||
// So is dividing the MIN value of a signed integer by -1,
|
||||
// since that would return MAX + 1.
|
||||
// FIXME: Rust allows <SInt>::MIN / -1,
|
||||
// so we should probably figure out how to make that safe.
|
||||
macro_rules! int_divrem_guard {
|
||||
( $lhs:ident,
|
||||
$rhs:ident,
|
||||
{ const PANIC_ZERO: &'static str = $zero:literal;
|
||||
const PANIC_OVERFLOW: &'static str = $overflow:literal;
|
||||
$simd_call:ident
|
||||
},
|
||||
$int:ident ) => {
|
||||
if $rhs.lanes_eq(Simd::splat(0)).any() {
|
||||
panic!($zero);
|
||||
} else if <$int>::MIN != 0
|
||||
&& ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
|
||||
// type inference can break here, so cut an SInt to size
|
||||
& $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
|
||||
{
|
||||
type Output = $output:ty;
|
||||
|
||||
$(#[$attrs:meta])*
|
||||
fn $fn:ident($self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) -> Self::Output $body:tt
|
||||
}
|
||||
} => {
|
||||
impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
|
||||
where
|
||||
LaneCount<$lanes2>: SupportedLaneCount,
|
||||
{
|
||||
type Output = $output;
|
||||
|
||||
$(#[$attrs])*
|
||||
fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body
|
||||
panic!($overflow);
|
||||
} else {
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Automatically implements operators over vectors and scalars for a particular vector.
|
||||
macro_rules! impl_op {
|
||||
{ impl Add for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Add::add, simd_add }
|
||||
};
|
||||
{ impl Sub for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Sub::sub, simd_sub }
|
||||
};
|
||||
{ impl Mul for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Mul::mul, simd_mul }
|
||||
};
|
||||
{ impl Div for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Div::div, simd_div }
|
||||
};
|
||||
{ impl Rem for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Rem::rem, simd_rem }
|
||||
};
|
||||
{ impl Shl for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Shl::shl, simd_shl }
|
||||
};
|
||||
{ impl Shr for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, Shr::shr, simd_shr }
|
||||
};
|
||||
{ impl BitAnd for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, BitAnd::bitand, simd_and }
|
||||
};
|
||||
{ impl BitOr for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, BitOr::bitor, simd_or }
|
||||
};
|
||||
{ impl BitXor for $scalar:ty } => {
|
||||
impl_op! { @binary $scalar, BitXor::bitxor, simd_xor }
|
||||
};
|
||||
macro_rules! for_base_types {
|
||||
( T = ($($scalar:ident),*);
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = $out:ty;
|
||||
|
||||
// generic binary op with assignment when output is `Self`
|
||||
{ @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => {
|
||||
impl_ref_ops! {
|
||||
impl<const LANES: usize> core::ops::$trait<Self> for Simd<$scalar, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
impl $op:ident::$call:ident {
|
||||
$macro_impl:ident $inner:tt
|
||||
}) => {
|
||||
$(
|
||||
impl<const N: usize> $op<Self> for Simd<$scalar, N>
|
||||
where
|
||||
$scalar: SimdElement,
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
type Output = $out;
|
||||
|
||||
#[inline]
|
||||
fn $trait_fn(self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
intrinsics::$intrinsic(self, rhs)
|
||||
#[inline]
|
||||
#[must_use = "operator returns a new vector without mutating the inputs"]
|
||||
fn $call(self, rhs: Self) -> Self::Output {
|
||||
$macro_impl!(self, rhs, $inner, $scalar)
|
||||
}
|
||||
}
|
||||
}
|
||||
})*
|
||||
}
|
||||
}
|
||||
|
||||
// A "TokenTree muncher": takes a set of scalar types `T = {};`
|
||||
// type parameters for the ops it implements, `Op::fn` names,
|
||||
// and a macro that expands into an expr, substituting in an intrinsic.
|
||||
// It passes that to for_base_types, which expands an impl for the types,
|
||||
// using the expanded expr in the function, and recurses with itself.
|
||||
//
|
||||
// tl;dr impls a set of ops::{Traits} for a set of types
|
||||
macro_rules! for_base_ops {
|
||||
(
|
||||
T = $types:tt;
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = $out:ident;
|
||||
impl $op:ident::$call:ident
|
||||
$inner:tt
|
||||
$($rest:tt)*
|
||||
) => {
|
||||
for_base_types! {
|
||||
T = $types;
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = $out;
|
||||
impl $op::$call
|
||||
$inner
|
||||
}
|
||||
for_base_ops! {
|
||||
T = $types;
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = $out;
|
||||
$($rest)*
|
||||
}
|
||||
};
|
||||
($($done:tt)*) => {
|
||||
// Done.
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements floating-point operators for the provided types.
|
||||
macro_rules! impl_float_ops {
|
||||
{ $($scalar:ty),* } => {
|
||||
$(
|
||||
impl_op! { impl Add for $scalar }
|
||||
impl_op! { impl Sub for $scalar }
|
||||
impl_op! { impl Mul for $scalar }
|
||||
impl_op! { impl Div for $scalar }
|
||||
impl_op! { impl Rem for $scalar }
|
||||
)*
|
||||
};
|
||||
// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
|
||||
// For all of these operations, simd_* intrinsics apply wrapping logic.
|
||||
for_base_ops! {
|
||||
T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = Self;
|
||||
|
||||
impl Add::add {
|
||||
unsafe_base { simd_add }
|
||||
}
|
||||
|
||||
impl Mul::mul {
|
||||
unsafe_base { simd_mul }
|
||||
}
|
||||
|
||||
impl Sub::sub {
|
||||
unsafe_base { simd_sub }
|
||||
}
|
||||
|
||||
impl BitAnd::bitand {
|
||||
unsafe_base { simd_and }
|
||||
}
|
||||
|
||||
impl BitOr::bitor {
|
||||
unsafe_base { simd_or }
|
||||
}
|
||||
|
||||
impl BitXor::bitxor {
|
||||
unsafe_base { simd_xor }
|
||||
}
|
||||
|
||||
impl Div::div {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to divide by zero";
|
||||
const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
|
||||
simd_div
|
||||
}
|
||||
}
|
||||
|
||||
impl Rem::rem {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
|
||||
const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
|
||||
simd_rem
|
||||
}
|
||||
}
|
||||
|
||||
// The only question is how to handle shifts >= <Int>::BITS?
|
||||
// Our current solution uses wrapping logic.
|
||||
impl Shl::shl {
|
||||
wrap_bitshift { simd_shl }
|
||||
}
|
||||
|
||||
impl Shr::shr {
|
||||
wrap_bitshift {
|
||||
// This automatically monomorphizes to lshr or ashr, depending,
|
||||
// so it's fine to use it for both UInts and SInts.
|
||||
simd_shr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements unsigned integer operators for the provided types.
|
||||
macro_rules! impl_unsigned_int_ops {
|
||||
{ $($scalar:ty),* } => {
|
||||
$(
|
||||
impl_op! { impl Add for $scalar }
|
||||
impl_op! { impl Sub for $scalar }
|
||||
impl_op! { impl Mul for $scalar }
|
||||
impl_op! { impl BitAnd for $scalar }
|
||||
impl_op! { impl BitOr for $scalar }
|
||||
impl_op! { impl BitXor for $scalar }
|
||||
// We don't need any special precautions here:
|
||||
// Floats always accept arithmetic ops, but may become NaN.
|
||||
for_base_ops! {
|
||||
T = (f32, f64);
|
||||
type Lhs = Simd<T, N>;
|
||||
type Rhs = Simd<T, N>;
|
||||
type Output = Self;
|
||||
|
||||
// Integers panic on divide by 0
|
||||
impl_ref_ops! {
|
||||
impl<const LANES: usize> core::ops::Div<Self> for Simd<$scalar, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
impl Add::add {
|
||||
unsafe_base { simd_add }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn div(self, rhs: Self) -> Self::Output {
|
||||
if rhs.as_array()
|
||||
.iter()
|
||||
.any(|x| *x == 0)
|
||||
{
|
||||
panic!("attempt to divide by zero");
|
||||
}
|
||||
impl Mul::mul {
|
||||
unsafe_base { simd_mul }
|
||||
}
|
||||
|
||||
// Guards for div(MIN, -1),
|
||||
// this check only applies to signed ints
|
||||
if <$scalar>::MIN != 0 && self.as_array().iter()
|
||||
.zip(rhs.as_array().iter())
|
||||
.any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
|
||||
panic!("attempt to divide with overflow");
|
||||
}
|
||||
unsafe { intrinsics::simd_div(self, rhs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Sub::sub {
|
||||
unsafe_base { simd_sub }
|
||||
}
|
||||
|
||||
// remainder panics on zero divisor
|
||||
impl_ref_ops! {
|
||||
impl<const LANES: usize> core::ops::Rem<Self> for Simd<$scalar, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
impl Div::div {
|
||||
unsafe_base { simd_div }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn rem(self, rhs: Self) -> Self::Output {
|
||||
if rhs.as_array()
|
||||
.iter()
|
||||
.any(|x| *x == 0)
|
||||
{
|
||||
panic!("attempt to calculate the remainder with a divisor of zero");
|
||||
}
|
||||
|
||||
// Guards for rem(MIN, -1)
|
||||
// this branch applies the check only to signed ints
|
||||
if <$scalar>::MIN != 0 && self.as_array().iter()
|
||||
.zip(rhs.as_array().iter())
|
||||
.any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
|
||||
panic!("attempt to calculate the remainder with overflow");
|
||||
}
|
||||
unsafe { intrinsics::simd_rem(self, rhs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shifts panic on overflow
|
||||
impl_ref_ops! {
|
||||
impl<const LANES: usize> core::ops::Shl<Self> for Simd<$scalar, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
|
||||
#[inline]
|
||||
fn shl(self, rhs: Self) -> Self::Output {
|
||||
// TODO there is probably a better way of doing this
|
||||
if rhs.as_array()
|
||||
.iter()
|
||||
.copied()
|
||||
.any(invalid_shift_rhs)
|
||||
{
|
||||
panic!("attempt to shift left with overflow");
|
||||
}
|
||||
unsafe { intrinsics::simd_shl(self, rhs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_ref_ops! {
|
||||
impl<const LANES: usize> core::ops::Shr<Self> for Simd<$scalar, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
|
||||
#[inline]
|
||||
fn shr(self, rhs: Self) -> Self::Output {
|
||||
// TODO there is probably a better way of doing this
|
||||
if rhs.as_array()
|
||||
.iter()
|
||||
.copied()
|
||||
.any(invalid_shift_rhs)
|
||||
{
|
||||
panic!("attempt to shift with overflow");
|
||||
}
|
||||
unsafe { intrinsics::simd_shr(self, rhs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
impl Rem::rem {
|
||||
unsafe_base { simd_rem }
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements signed integer operators for the provided types.
|
||||
macro_rules! impl_signed_int_ops {
|
||||
{ $($scalar:ty),* } => {
|
||||
impl_unsigned_int_ops! { $($scalar),* }
|
||||
};
|
||||
}
|
||||
|
||||
impl_unsigned_int_ops! { u8, u16, u32, u64, usize }
|
||||
impl_signed_int_ops! { i8, i16, i32, i64, isize }
|
||||
impl_float_ops! { f32, f64 }
|
||||
|
@ -5,47 +5,6 @@ macro_rules! implement {
|
||||
{
|
||||
$type:ty, $int_type:ty
|
||||
} => {
|
||||
#[cfg(feature = "std")]
|
||||
impl<const LANES: usize> Simd<$type, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
/// Returns the smallest integer greater than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
pub fn ceil(self) -> Self {
|
||||
unsafe { intrinsics::simd_ceil(self) }
|
||||
}
|
||||
|
||||
/// Returns the largest integer value less than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
pub fn floor(self) -> Self {
|
||||
unsafe { intrinsics::simd_floor(self) }
|
||||
}
|
||||
|
||||
/// Rounds to the nearest integer value. Ties round away from zero.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
pub fn round(self) -> Self {
|
||||
unsafe { intrinsics::simd_round(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's integer value, with its fractional part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
pub fn trunc(self) -> Self {
|
||||
unsafe { intrinsics::simd_trunc(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's fractional value, with its integer part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
pub fn fract(self) -> Self {
|
||||
self - self.trunc()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Simd<$type, LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
|
@ -75,6 +75,36 @@ pub const fn from_slice(slice: &[T]) -> Self {
|
||||
Self(array)
|
||||
}
|
||||
|
||||
/// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
|
||||
/// This follows the semantics of Rust's `as` conversion for casting
|
||||
/// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
|
||||
/// and from floats to integers (truncating, or saturating at the limits) for each lane,
|
||||
/// or vice versa.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::Simd;
|
||||
/// # #[cfg(not(feature = "std"))] use core::simd::Simd;
|
||||
/// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
|
||||
/// let ints = floats.cast::<i32>();
|
||||
/// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
|
||||
///
|
||||
/// // Formally equivalent, but `Simd::cast` can optimize better.
|
||||
/// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
|
||||
///
|
||||
/// // The float conversion does not round-trip.
|
||||
/// let floats_again = ints.cast();
|
||||
/// assert_ne!(floats, floats_again);
|
||||
/// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
#[cfg(not(bootstrap))]
|
||||
pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
|
||||
unsafe { intrinsics::simd_as(self) }
|
||||
}
|
||||
|
||||
/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
|
||||
/// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
|
||||
///
|
||||
|
@ -38,29 +38,6 @@ pub fn abs(self) -> Self {
|
||||
unsafe { intrinsics::simd_fabs(self) }
|
||||
}
|
||||
|
||||
/// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
||||
/// yielding a more accurate result than an unfused multiply-add.
|
||||
///
|
||||
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
||||
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
|
||||
/// true, and will be heavily dependent on designing algorithms with specific target
|
||||
/// hardware in mind.
|
||||
#[cfg(feature = "std")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn mul_add(self, a: Self, b: Self) -> Self {
|
||||
unsafe { intrinsics::simd_fma(self, a, b) }
|
||||
}
|
||||
|
||||
/// Produces a vector where every lane has the square root value
|
||||
/// of the equivalently-indexed lane in `self`
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[cfg(feature = "std")]
|
||||
pub fn sqrt(self) -> Self {
|
||||
unsafe { intrinsics::simd_fsqrt(self) }
|
||||
}
|
||||
|
||||
/// Takes the reciprocal (inverse) of each lane, `1/x`.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
@ -128,8 +105,8 @@ pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> {
|
||||
self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0))
|
||||
}
|
||||
|
||||
/// Returns true for each lane if its value is neither neither zero, infinite,
|
||||
/// subnormal, or `NaN`.
|
||||
/// Returns true for each lane if its value is neither zero, infinite,
|
||||
/// subnormal, nor `NaN`.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
|
||||
@ -164,11 +141,7 @@ pub fn copysign(self, sign: Self) -> Self {
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn min(self, other: Self) -> Self {
|
||||
// TODO consider using an intrinsic
|
||||
self.is_nan().select(
|
||||
other,
|
||||
self.lanes_ge(other).select(other, self)
|
||||
)
|
||||
unsafe { intrinsics::simd_fmin(self, other) }
|
||||
}
|
||||
|
||||
/// Returns the maximum of each lane.
|
||||
@ -177,11 +150,7 @@ pub fn min(self, other: Self) -> Self {
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn max(self, other: Self) -> Self {
|
||||
// TODO consider using an intrinsic
|
||||
self.is_nan().select(
|
||||
other,
|
||||
self.lanes_le(other).select(other, self)
|
||||
)
|
||||
unsafe { intrinsics::simd_fmax(self, other) }
|
||||
}
|
||||
|
||||
/// Restrict each lane to a certain interval unless it is NaN.
|
||||
|
37
library/portable-simd/crates/core_simd/tests/cast.rs
Normal file
37
library/portable-simd/crates/core_simd/tests/cast.rs
Normal file
@ -0,0 +1,37 @@
|
||||
#![feature(portable_simd)]
|
||||
macro_rules! cast_types {
|
||||
($start:ident, $($target:ident),*) => {
|
||||
mod $start {
|
||||
use core_simd::simd::Simd;
|
||||
type Vector<const N: usize> = Simd<$start, N>;
|
||||
$(
|
||||
mod $target {
|
||||
use super::*;
|
||||
test_helpers::test_lanes! {
|
||||
fn cast_as<const N: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<N>::cast::<$target>,
|
||||
&|x| x as $target,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// The hypothesis is that widening conversions aren't terribly interesting.
|
||||
cast_types!(f32, f64, i8, u8, usize, isize);
|
||||
cast_types!(f64, f32, i8, u8, usize, isize);
|
||||
cast_types!(i8, u8, f32);
|
||||
cast_types!(u8, i8, f32);
|
||||
cast_types!(i16, u16, i8, u8, f32);
|
||||
cast_types!(u16, i16, i8, u8, f32);
|
||||
cast_types!(i32, u32, i8, u8, f32, f64);
|
||||
cast_types!(u32, i32, i8, u8, f32, f64);
|
||||
cast_types!(i64, u64, i8, u8, isize, usize, f32, f64);
|
||||
cast_types!(u64, i64, i8, u8, isize, usize, f32, f64);
|
||||
cast_types!(isize, usize, i8, u8, f32, f64);
|
||||
cast_types!(usize, isize, i8, u8, f32, f64);
|
@ -546,6 +546,8 @@ fn horizontal_min<const LANES: usize>() {
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
mod std {
|
||||
use std_float::StdFloat;
|
||||
|
||||
use super::*;
|
||||
test_helpers::test_lanes! {
|
||||
fn sqrt<const LANES: usize>() {
|
||||
|
@ -3,6 +3,8 @@
|
||||
macro_rules! float_rounding_test {
|
||||
{ $scalar:tt, $int_scalar:tt } => {
|
||||
mod $scalar {
|
||||
use std_float::StdFloat;
|
||||
|
||||
type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
|
||||
type Scalar = $scalar;
|
||||
type IntScalar = $int_scalar;
|
||||
|
13
library/portable-simd/crates/std_float/Cargo.toml
Normal file
13
library/portable-simd/crates/std_float/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "std_float"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
core_simd = { path = "../core_simd" }
|
||||
|
||||
[features]
|
||||
default = ["as_crate"]
|
||||
as_crate = []
|
165
library/portable-simd/crates/std_float/src/lib.rs
Normal file
165
library/portable-simd/crates/std_float/src/lib.rs
Normal file
@ -0,0 +1,165 @@
|
||||
#![cfg_attr(feature = "as_crate", no_std)] // We are std!
|
||||
#![cfg_attr(
|
||||
feature = "as_crate",
|
||||
feature(platform_intrinsics),
|
||||
feature(portable_simd)
|
||||
)]
|
||||
#[cfg(not(feature = "as_crate"))]
|
||||
use core::simd;
|
||||
#[cfg(feature = "as_crate")]
|
||||
use core_simd::simd;
|
||||
|
||||
use simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
|
||||
#[cfg(feature = "as_crate")]
|
||||
mod experimental {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
|
||||
#[cfg(feature = "as_crate")]
|
||||
use experimental as sealed;
|
||||
|
||||
use crate::sealed::Sealed;
|
||||
|
||||
// "platform intrinsics" are essentially "codegen intrinsics"
|
||||
// each of these may be scalarized and lowered to a libm call
|
||||
extern "platform-intrinsic" {
|
||||
// ceil
|
||||
fn simd_ceil<T>(x: T) -> T;
|
||||
|
||||
// floor
|
||||
fn simd_floor<T>(x: T) -> T;
|
||||
|
||||
// round
|
||||
fn simd_round<T>(x: T) -> T;
|
||||
|
||||
// trunc
|
||||
fn simd_trunc<T>(x: T) -> T;
|
||||
|
||||
// fsqrt
|
||||
fn simd_fsqrt<T>(x: T) -> T;
|
||||
|
||||
// fma
|
||||
fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||
}
|
||||
|
||||
/// This trait provides a possibly-temporary implementation of float functions
|
||||
/// that may, in the absence of hardware support, canonicalize to calling an
|
||||
/// operating system's `math.h` dynamically-loaded library (also known as a
|
||||
/// shared object). As these conditionally require runtime support, they
|
||||
/// should only appear in binaries built assuming OS support: `std`.
|
||||
///
|
||||
/// However, there is no reason SIMD types, in general, need OS support,
|
||||
/// as for many architectures an embedded binary may simply configure that
|
||||
/// support itself. This means these types must be visible in `core`
|
||||
/// but have these functions available in `std`.
|
||||
///
|
||||
/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but
|
||||
/// due to compiler limitations, it is harder to implement this approach for
|
||||
/// abstract data types like [`Simd`]. From that need, this trait is born.
|
||||
///
|
||||
/// It is possible this trait will be replaced in some manner in the future,
|
||||
/// when either the compiler or its supporting runtime functions are improved.
|
||||
/// For now this trait is available to permit experimentation with SIMD float
|
||||
/// operations that may lack hardware support, such as `mul_add`.
|
||||
pub trait StdFloat: Sealed + Sized {
|
||||
/// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
||||
/// yielding a more accurate result than an unfused multiply-add.
|
||||
///
|
||||
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
||||
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
|
||||
/// true, and will be heavily dependent on designing algorithms with specific target
|
||||
/// hardware in mind.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
fn mul_add(self, a: Self, b: Self) -> Self {
|
||||
unsafe { simd_fma(self, a, b) }
|
||||
}
|
||||
|
||||
/// Produces a vector where every lane has the square root value
|
||||
/// of the equivalently-indexed lane in `self`
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
fn sqrt(self) -> Self {
|
||||
unsafe { simd_fsqrt(self) }
|
||||
}
|
||||
|
||||
/// Returns the smallest integer greater than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn ceil(self) -> Self {
|
||||
unsafe { simd_ceil(self) }
|
||||
}
|
||||
|
||||
/// Returns the largest integer value less than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn floor(self) -> Self {
|
||||
unsafe { simd_floor(self) }
|
||||
}
|
||||
|
||||
/// Rounds to the nearest integer value. Ties round away from zero.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn round(self) -> Self {
|
||||
unsafe { simd_round(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's integer value, with its fractional part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn trunc(self) -> Self {
|
||||
unsafe { simd_trunc(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's fractional value, with its integer part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
fn fract(self) -> Self;
|
||||
}
|
||||
|
||||
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
|
||||
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
|
||||
|
||||
// We can safely just use all the defaults.
|
||||
impl<const N: usize> StdFloat for Simd<f32, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
/// Returns the floating point's fractional value, with its integer part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn fract(self) -> Self {
|
||||
self - self.trunc()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> StdFloat for Simd<f64, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
/// Returns the floating point's fractional value, with its integer part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn fract(self) -> Self {
|
||||
self - self.trunc()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use simd::*;
|
||||
|
||||
#[test]
|
||||
fn everything_works() {
|
||||
let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
|
||||
let x2 = x + x;
|
||||
let _xc = x.ceil();
|
||||
let _xf = x.floor();
|
||||
let _xr = x.round();
|
||||
let _xt = x.trunc();
|
||||
let _xfma = x.mul_add(x, x);
|
||||
let _xsqrt = x.sqrt();
|
||||
let _ = x2.abs() * x2;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user