Auto merge of #3055 - eduardosm:x86-sse2-intrinsics, r=RalfJung
Implement some `llvm.x86.sse2.*` intrinsics and add tests Continuation of https://github.com/rust-lang/miri/pull/2989 with SSE2 intrinsics. Thankfully, a significant amount of SSE2 functions use `simd_*` intrinsics, which are already implemented in Miri.
This commit is contained in:
commit
8cd31eadba
@ -1037,6 +1037,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
this, link_name, abi, args, dest,
|
||||
);
|
||||
}
|
||||
name if name.starts_with("llvm.x86.sse2.") => {
|
||||
return shims::x86::sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
|
||||
this, link_name, abi, args, dest,
|
||||
);
|
||||
}
|
||||
|
||||
// Platform-specific shims
|
||||
_ =>
|
||||
|
@ -1 +1,45 @@
|
||||
use crate::InterpResult;
|
||||
|
||||
pub(super) mod sse;
|
||||
pub(super) mod sse2;
|
||||
|
||||
/// Floating point comparison operation
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/cmpss>
|
||||
/// <https://www.felixcloutier.com/x86/cmpps>
|
||||
/// <https://www.felixcloutier.com/x86/cmpsd>
|
||||
/// <https://www.felixcloutier.com/x86/cmppd>
|
||||
#[derive(Copy, Clone)]
|
||||
enum FloatCmpOp {
|
||||
Eq,
|
||||
Lt,
|
||||
Le,
|
||||
Unord,
|
||||
Neq,
|
||||
/// Not less-than
|
||||
Nlt,
|
||||
/// Not less-or-equal
|
||||
Nle,
|
||||
/// Ordered, i.e. neither of them is NaN
|
||||
Ord,
|
||||
}
|
||||
|
||||
impl FloatCmpOp {
|
||||
/// Convert from the `imm` argument used to specify the comparison
|
||||
/// operation in intrinsics such as `llvm.x86.sse.cmp.ss`.
|
||||
fn from_intrinsic_imm(imm: i8, intrinsic: &str) -> InterpResult<'_, Self> {
|
||||
match imm {
|
||||
0 => Ok(Self::Eq),
|
||||
1 => Ok(Self::Lt),
|
||||
2 => Ok(Self::Le),
|
||||
3 => Ok(Self::Unord),
|
||||
4 => Ok(Self::Neq),
|
||||
5 => Ok(Self::Nlt),
|
||||
6 => Ok(Self::Nle),
|
||||
7 => Ok(Self::Ord),
|
||||
imm => {
|
||||
throw_unsup_format!("invalid `imm` parameter of {intrinsic}: {imm}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ use rustc_target::spec::abi::Abi;
|
||||
|
||||
use rand::Rng as _;
|
||||
|
||||
use super::FloatCmpOp;
|
||||
use crate::*;
|
||||
use shims::foreign_items::EmulateByNameResult;
|
||||
|
||||
@ -78,7 +79,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
|
||||
unary_op_ss(this, which, op, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{sqrt,rcp,rsqrt}_ss functions.
|
||||
// Used to implement _mm_{sqrt,rcp,rsqrt}_ps functions.
|
||||
// Performs the operations on all components of `op`.
|
||||
"sqrt.ps" | "rcp.ps" | "rsqrt.ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
@ -100,22 +101,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match this.read_scalar(imm)?.to_i8()? {
|
||||
0 => FloatBinOp::Cmp(FloatCmpOp::Eq),
|
||||
1 => FloatBinOp::Cmp(FloatCmpOp::Lt),
|
||||
2 => FloatBinOp::Cmp(FloatCmpOp::Le),
|
||||
3 => FloatBinOp::Cmp(FloatCmpOp::Unord),
|
||||
4 => FloatBinOp::Cmp(FloatCmpOp::Neq),
|
||||
5 => FloatBinOp::Cmp(FloatCmpOp::Nlt),
|
||||
6 => FloatBinOp::Cmp(FloatCmpOp::Nle),
|
||||
7 => FloatBinOp::Cmp(FloatCmpOp::Ord),
|
||||
imm => {
|
||||
throw_unsup_format!(
|
||||
"invalid 3rd parameter of llvm.x86.sse.cmp.ps: {}",
|
||||
imm
|
||||
);
|
||||
}
|
||||
};
|
||||
let which = FloatBinOp::Cmp(FloatCmpOp::from_intrinsic_imm(
|
||||
this.read_scalar(imm)?.to_i8()?,
|
||||
"llvm.x86.sse.cmp.ss",
|
||||
)?);
|
||||
|
||||
bin_op_ss(this, which, left, right, dest)?;
|
||||
}
|
||||
@ -127,26 +116,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match this.read_scalar(imm)?.to_i8()? {
|
||||
0 => FloatBinOp::Cmp(FloatCmpOp::Eq),
|
||||
1 => FloatBinOp::Cmp(FloatCmpOp::Lt),
|
||||
2 => FloatBinOp::Cmp(FloatCmpOp::Le),
|
||||
3 => FloatBinOp::Cmp(FloatCmpOp::Unord),
|
||||
4 => FloatBinOp::Cmp(FloatCmpOp::Neq),
|
||||
5 => FloatBinOp::Cmp(FloatCmpOp::Nlt),
|
||||
6 => FloatBinOp::Cmp(FloatCmpOp::Nle),
|
||||
7 => FloatBinOp::Cmp(FloatCmpOp::Ord),
|
||||
imm => {
|
||||
throw_unsup_format!(
|
||||
"invalid 3rd parameter of llvm.x86.sse.cmp.ps: {}",
|
||||
imm
|
||||
);
|
||||
}
|
||||
};
|
||||
let which = FloatBinOp::Cmp(FloatCmpOp::from_intrinsic_imm(
|
||||
this.read_scalar(imm)?.to_i8()?,
|
||||
"llvm.x86.sse.cmp.ps",
|
||||
)?);
|
||||
|
||||
bin_op_ps(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{,u}comi{eq,lt,le,gt,ge,neq}_ps functions.
|
||||
// Used to implement _mm_{,u}comi{eq,lt,le,gt,ge,neq}_ss functions.
|
||||
// Compares the first component of `left` and `right` and returns
|
||||
// a scalar value (0 or 1).
|
||||
"comieq.ss" | "comilt.ss" | "comile.ss" | "comigt.ss" | "comige.ss" | "comineq.ss"
|
||||
@ -292,6 +269,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||
let op = op.to_u32()?;
|
||||
|
||||
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
|
||||
res |= (op >> 31) << i;
|
||||
}
|
||||
|
||||
@ -303,25 +281,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Floating point comparison operation
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/cmpss>
|
||||
/// <https://www.felixcloutier.com/x86/cmpps>
|
||||
#[derive(Copy, Clone)]
|
||||
enum FloatCmpOp {
|
||||
Eq,
|
||||
Lt,
|
||||
Le,
|
||||
Unord,
|
||||
Neq,
|
||||
/// Not less-than
|
||||
Nlt,
|
||||
/// Not less-or-equal
|
||||
Nle,
|
||||
/// Ordered, i.e. neither of them is NaN
|
||||
Ord,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum FloatBinOp {
|
||||
/// Arithmetic operation
|
||||
@ -436,8 +395,8 @@ fn bin_op_ss<'tcx>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs `which` operation on each component of `left`, and
|
||||
/// `right` storing the result is stored in `dest`.
|
||||
/// Performs `which` operation on each component of `left` and
|
||||
/// `right`, storing the result in `dest`.
|
||||
fn bin_op_ps<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
|
982
src/tools/miri/src/shims/x86/sse2.rs
Normal file
982
src/tools/miri/src/shims/x86/sse2.rs
Normal file
@ -0,0 +1,982 @@
|
||||
use rustc_apfloat::{
|
||||
ieee::{Double, Single},
|
||||
Float as _, FloatConvert as _,
|
||||
};
|
||||
use rustc_middle::ty::layout::LayoutOf as _;
|
||||
use rustc_middle::ty::Ty;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_target::abi::Size;
|
||||
use rustc_target::spec::abi::Abi;
|
||||
|
||||
use super::FloatCmpOp;
|
||||
use crate::*;
|
||||
use shims::foreign_items::EmulateByNameResult;
|
||||
|
||||
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
|
||||
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
fn emulate_x86_sse2_intrinsic(
|
||||
&mut self,
|
||||
link_name: Symbol,
|
||||
abi: Abi,
|
||||
args: &[OpTy<'tcx, Provenance>],
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
|
||||
let this = self.eval_context_mut();
|
||||
// Prefix should have already been checked.
|
||||
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse2.").unwrap();
|
||||
|
||||
// These intrinsics operate on 128-bit (f32x4, f64x2, i8x16, i16x8, i32x4, i64x2) SIMD
|
||||
// vectors unless stated otherwise.
|
||||
// Many intrinsic names are suffixed with "ps" (packed single), "ss" (scalar single),
|
||||
// "pd" (packed double) or "sd" (scalar double), where single means single precision
|
||||
// floating point (f32) and double means double precision floating point (f64). "ps"
|
||||
// and "pd" means that the operation is performed on each element of the vector, while
|
||||
// "ss" and "sd" means that the operation is performed only on the first element, copying
|
||||
// the remaining elements from the input vector (for binary operations, from the left-hand
|
||||
// side).
|
||||
// Intrinsics suffixed with "epiX" or "epuX" operate with X-bit signed or unsigned
|
||||
// vectors.
|
||||
match unprefixed_name {
|
||||
// Used to implement the _mm_avg_epu8 function.
|
||||
// Averages packed unsigned 8-bit integers in `left` and `right`.
|
||||
"pavg.b" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u8()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Values are expanded from u8 to u16, so adds cannot overflow.
|
||||
let res = u16::from(left)
|
||||
.checked_add(u16::from(right))
|
||||
.unwrap()
|
||||
.checked_add(1)
|
||||
.unwrap()
|
||||
/ 2;
|
||||
this.write_scalar(Scalar::from_u8(res.try_into().unwrap()), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_avg_epu16 function.
|
||||
// Averages packed unsigned 16-bit integers in `left` and `right`.
|
||||
"pavg.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u16()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u16()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Values are expanded from u16 to u32, so adds cannot overflow.
|
||||
let res = u32::from(left)
|
||||
.checked_add(u32::from(right))
|
||||
.unwrap()
|
||||
.checked_add(1)
|
||||
.unwrap()
|
||||
/ 2;
|
||||
this.write_scalar(Scalar::from_u16(res.try_into().unwrap()), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_mulhi_epi16 function.
|
||||
"pmulh.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Values are expanded from i16 to i32, so multiplication cannot overflow.
|
||||
let res = i32::from(left).checked_mul(i32::from(right)).unwrap() >> 16;
|
||||
this.write_scalar(Scalar::from_int(res, Size::from_bits(16)), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_mulhi_epu16 function.
|
||||
"pmulhu.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u16()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u16()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Values are expanded from u16 to u32, so multiplication cannot overflow.
|
||||
let res = u32::from(left).checked_mul(u32::from(right)).unwrap() >> 16;
|
||||
this.write_scalar(Scalar::from_u16(res.try_into().unwrap()), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_mul_epu32 function.
|
||||
// Multiplies the low unsigned 32-bit integers from each packed
|
||||
// 64-bit element and stores the result as 64-bit unsigned integers.
|
||||
"pmulu.dq" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are u32x4, dest is u64x2
|
||||
assert_eq!(left_len, 4);
|
||||
assert_eq!(right_len, 4);
|
||||
assert_eq!(dest_len, 2);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op_i = i.checked_mul(2).unwrap();
|
||||
let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// The multiplication will not overflow because the
|
||||
// operands are expanded from 32-bit to 64-bit.
|
||||
let res = u64::from(left).checked_mul(u64::from(right)).unwrap();
|
||||
this.write_scalar(Scalar::from_u64(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_sad_epu8 function.
|
||||
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
|
||||
// and `b`, then horizontally sum each consecutive 8 differences to produce
|
||||
// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
|
||||
// the low 16 bits of 64-bit elements returned.
|
||||
//
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8
|
||||
"psad.bw" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are u8x16, dest is u64x2
|
||||
assert_eq!(left_len, right_len);
|
||||
assert_eq!(left_len, 16);
|
||||
assert_eq!(dest_len, 2);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let mut res: u16 = 0;
|
||||
let n = left_len.checked_div(dest_len).unwrap();
|
||||
for j in 0..n {
|
||||
let op_i = j.checked_add(i.checked_mul(n).unwrap()).unwrap();
|
||||
let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u8()?;
|
||||
let right =
|
||||
this.read_scalar(&this.project_index(&right, op_i)?)?.to_u8()?;
|
||||
|
||||
res = res.checked_add(left.abs_diff(right).into()).unwrap();
|
||||
}
|
||||
|
||||
this.write_scalar(Scalar::from_u64(res.into()), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_{sll,srl,sra}_epi16 functions.
|
||||
// Shifts 16-bit packed integers in left by the amount in right.
|
||||
// Both operands are vectors of 16-bit integers. However, right is
|
||||
// interpreted as a single 64-bit integer (remaining bits are ignored).
|
||||
// For logical shifts, when right is larger than 15, zero is produced.
|
||||
// For arithmetic shifts, when right is larger than 15, the sign bit
|
||||
// is copied to remaining bits.
|
||||
"psll.w" | "psrl.w" | "psra.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
enum ShiftOp {
|
||||
Sll,
|
||||
Srl,
|
||||
Sra,
|
||||
}
|
||||
let which = match unprefixed_name {
|
||||
"psll.w" => ShiftOp::Sll,
|
||||
"psrl.w" => ShiftOp::Srl,
|
||||
"psra.w" => ShiftOp::Sra,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// Get the 64-bit shift operand and convert it to the type expected
|
||||
// by checked_{shl,shr} (u32).
|
||||
// It is ok to saturate the value to u32::MAX because any value
|
||||
// above 15 will produce the same result.
|
||||
let shift = extract_first_u64(this, &right)?.try_into().unwrap_or(u32::MAX);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u16()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = match which {
|
||||
ShiftOp::Sll => left.checked_shl(shift).unwrap_or(0),
|
||||
ShiftOp::Srl => left.checked_shr(shift).unwrap_or(0),
|
||||
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
|
||||
ShiftOp::Sra => {
|
||||
// Convert u16 to i16 to use arithmetic shift
|
||||
let left = left as i16;
|
||||
// Copy the sign bit to the remaining bits
|
||||
left.checked_shr(shift).unwrap_or(left >> 15) as u16
|
||||
}
|
||||
};
|
||||
|
||||
this.write_scalar(Scalar::from_u16(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_{sll,srl,sra}_epi32 functions.
|
||||
// 32-bit equivalent to the shift functions above.
|
||||
"psll.d" | "psrl.d" | "psra.d" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
enum ShiftOp {
|
||||
Sll,
|
||||
Srl,
|
||||
Sra,
|
||||
}
|
||||
let which = match unprefixed_name {
|
||||
"psll.d" => ShiftOp::Sll,
|
||||
"psrl.d" => ShiftOp::Srl,
|
||||
"psra.d" => ShiftOp::Sra,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// Get the 64-bit shift operand and convert it to the type expected
|
||||
// by checked_{shl,shr} (u32).
|
||||
// It is ok to saturate the value to u32::MAX because any value
|
||||
// above 31 will produce the same result.
|
||||
let shift = extract_first_u64(this, &right)?.try_into().unwrap_or(u32::MAX);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = match which {
|
||||
ShiftOp::Sll => left.checked_shl(shift).unwrap_or(0),
|
||||
ShiftOp::Srl => left.checked_shr(shift).unwrap_or(0),
|
||||
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
|
||||
ShiftOp::Sra => {
|
||||
// Convert u32 to i32 to use arithmetic shift
|
||||
let left = left as i32;
|
||||
// Copy the sign bit to the remaining bits
|
||||
left.checked_shr(shift).unwrap_or(left >> 31) as u32
|
||||
}
|
||||
};
|
||||
|
||||
this.write_scalar(Scalar::from_u32(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_{sll,srl}_epi64 functions.
|
||||
// 64-bit equivalent to the shift functions above, except _mm_sra_epi64,
|
||||
// which is not available in SSE2.
|
||||
"psll.q" | "psrl.q" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
enum ShiftOp {
|
||||
Sll,
|
||||
Srl,
|
||||
}
|
||||
let which = match unprefixed_name {
|
||||
"psll.q" => ShiftOp::Sll,
|
||||
"psrl.q" => ShiftOp::Srl,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// Get the 64-bit shift operand and convert it to the type expected
|
||||
// by checked_{shl,shr} (u32).
|
||||
// It is ok to saturate the value to u32::MAX because any value
|
||||
// above 63 will produce the same result.
|
||||
let shift = this
|
||||
.read_scalar(&this.project_index(&right, 0)?)?
|
||||
.to_u64()?
|
||||
.try_into()
|
||||
.unwrap_or(u32::MAX);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u64()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = match which {
|
||||
ShiftOp::Sll => left.checked_shl(shift).unwrap_or(0),
|
||||
ShiftOp::Srl => left.checked_shr(shift).unwrap_or(0),
|
||||
};
|
||||
|
||||
this.write_scalar(Scalar::from_u64(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtepi32_ps function.
|
||||
// Converts packed i32 to packed f32.
|
||||
// FIXME: Can we get rid of this intrinsic and just use simd_as?
|
||||
"cvtdq2ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = Scalar::from_f32(Single::from_i128(op.into()).value);
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions.
|
||||
// Converts packed f32 to packed i32.
|
||||
"cvtps2dq" | "cvttps2dq" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtps2dq
|
||||
"cvtps2dq" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttps2dq
|
||||
"cvttps2dq" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_f32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res =
|
||||
this.float_to_int_checked(op, dest.layout.ty, rnd).unwrap_or_else(|| {
|
||||
// Fallback to minimum according to SSE2 semantics.
|
||||
Scalar::from_i32(i32::MIN)
|
||||
});
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_packs_epi16 function.
|
||||
// Converts two 16-bit integer vectors to a single 8-bit integer
|
||||
// vector with signed saturation.
|
||||
"packsswb.128" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are i16x8, dest is i8x16
|
||||
assert_eq!(left_len, 8);
|
||||
assert_eq!(right_len, 8);
|
||||
assert_eq!(dest_len, 16);
|
||||
|
||||
for i in 0..left_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
||||
let left_dest = this.project_index(&dest, i)?;
|
||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
||||
|
||||
let left_res =
|
||||
i8::try_from(left).unwrap_or(if left < 0 { i8::MIN } else { i8::MAX });
|
||||
let right_res =
|
||||
i8::try_from(right).unwrap_or(if right < 0 { i8::MIN } else { i8::MAX });
|
||||
|
||||
this.write_scalar(Scalar::from_int(left_res, Size::from_bits(8)), &left_dest)?;
|
||||
this.write_scalar(
|
||||
Scalar::from_int(right_res, Size::from_bits(8)),
|
||||
&right_dest,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_packus_epi16 function.
|
||||
// Converts two 16-bit signed integer vectors to a single 8-bit
|
||||
// unsigned integer vector with saturation.
|
||||
"packuswb.128" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are i16x8, dest is u8x16
|
||||
assert_eq!(left_len, 8);
|
||||
assert_eq!(right_len, 8);
|
||||
assert_eq!(dest_len, 16);
|
||||
|
||||
for i in 0..left_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
||||
let left_dest = this.project_index(&dest, i)?;
|
||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
||||
|
||||
let left_res = u8::try_from(left).unwrap_or(if left < 0 { 0 } else { u8::MAX });
|
||||
let right_res =
|
||||
u8::try_from(right).unwrap_or(if right < 0 { 0 } else { u8::MAX });
|
||||
|
||||
this.write_scalar(Scalar::from_u8(left_res), &left_dest)?;
|
||||
this.write_scalar(Scalar::from_u8(right_res), &right_dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_packs_epi32 function.
|
||||
// Converts two 32-bit integer vectors to a single 16-bit integer
|
||||
// vector with signed saturation.
|
||||
"packssdw.128" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are i32x4, dest is i16x8
|
||||
assert_eq!(left_len, 4);
|
||||
assert_eq!(right_len, 4);
|
||||
assert_eq!(dest_len, 8);
|
||||
|
||||
for i in 0..left_len {
|
||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
|
||||
let left_dest = this.project_index(&dest, i)?;
|
||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
||||
|
||||
let left_res =
|
||||
i16::try_from(left).unwrap_or(if left < 0 { i16::MIN } else { i16::MAX });
|
||||
let right_res =
|
||||
i16::try_from(right).unwrap_or(if right < 0 { i16::MIN } else { i16::MAX });
|
||||
|
||||
this.write_scalar(Scalar::from_int(left_res, Size::from_bits(16)), &left_dest)?;
|
||||
this.write_scalar(
|
||||
Scalar::from_int(right_res, Size::from_bits(16)),
|
||||
&right_dest,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement _mm_min_sd and _mm_max_sd functions.
|
||||
// Note that the semantics are a bit different from Rust simd_min
|
||||
// and simd_max intrinsics regarding handling of NaN and -0.0: Rust
|
||||
// matches the IEEE min/max operations, while x86 has different
|
||||
// semantics.
|
||||
"min.sd" | "max.sd" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"min.sd" => FloatBinOp::Min,
|
||||
"max.sd" => FloatBinOp::Max,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
bin_op_sd(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_min_pd and _mm_max_pd functions.
|
||||
// Note that the semantics are a bit different from Rust simd_min
|
||||
// and simd_max intrinsics regarding handling of NaN and -0.0: Rust
|
||||
// matches the IEEE min/max operations, while x86 has different
|
||||
// semantics.
|
||||
"min.pd" | "max.pd" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"min.pd" => FloatBinOp::Min,
|
||||
"max.pd" => FloatBinOp::Max,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
bin_op_pd(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_sqrt_sd function.
|
||||
// Performs the operations on the first component of `op` and
|
||||
// copies the remaining components from `op`.
|
||||
"sqrt.sd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
let op0 = this.read_scalar(&this.project_index(&op, 0)?)?.to_u64()?;
|
||||
// FIXME using host floats
|
||||
let res0 = Scalar::from_u64(f64::from_bits(op0).sqrt().to_bits());
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
this.copy_op(
|
||||
&this.project_index(&op, i)?,
|
||||
&this.project_index(&dest, i)?,
|
||||
/*allow_transmute*/ false,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_sqrt_pd function.
|
||||
// Performs the operations on all components of `op`.
|
||||
"sqrt.pd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u64()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// FIXME using host floats
|
||||
let res = Scalar::from_u64(f64::from_bits(op).sqrt().to_bits());
|
||||
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cmp*_sd functions.
|
||||
// Performs a comparison operation on the first component of `left`
|
||||
// and `right`, returning 0 if false or `u64::MAX` if true. The remaining
|
||||
// components are copied from `left`.
|
||||
"cmp.sd" => {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = FloatBinOp::Cmp(FloatCmpOp::from_intrinsic_imm(
|
||||
this.read_scalar(imm)?.to_i8()?,
|
||||
"llvm.x86.sse2.cmp.sd",
|
||||
)?);
|
||||
|
||||
bin_op_sd(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cmp*_pd functions.
|
||||
// Performs a comparison operation on each component of `left`
|
||||
// and `right`. For each component, returns 0 if false or `u64::MAX`
|
||||
// if true.
|
||||
"cmp.pd" => {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = FloatBinOp::Cmp(FloatCmpOp::from_intrinsic_imm(
|
||||
this.read_scalar(imm)?.to_i8()?,
|
||||
"llvm.x86.sse2.cmp.pd",
|
||||
)?);
|
||||
|
||||
bin_op_pd(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{,u}comi{eq,lt,le,gt,ge,neq}_sd functions.
|
||||
// Compares the first component of `left` and `right` and returns
|
||||
// a scalar value (0 or 1).
|
||||
"comieq.sd" | "comilt.sd" | "comile.sd" | "comigt.sd" | "comige.sd" | "comineq.sd"
|
||||
| "ucomieq.sd" | "ucomilt.sd" | "ucomile.sd" | "ucomigt.sd" | "ucomige.sd"
|
||||
| "ucomineq.sd" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
|
||||
assert_eq!(left_len, right_len);
|
||||
|
||||
let left = this.read_scalar(&this.project_index(&left, 0)?)?.to_f64()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, 0)?)?.to_f64()?;
|
||||
// The difference between the com* and ucom* variants is signaling
|
||||
// of exceptions when either argument is a quiet NaN. We do not
|
||||
// support accessing the SSE status register from miri (or from Rust,
|
||||
// for that matter), so we treat equally both variants.
|
||||
let res = match unprefixed_name {
|
||||
"comieq.sd" | "ucomieq.sd" => left == right,
|
||||
"comilt.sd" | "ucomilt.sd" => left < right,
|
||||
"comile.sd" | "ucomile.sd" => left <= right,
|
||||
"comigt.sd" | "ucomigt.sd" => left > right,
|
||||
"comige.sd" | "ucomige.sd" => left >= right,
|
||||
"comineq.sd" | "ucomineq.sd" => left != right,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cvtpd_ps function.
|
||||
// Converts packed f64 to packed f32.
|
||||
"cvtpd2ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// op is f64x2, dest is f32x4
|
||||
assert_eq!(op_len, 2);
|
||||
assert_eq!(dest_len, 4);
|
||||
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_f64()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = op.convert(/*loses_info*/ &mut false).value;
|
||||
this.write_scalar(Scalar::from_f32(res), &dest)?;
|
||||
}
|
||||
// Fill the remaining with zeros
|
||||
for i in op_len..dest_len {
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
this.write_scalar(Scalar::from_u32(0), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtps_pd function.
|
||||
// Converts packed f32 to packed f64.
|
||||
"cvtps2pd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// op is f32x4, dest is f64x2
|
||||
assert_eq!(op_len, 4);
|
||||
assert_eq!(dest_len, 2);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_f32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = op.convert(/*loses_info*/ &mut false).value;
|
||||
this.write_scalar(Scalar::from_f64(res), &dest)?;
|
||||
}
|
||||
// the two remaining f32 are ignored
|
||||
}
|
||||
// Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions.
|
||||
// Converts packed f64 to packed i32.
|
||||
"cvtpd2dq" | "cvttpd2dq" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// op is f64x2, dest is i32x4
|
||||
assert_eq!(op_len, 2);
|
||||
assert_eq!(dest_len, 4);
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtpd2dq
|
||||
"cvtpd2dq" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttpd2dq
|
||||
"cvttpd2dq" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_f64()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res =
|
||||
this.float_to_int_checked(op, dest.layout.ty, rnd).unwrap_or_else(|| {
|
||||
// Fallback to minimum according to SSE2 semantics.
|
||||
Scalar::from_i32(i32::MIN)
|
||||
});
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
// Fill the remaining with zeros
|
||||
for i in op_len..dest_len {
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
this.write_scalar(Scalar::from_i32(0), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtsd_si32 and _mm_cvttsd_si32 functions.
|
||||
// Converts the first component of `op` from f64 to i32.
|
||||
"cvtsd2si" | "cvttsd2si" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, _) = this.operand_to_simd(op)?;
|
||||
|
||||
let op = this.read_scalar(&this.project_index(&op, 0)?)?.to_f64()?;
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtsd2si
|
||||
"cvtsd2si" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttsd2si
|
||||
"cvttsd2si" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let res = this.float_to_int_checked(op, dest.layout.ty, rnd).unwrap_or_else(|| {
|
||||
// Fallback to minimum according to SSE semantics.
|
||||
Scalar::from_i32(i32::MIN)
|
||||
});
|
||||
|
||||
this.write_scalar(res, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cvtsd_si64 and _mm_cvttsd_si64 functions.
|
||||
// Converts the first component of `op` from f64 to i64.
|
||||
"cvtsd2si64" | "cvttsd2si64" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, _) = this.operand_to_simd(op)?;
|
||||
|
||||
let op = this.read_scalar(&this.project_index(&op, 0)?)?.to_f64()?;
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtsd2si
|
||||
"cvtsd2si64" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttsd2si
|
||||
"cvttsd2si64" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let res = this.float_to_int_checked(op, dest.layout.ty, rnd).unwrap_or_else(|| {
|
||||
// Fallback to minimum according to SSE semantics.
|
||||
Scalar::from_i64(i64::MIN)
|
||||
});
|
||||
|
||||
this.write_scalar(res, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cvtsd_ss and _mm_cvtss_sd functions.
|
||||
// Converts the first f64/f32 from `right` to f32/f64 and copies
|
||||
// the remaining elements from `left`
|
||||
"cvtsd2ss" | "cvtss2sd" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, _) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
|
||||
// Convert first element of `right`
|
||||
let right0 = this.read_immediate(&this.project_index(&right, 0)?)?;
|
||||
let dest0 = this.project_index(&dest, 0)?;
|
||||
// `float_to_float_or_int` here will convert from f64 to f32 (cvtsd2ss) or
|
||||
// from f32 to f64 (cvtss2sd).
|
||||
let res0 = this.float_to_float_or_int(&right0, dest0.layout.ty)?;
|
||||
this.write_immediate(res0, &dest0)?;
|
||||
|
||||
// Copy remaining from `left`
|
||||
for i in 1..dest_len {
|
||||
this.copy_op(
|
||||
&this.project_index(&left, i)?,
|
||||
&this.project_index(&dest, i)?,
|
||||
/*allow_transmute*/ false,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_movemask_pd function.
|
||||
// Returns a scalar integer where the i-th bit is the highest
|
||||
// bit of the i-th component of `op`.
|
||||
// https://www.felixcloutier.com/x86/movmskpd
|
||||
"movmsk.pd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
|
||||
let mut res = 0;
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||
let op = op.to_u64()?;
|
||||
|
||||
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
|
||||
res |= (op >> 63) << i;
|
||||
}
|
||||
|
||||
this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
|
||||
}
|
||||
_ => return Ok(EmulateByNameResult::NotSupported),
|
||||
}
|
||||
Ok(EmulateByNameResult::NeedsJumping)
|
||||
}
|
||||
}
|
||||
|
||||
/// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts
|
||||
/// the first value.
|
||||
fn extract_first_u64<'tcx>(
|
||||
this: &crate::MiriInterpCx<'_, 'tcx>,
|
||||
op: &MPlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, u64> {
|
||||
// Transmute vector to `[u64; 2]`
|
||||
let u64_array_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, 2))?;
|
||||
let op = op.transmute(u64_array_layout, this)?;
|
||||
|
||||
// Get the first u64 from the array
|
||||
this.read_scalar(&this.project_index(&op, 0)?)?.to_u64()
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum FloatBinOp {
|
||||
/// Comparison
|
||||
Cmp(FloatCmpOp),
|
||||
/// Minimum value (with SSE semantics)
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/minsd>
|
||||
/// <https://www.felixcloutier.com/x86/minpd>
|
||||
Min,
|
||||
/// Maximum value (with SSE semantics)
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/maxsd>
|
||||
/// <https://www.felixcloutier.com/x86/maxpd>
|
||||
Max,
|
||||
}
|
||||
|
||||
/// Performs `which` scalar operation on `left` and `right` and returns
|
||||
/// the result.
|
||||
// FIXME make this generic over apfloat type to reduce code duplicaton with bin_op_f32
|
||||
fn bin_op_f64<'tcx>(
|
||||
which: FloatBinOp,
|
||||
left: &ImmTy<'tcx, Provenance>,
|
||||
right: &ImmTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, Scalar<Provenance>> {
|
||||
match which {
|
||||
FloatBinOp::Cmp(which) => {
|
||||
let left = left.to_scalar().to_f64()?;
|
||||
let right = right.to_scalar().to_f64()?;
|
||||
// FIXME: Make sure that these operations match the semantics of cmppd
|
||||
let res = match which {
|
||||
FloatCmpOp::Eq => left == right,
|
||||
FloatCmpOp::Lt => left < right,
|
||||
FloatCmpOp::Le => left <= right,
|
||||
FloatCmpOp::Unord => left.is_nan() || right.is_nan(),
|
||||
FloatCmpOp::Neq => left != right,
|
||||
FloatCmpOp::Nlt => !(left < right),
|
||||
FloatCmpOp::Nle => !(left <= right),
|
||||
FloatCmpOp::Ord => !left.is_nan() && !right.is_nan(),
|
||||
};
|
||||
Ok(Scalar::from_u64(if res { u64::MAX } else { 0 }))
|
||||
}
|
||||
FloatBinOp::Min => {
|
||||
let left = left.to_scalar().to_f64()?;
|
||||
let right = right.to_scalar().to_f64()?;
|
||||
// SSE semantics to handle zero and NaN. Note that `x == Single::ZERO`
|
||||
// is true when `x` is either +0 or -0.
|
||||
if (left == Double::ZERO && right == Double::ZERO)
|
||||
|| left.is_nan()
|
||||
|| right.is_nan()
|
||||
|| left >= right
|
||||
{
|
||||
Ok(Scalar::from_f64(right))
|
||||
} else {
|
||||
Ok(Scalar::from_f64(left))
|
||||
}
|
||||
}
|
||||
FloatBinOp::Max => {
|
||||
let left = left.to_scalar().to_f64()?;
|
||||
let right = right.to_scalar().to_f64()?;
|
||||
// SSE semantics to handle zero and NaN. Note that `x == Single::ZERO`
|
||||
// is true when `x` is either +0 or -0.
|
||||
if (left == Double::ZERO && right == Double::ZERO)
|
||||
|| left.is_nan()
|
||||
|| right.is_nan()
|
||||
|| left <= right
|
||||
{
|
||||
Ok(Scalar::from_f64(right))
|
||||
} else {
|
||||
Ok(Scalar::from_f64(left))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Performs `which` operation on the first component of `left` and `right`
|
||||
/// and copies the other components from `left`. The result is stored in `dest`.
|
||||
fn bin_op_sd<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
left: &OpTy<'tcx, Provenance>,
|
||||
right: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
let res0 = bin_op_f64(
|
||||
which,
|
||||
&this.read_immediate(&this.project_index(&left, 0)?)?,
|
||||
&this.read_immediate(&this.project_index(&right, 0)?)?,
|
||||
)?;
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
this.copy_op(
|
||||
&this.project_index(&left, i)?,
|
||||
&this.project_index(&dest, i)?,
|
||||
/*allow_transmute*/ false,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs `which` operation on each component of `left` and
|
||||
/// `right`, storing the result is stored in `dest`.
|
||||
fn bin_op_pd<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
left: &OpTy<'tcx, Provenance>,
|
||||
right: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let right = this.read_immediate(&this.project_index(&right, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = bin_op_f64(which, &left, &right)?;
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,5 +1,15 @@
|
||||
//@only-target-x86_64
|
||||
// Ignore everything except x86 and x86_64
|
||||
// Any additional targets added to CI should be ignored here
|
||||
//@ignore-target-aarch64
|
||||
//@ignore-target-arm
|
||||
//@ignore-target-avr
|
||||
//@ignore-target-s390x
|
||||
//@ignore-target-thumbv7em
|
||||
//@ignore-target-wasm32
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
use std::f32::NAN;
|
||||
use std::mem::transmute;
|
||||
@ -987,6 +997,8 @@ unsafe fn test_sse() {
|
||||
}
|
||||
test_mm_cvtsi32_ss();
|
||||
|
||||
// Intrinsic only available on x86_64
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_cvtss_si64() {
|
||||
let inputs = &[
|
||||
@ -1007,8 +1019,11 @@ unsafe fn test_sse() {
|
||||
assert_eq!(e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", i, x, r, e);
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
test_mm_cvtss_si64();
|
||||
|
||||
// Intrinsic only available on x86_64
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_cvttss_si64() {
|
||||
let inputs = &[
|
||||
@ -1032,8 +1047,11 @@ unsafe fn test_sse() {
|
||||
assert_eq!(e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", i, x, r, e);
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
test_mm_cvttss_si64();
|
||||
|
||||
// Intrinsic only available on x86_64
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_cvtsi64_ss() {
|
||||
let inputs = &[
|
||||
@ -1053,6 +1071,7 @@ unsafe fn test_sse() {
|
||||
assert_eq_m128(e, r);
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
test_mm_cvtsi64_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
|
828
src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
Normal file
828
src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
Normal file
@ -0,0 +1,828 @@
|
||||
// Ignore everything except x86 and x86_64
|
||||
// Any additional targets added to CI should be ignored here
|
||||
//@ignore-target-aarch64
|
||||
//@ignore-target-arm
|
||||
//@ignore-target-avr
|
||||
//@ignore-target-s390x
|
||||
//@ignore-target-thumbv7em
|
||||
//@ignore-target-wasm32
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
use std::f64::NAN;
|
||||
use std::mem::transmute;
|
||||
|
||||
fn main() {
|
||||
assert!(is_x86_feature_detected!("sse2"));
|
||||
|
||||
unsafe {
|
||||
test_sse2();
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `__m128i` from two `i64` values, with the arguments given in the
/// reverse of the order `_mm_set_epi64x` expects.
#[target_feature(enable = "sse2")]
unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
    // Swap the arguments before handing them to `_mm_set_epi64x`.
    let (first_arg, second_arg) = (b, a);
    _mm_set_epi64x(first_arg, second_arg)
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_sse2() {
|
||||
// Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs
|
||||
|
||||
unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
|
||||
_mm_set_epi64x(b, a)
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
|
||||
let r = _mm_cmpeq_ps(a, b);
|
||||
if _mm_movemask_ps(r) != 0b1111 {
|
||||
panic!("{:?} != {:?}", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
|
||||
assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
|
||||
if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
|
||||
panic!("{:?} != {:?}", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_avg_epu8() {
|
||||
let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
|
||||
let r = _mm_avg_epu8(a, b);
|
||||
assert_eq_m128i(r, _mm_set1_epi8(6));
|
||||
}
|
||||
test_mm_avg_epu8();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_avg_epu16() {
|
||||
let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
|
||||
let r = _mm_avg_epu16(a, b);
|
||||
assert_eq_m128i(r, _mm_set1_epi16(6));
|
||||
}
|
||||
test_mm_avg_epu16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_mulhi_epi16() {
|
||||
let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
|
||||
let r = _mm_mulhi_epi16(a, b);
|
||||
assert_eq_m128i(r, _mm_set1_epi16(-16));
|
||||
}
|
||||
test_mm_mulhi_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_mulhi_epu16() {
|
||||
let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
|
||||
let r = _mm_mulhi_epu16(a, b);
|
||||
assert_eq_m128i(r, _mm_set1_epi16(15));
|
||||
}
|
||||
test_mm_mulhi_epu16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_mul_epu32() {
|
||||
let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
|
||||
let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
|
||||
let r = _mm_mul_epu32(a, b);
|
||||
let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_mul_epu32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sad_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm_setr_epi8(
|
||||
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
);
|
||||
let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
|
||||
let r = _mm_sad_epu8(a, b);
|
||||
let e = _mm_setr_epi64x(1020, 614);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_sad_epu8();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sll_epi16() {
|
||||
let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
|
||||
let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(
|
||||
r,
|
||||
_mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
|
||||
);
|
||||
let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
|
||||
assert_eq_m128i(r, _mm_set1_epi16(0));
|
||||
let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi16(0));
|
||||
}
|
||||
test_mm_sll_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_srl_epi16() {
|
||||
let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
|
||||
let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0));
|
||||
let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
|
||||
assert_eq_m128i(r, _mm_set1_epi16(0));
|
||||
let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi16(0));
|
||||
}
|
||||
test_mm_srl_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sra_epi16() {
|
||||
let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
|
||||
let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10));
|
||||
let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
|
||||
assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
|
||||
let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
|
||||
}
|
||||
test_mm_sra_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sll_epi32() {
|
||||
let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
|
||||
let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
|
||||
let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
|
||||
assert_eq_m128i(r, _mm_set1_epi32(0));
|
||||
let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi32(0));
|
||||
}
|
||||
test_mm_sll_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_srl_epi32() {
|
||||
let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
|
||||
let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
|
||||
let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
|
||||
assert_eq_m128i(r, _mm_set1_epi32(0));
|
||||
let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi32(0));
|
||||
}
|
||||
test_mm_srl_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sra_epi32() {
|
||||
let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
|
||||
let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
|
||||
let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
|
||||
let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
|
||||
}
|
||||
test_mm_sra_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sll_epi64() {
|
||||
let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
|
||||
let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
|
||||
let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
|
||||
assert_eq_m128i(r, _mm_set1_epi64x(0));
|
||||
let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi64x(0));
|
||||
}
|
||||
test_mm_sll_epi64();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_srl_epi64() {
|
||||
let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
|
||||
let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
|
||||
let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
|
||||
assert_eq_m128i(r, _mm_set1_epi64x(0));
|
||||
let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
|
||||
assert_eq_m128i(r, _mm_set1_epi64x(0));
|
||||
}
|
||||
test_mm_srl_epi64();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtepi32_ps() {
|
||||
let a = _mm_setr_epi32(1, 2, 3, 4);
|
||||
let r = _mm_cvtepi32_ps(a);
|
||||
assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
|
||||
}
|
||||
test_mm_cvtepi32_ps();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtps_epi32() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_cvtps_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
|
||||
}
|
||||
test_mm_cvtps_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvttps_epi32() {
|
||||
let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
|
||||
let r = _mm_cvttps_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
|
||||
|
||||
let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
|
||||
let r = _mm_cvttps_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
|
||||
}
|
||||
test_mm_cvttps_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_packs_epi16() {
|
||||
let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
|
||||
let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
|
||||
let r = _mm_packs_epi16(a, b);
|
||||
assert_eq_m128i(
|
||||
r,
|
||||
_mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F),
|
||||
);
|
||||
}
|
||||
test_mm_packs_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_packus_epi16() {
|
||||
let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
|
||||
let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
|
||||
let r = _mm_packus_epi16(a, b);
|
||||
assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0));
|
||||
}
|
||||
test_mm_packus_epi16();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_packs_epi32() {
|
||||
let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
|
||||
let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
|
||||
let r = _mm_packs_epi32(a, b);
|
||||
assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
|
||||
}
|
||||
test_mm_packs_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_min_sd() {
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let b = _mm_setr_pd(5.0, 10.0);
|
||||
let r = _mm_min_sd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
|
||||
}
|
||||
test_mm_min_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_min_pd() {
|
||||
let a = _mm_setr_pd(-1.0, 5.0);
|
||||
let b = _mm_setr_pd(-100.0, 20.0);
|
||||
let r = _mm_min_pd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0));
|
||||
|
||||
// `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because
|
||||
// the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling
|
||||
// of `-0.0`.
|
||||
let a = _mm_setr_pd(-0.0, 0.0);
|
||||
let b = _mm_setr_pd(0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
}
|
||||
test_mm_min_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_max_sd() {
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let b = _mm_setr_pd(5.0, 10.0);
|
||||
let r = _mm_max_sd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
|
||||
}
|
||||
test_mm_max_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_max_pd() {
|
||||
let a = _mm_setr_pd(-1.0, 5.0);
|
||||
let b = _mm_setr_pd(-100.0, 20.0);
|
||||
let r = _mm_max_pd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0));
|
||||
|
||||
// `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because
|
||||
// the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling
|
||||
// of `-0.0`.
|
||||
let a = _mm_setr_pd(-0.0, 0.0);
|
||||
let b = _mm_setr_pd(0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
}
|
||||
test_mm_max_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sqrt_sd() {
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let b = _mm_setr_pd(5.0, 10.0);
|
||||
let r = _mm_sqrt_sd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
|
||||
}
|
||||
test_mm_sqrt_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sqrt_pd() {
|
||||
let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
|
||||
assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
|
||||
}
|
||||
test_mm_sqrt_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpeq_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpeq_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmplt_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmplt_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmple_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmple_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpgt_sd() {
|
||||
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpgt_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpge_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpge_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpord_sd() {
|
||||
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpord_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpunord_sd() {
|
||||
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpunord_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpneq_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpneq_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnlt_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnlt_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnle_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnle_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpngt_sd() {
|
||||
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpngt_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnge_sd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, transmute(2.0f64));
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnge_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpeq_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpeq_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmplt_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmplt_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmple_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmple_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpgt_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpgt_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpge_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpge_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpord_pd() {
|
||||
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpord_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpunord_pd() {
|
||||
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpunord_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpneq_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(!0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpneq_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnlt_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnlt_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnle_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, 0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnle_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpngt_pd() {
|
||||
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpngt_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cmpnge_pd() {
|
||||
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
|
||||
let e = _mm_setr_epi64x(0, !0);
|
||||
let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
test_mm_cmpnge_pd();
|
||||
|
||||
/// Checks `_mm_comieq_sd`: a non-zero result is expected when the first
/// elements are both 1.0, and zero when the first element of `a` is NaN.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comieq_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_comieq_sd(lhs, rhs), 0);

    let lhs = _mm_setr_pd(NAN, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_comieq_sd(lhs, rhs), 0);
}
|
||||
test_mm_comieq_sd();
|
||||
|
||||
/// Checks that `_mm_comilt_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comilt_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_comilt_sd(lhs, rhs), 0);
}
|
||||
test_mm_comilt_sd();
|
||||
|
||||
/// Checks that `_mm_comile_sd` returns a non-zero value when the first
/// elements of both operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comile_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_comile_sd(lhs, rhs), 0);
}
|
||||
test_mm_comile_sd();
|
||||
|
||||
/// Checks that `_mm_comigt_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comigt_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_comigt_sd(lhs, rhs), 0);
}
|
||||
test_mm_comigt_sd();
|
||||
|
||||
/// Checks that `_mm_comige_sd` returns a non-zero value when the first
/// elements of both operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comige_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_comige_sd(lhs, rhs), 0);
}
|
||||
test_mm_comige_sd();
|
||||
|
||||
/// Checks that `_mm_comineq_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_comineq_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_comineq_sd(lhs, rhs), 0);
}
|
||||
test_mm_comineq_sd();
|
||||
|
||||
/// Checks `_mm_ucomieq_sd`: a non-zero result is expected when the first
/// elements are both 1.0, and zero when both first elements are NaN.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomieq_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_ucomieq_sd(lhs, rhs), 0);

    let lhs = _mm_setr_pd(NAN, 2.0);
    let rhs = _mm_setr_pd(NAN, 3.0);
    assert_eq!(_mm_ucomieq_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomieq_sd();
|
||||
|
||||
/// Checks that `_mm_ucomilt_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomilt_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_ucomilt_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomilt_sd();
|
||||
|
||||
/// Checks that `_mm_ucomile_sd` returns a non-zero value when the first
/// elements of both operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomile_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_ucomile_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomile_sd();
|
||||
|
||||
/// Checks that `_mm_ucomigt_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomigt_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_ucomigt_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomigt_sd();
|
||||
|
||||
/// Checks that `_mm_ucomige_sd` returns a non-zero value when the first
/// elements of both operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomige_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_ne!(_mm_ucomige_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomige_sd();
|
||||
|
||||
/// Checks that `_mm_ucomineq_sd` returns zero when the first elements of both
/// operands are 1.0.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_ucomineq_sd() {
    let lhs = _mm_setr_pd(1.0, 2.0);
    let rhs = _mm_setr_pd(1.0, 3.0);
    assert_eq!(_mm_ucomineq_sd(lhs, rhs), 0);
}
|
||||
test_mm_ucomineq_sd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtpd_ps() {
|
||||
let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
|
||||
assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
|
||||
|
||||
let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
|
||||
assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
|
||||
|
||||
let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
|
||||
assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
|
||||
|
||||
let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
|
||||
assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
|
||||
}
|
||||
test_mm_cvtpd_ps();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtps_pd() {
|
||||
let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
|
||||
assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
|
||||
|
||||
let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
|
||||
assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
|
||||
}
|
||||
test_mm_cvtps_pd();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtpd_epi32() {
|
||||
let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
|
||||
|
||||
let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
|
||||
|
||||
let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
|
||||
|
||||
let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
|
||||
|
||||
let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
|
||||
assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
|
||||
}
|
||||
test_mm_cvtpd_epi32();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvttpd_epi32() {
|
||||
let a = _mm_setr_pd(-1.1, 2.2);
|
||||
let r = _mm_cvttpd_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
|
||||
|
||||
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
|
||||
let r = _mm_cvttpd_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
|
||||
}
|
||||
test_mm_cvttpd_epi32();
|
||||
|
||||
/// Checks `_mm_cvtsd_si32`: a first element of -2.0 is expected to convert to
/// -2, while `f64::MAX` and NaN are expected to yield `i32::MIN`.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvtsd_si32() {
    // (input, expected output) pairs, checked in order.
    let cases = [
        (_mm_setr_pd(-2.0, 5.0), -2),
        (_mm_setr_pd(f64::MAX, f64::MIN), i32::MIN),
        (_mm_setr_pd(f64::NAN, f64::NAN), i32::MIN),
    ];
    for (input, expected) in cases {
        assert_eq!(_mm_cvtsd_si32(input), expected);
    }
}
|
||||
test_mm_cvtsd_si32();
|
||||
|
||||
/// Checks `_mm_cvttsd_si32`: a first element of -1.1 is expected to give -1,
/// and negative infinity is expected to give `i32::MIN`.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvttsd_si32() {
    let truncated = _mm_cvttsd_si32(_mm_setr_pd(-1.1, 2.2));
    assert_eq!(truncated, -1);

    let truncated = _mm_cvttsd_si32(_mm_setr_pd(f64::NEG_INFINITY, f64::NAN));
    assert_eq!(truncated, i32::MIN);
}
|
||||
test_mm_cvttsd_si32();
|
||||
|
||||
// Intrinsic only available on x86_64
/// Checks `_mm_cvtsd_si64`: a first element of -2.0 is expected to convert to
/// -2, and `f64::MAX` is expected to yield `i64::MIN`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvtsd_si64() {
    let converted = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
    assert_eq!(converted, -2_i64);

    let converted = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
    assert_eq!(converted, i64::MIN);
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
test_mm_cvtsd_si64();
|
||||
|
||||
// Intrinsic only available on x86_64
/// Checks that `_mm_cvttsd_si64` turns a first element of -1.1 into -1.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvttsd_si64() {
    let truncated = _mm_cvttsd_si64(_mm_setr_pd(-1.1, 2.2));
    assert_eq!(truncated, -1_i64);
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
test_mm_cvttsd_si64();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtsd_ss() {
|
||||
let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
|
||||
let b = _mm_setr_pd(2.0, -5.0);
|
||||
|
||||
let r = _mm_cvtsd_ss(a, b);
|
||||
|
||||
assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
|
||||
|
||||
let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
|
||||
let b = _mm_setr_pd(f64::INFINITY, -5.0);
|
||||
|
||||
let r = _mm_cvtsd_ss(a, b);
|
||||
|
||||
assert_eq_m128(
|
||||
r,
|
||||
_mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY),
|
||||
);
|
||||
}
|
||||
test_mm_cvtsd_ss();
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtss_sd() {
|
||||
let a = _mm_setr_pd(-1.1, 2.2);
|
||||
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
let r = _mm_cvtss_sd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
|
||||
|
||||
let a = _mm_setr_pd(-1.1, f64::INFINITY);
|
||||
let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
|
||||
|
||||
let r = _mm_cvtss_sd(a, b);
|
||||
assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
|
||||
}
|
||||
test_mm_cvtss_sd();
|
||||
|
||||
/// Checks `_mm_movemask_pd`: (-1.0, 5.0) is expected to give `0b01` and
/// (-1.0, -5.0) is expected to give `0b11`.
#[target_feature(enable = "sse2")]
unsafe fn test_mm_movemask_pd() {
    let mask = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
    assert_eq!(mask, 0b01);

    let mask = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
    assert_eq!(mask, 0b11);
}
|
||||
test_mm_movemask_pd();
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user