Merge pull request #1495 from folkertdev/add-llvm-sse2-cvtps2dq
add `llvm.x86.sse2.cvtps2dq`
This commit is contained in:
commit
c511676a62
@ -251,6 +251,9 @@ unsafe fn test_simd() {
|
|||||||
test_mm_add_epi8();
|
test_mm_add_epi8();
|
||||||
test_mm_add_pd();
|
test_mm_add_pd();
|
||||||
test_mm_cvtepi8_epi16();
|
test_mm_cvtepi8_epi16();
|
||||||
|
#[cfg(not(jit))]
|
||||||
|
test_mm_cvtps_epi32();
|
||||||
|
test_mm_cvttps_epi32();
|
||||||
test_mm_cvtsi128_si64();
|
test_mm_cvtsi128_si64();
|
||||||
|
|
||||||
test_mm_extract_epi8();
|
test_mm_extract_epi8();
|
||||||
@ -476,6 +479,41 @@ unsafe fn test_mm256_permutevar8x32_epi32() {
|
|||||||
assert_eq_m256i(r, e);
|
assert_eq_m256i(r, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
#[target_feature(enable = "avx2")]
|
||||||
|
#[cfg(not(jit))]
|
||||||
|
unsafe fn test_mm_cvtps_epi32() {
|
||||||
|
let floats: [f32; 4] = [1.5, -2.5, i32::MAX as f32 + 1.0, f32::NAN];
|
||||||
|
|
||||||
|
let float_vec = _mm_loadu_ps(floats.as_ptr());
|
||||||
|
let int_vec = _mm_cvtps_epi32(float_vec);
|
||||||
|
|
||||||
|
let mut ints: [i32; 4] = [0; 4];
|
||||||
|
_mm_storeu_si128(ints.as_mut_ptr() as *mut __m128i, int_vec);
|
||||||
|
|
||||||
|
// this is very different from `floats.map(|f| f as i32)`!
|
||||||
|
let expected_ints: [i32; 4] = [2, -2, i32::MIN, i32::MIN];
|
||||||
|
|
||||||
|
assert_eq!(ints, expected_ints);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
#[target_feature(enable = "avx2")]
|
||||||
|
unsafe fn test_mm_cvttps_epi32() {
|
||||||
|
let floats: [f32; 4] = [1.5, -2.5, i32::MAX as f32 + 1.0, f32::NAN];
|
||||||
|
|
||||||
|
let float_vec = _mm_loadu_ps(floats.as_ptr());
|
||||||
|
let int_vec = _mm_cvttps_epi32(float_vec);
|
||||||
|
|
||||||
|
let mut ints: [i32; 4] = [0; 4];
|
||||||
|
_mm_storeu_si128(ints.as_mut_ptr() as *mut __m128i, int_vec);
|
||||||
|
|
||||||
|
// this is very different from `floats.map(|f| f as i32)`!
|
||||||
|
let expected_ints: [i32; 4] = [1, -2, i32::MIN, i32::MIN];
|
||||||
|
|
||||||
|
assert_eq!(ints, expected_ints);
|
||||||
|
}
|
||||||
|
|
||||||
fn test_checked_mul() {
|
fn test_checked_mul() {
|
||||||
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
|
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
|
||||||
assert_eq!(u, None);
|
assert_eq!(u, None);
|
||||||
|
@ -459,11 +459,20 @@ fn select4(
|
|||||||
intrinsic_args!(fx, args => (a); intrinsic);
|
intrinsic_args!(fx, args => (a); intrinsic);
|
||||||
let a = a.load_scalar(fx);
|
let a = a.load_scalar(fx);
|
||||||
|
|
||||||
|
let value = fx.bcx.ins().x86_cvtt2dq(types::I32X4, a);
|
||||||
|
let cvalue = CValue::by_val(value, ret.layout());
|
||||||
|
ret.write_cvalue(fx, cvalue);
|
||||||
|
}
|
||||||
|
"llvm.x86.sse2.cvtps2dq" => {
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32
|
||||||
|
intrinsic_args!(fx, args => (a); intrinsic);
|
||||||
|
let a = a.load_scalar(fx);
|
||||||
|
|
||||||
// Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned
|
// Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned
|
||||||
// into 0x80000000 for which Cranelift doesn't have a native instruction.
|
// into 0x80000000 for which Cranelift doesn't have a native instruction.
|
||||||
codegen_inline_asm_inner(
|
codegen_inline_asm_inner(
|
||||||
fx,
|
fx,
|
||||||
&[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))],
|
&[InlineAsmTemplatePiece::String(format!("cvtps2dq xmm0, xmm0"))],
|
||||||
&[CInlineAsmOperand::InOut {
|
&[CInlineAsmOperand::InOut {
|
||||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||||
_late: true,
|
_late: true,
|
||||||
|
Loading…
Reference in New Issue
Block a user