Merge pull request #1490 from folkertdev/add-llvm-x86-crc32
add all `llvm.x86.sse42.crc32.*.*` intrinsics
This commit is contained in:
commit
2df34f9091
@@ -210,6 +210,21 @@ fn rust_call_abi() {
|
|||||||
#[allow(improper_ctypes_definitions)]
|
#[allow(improper_ctypes_definitions)]
|
||||||
extern "C" fn foo(_a: I64X2) {}
|
extern "C" fn foo(_a: I64X2) {}
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
#[target_feature(enable = "sse4.2")]
|
||||||
|
#[cfg(not(jit))]
|
||||||
|
unsafe fn test_crc32() {
|
||||||
|
assert!(is_x86_feature_detected!("sse4.2"));
|
||||||
|
|
||||||
|
let a = 42u32;
|
||||||
|
let b = 0xdeadbeefu64;
|
||||||
|
|
||||||
|
assert_eq!(_mm_crc32_u8(a, b as u8), 4135334616);
|
||||||
|
assert_eq!(_mm_crc32_u16(a, b as u16), 1200687288);
|
||||||
|
assert_eq!(_mm_crc32_u32(a, b as u32), 2543798776);
|
||||||
|
assert_eq!(_mm_crc32_u64(a as u64, b as u64), 241952147);
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
unsafe fn test_simd() {
|
unsafe fn test_simd() {
|
||||||
@@ -249,6 +264,9 @@ unsafe fn test_simd() {
|
|||||||
#[rustfmt::skip]
|
#[rustfmt::skip]
|
||||||
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
|
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
|
||||||
assert_eq!(mask1, 1);
|
assert_eq!(mask1, 1);
|
||||||
|
|
||||||
|
#[cfg(not(jit))]
|
||||||
|
test_crc32();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
@@ -847,16 +847,27 @@ fn select4(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"llvm.x86.sse42.crc32.32.32" => {
|
"llvm.x86.sse42.crc32.32.8"
|
||||||
|
| "llvm.x86.sse42.crc32.32.16"
|
||||||
|
| "llvm.x86.sse42.crc32.32.32"
|
||||||
|
| "llvm.x86.sse42.crc32.64.64" => {
|
||||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
|
||||||
intrinsic_args!(fx, args => (crc, v); intrinsic);
|
intrinsic_args!(fx, args => (crc, v); intrinsic);
|
||||||
|
|
||||||
let crc = crc.load_scalar(fx);
|
let crc = crc.load_scalar(fx);
|
||||||
let v = v.load_scalar(fx);
|
let v = v.load_scalar(fx);
|
||||||
|
|
||||||
|
let asm = match intrinsic {
|
||||||
|
"llvm.x86.sse42.crc32.32.8" => "crc32 eax, dl",
|
||||||
|
"llvm.x86.sse42.crc32.32.16" => "crc32 eax, dx",
|
||||||
|
"llvm.x86.sse42.crc32.32.32" => "crc32 eax, edx",
|
||||||
|
"llvm.x86.sse42.crc32.64.64" => "crc32 rax, rdx",
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
codegen_inline_asm_inner(
|
codegen_inline_asm_inner(
|
||||||
fx,
|
fx,
|
||||||
&[InlineAsmTemplatePiece::String("crc32 eax, edx".to_string())],
|
&[InlineAsmTemplatePiece::String(asm.to_string())],
|
||||||
&[
|
&[
|
||||||
CInlineAsmOperand::InOut {
|
CInlineAsmOperand::InOut {
|
||||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
|
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
|
||||||
|
Loading…
Reference in New Issue
Block a user