Merge pull request #1490 from folkertdev/add-llvm-x86-crc32

add all `llvm.x86.sse42.crc32.*.*` intrinsics
This commit is contained in:
bjorn3 2024-05-11 22:39:21 +02:00 committed by GitHub
commit 2df34f9091
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 31 additions and 2 deletions

View File

@ -210,6 +210,21 @@ fn rust_call_abi() {
#[allow(improper_ctypes_definitions)] #[allow(improper_ctypes_definitions)]
extern "C" fn foo(_a: I64X2) {} extern "C" fn foo(_a: I64X2) {}
/// Exercises the SSE4.2 `_mm_crc32_*` intrinsics at each operand width
/// (8, 16, 32 and 64 bits) and compares every result with a fixed expected value.
///
/// Only built for x86_64, and left out entirely when the `jit` cfg is set.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
#[cfg(not(jit))]
unsafe fn test_crc32() {
    // This function is compiled with SSE4.2 enabled, so insist the CPU really has it.
    assert!(is_x86_feature_detected!("sse4.2"));

    let seed = 42u32;
    let data = 0xdeadbeefu64;

    // The narrowing casts deliberately keep only the low bits of `data`.
    let crc_from_u8 = _mm_crc32_u8(seed, data as u8);
    assert_eq!(crc_from_u8, 4_135_334_616);

    let crc_from_u16 = _mm_crc32_u16(seed, data as u16);
    assert_eq!(crc_from_u16, 1_200_687_288);

    let crc_from_u32 = _mm_crc32_u32(seed, data as u32);
    assert_eq!(crc_from_u32, 2_543_798_776);

    // The 64-bit form takes a 64-bit accumulator; widening the seed is lossless.
    let crc_from_u64 = _mm_crc32_u64(u64::from(seed), data);
    assert_eq!(crc_from_u64, 241_952_147);
}
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")] #[target_feature(enable = "sse2")]
unsafe fn test_simd() { unsafe fn test_simd() {
@ -249,6 +264,9 @@ unsafe fn test_simd() {
#[rustfmt::skip] #[rustfmt::skip]
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
assert_eq!(mask1, 1); assert_eq!(mask1, 1);
#[cfg(not(jit))]
test_crc32();
} }
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]

View File

@ -847,16 +847,27 @@ fn select4(
} }
} }
"llvm.x86.sse42.crc32.32.32" => { "llvm.x86.sse42.crc32.32.8"
| "llvm.x86.sse42.crc32.32.16"
| "llvm.x86.sse42.crc32.32.32"
| "llvm.x86.sse42.crc32.64.64" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
intrinsic_args!(fx, args => (crc, v); intrinsic); intrinsic_args!(fx, args => (crc, v); intrinsic);
let crc = crc.load_scalar(fx); let crc = crc.load_scalar(fx);
let v = v.load_scalar(fx); let v = v.load_scalar(fx);
let asm = match intrinsic {
"llvm.x86.sse42.crc32.32.8" => "crc32 eax, dl",
"llvm.x86.sse42.crc32.32.16" => "crc32 eax, dx",
"llvm.x86.sse42.crc32.32.32" => "crc32 eax, edx",
"llvm.x86.sse42.crc32.64.64" => "crc32 rax, rdx",
_ => unreachable!(),
};
codegen_inline_asm_inner( codegen_inline_asm_inner(
fx, fx,
&[InlineAsmTemplatePiece::String("crc32 eax, edx".to_string())], &[InlineAsmTemplatePiece::String(asm.to_string())],
&[ &[
CInlineAsmOperand::InOut { CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),