From 9f562f290dc088c3e24bc7dfb374146ccb48ede8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 5 Sep 2023 20:06:50 +0200 Subject: [PATCH 1/2] Remove special handling in codegen for some SSE2 "storeu" intrinsics Those were removed from stdarch in https://github.com/rust-lang/stdarch/pull/1463 (`<*mut _>::write_unaligned` is used instead) --- src/intrinsics/llvm_x86.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index fdd27a454e0..b990ed7f8bc 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -506,14 +506,6 @@ fn select4( ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted()); ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted()); } - "llvm.x86.sse2.storeu.dq" | "llvm.x86.sse2.storeu.pd" => { - intrinsic_args!(fx, args => (mem_addr, a); intrinsic); - let mem_addr = mem_addr.load_scalar(fx); - - // FIXME correctly handle the unalignment - let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout()); - dest.write_cvalue(fx, a); - } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { let a = match args { [a] => a, From 4cd51770154a0fb0cddd1e94acd34bf7e559f03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 5 Sep 2023 20:17:01 +0200 Subject: [PATCH 2/2] Remove special handling in codegen for some AVX and SSE2 shift by immediate intrinsics Those were removed from stdarch in https://github.com/rust-lang/stdarch/pull/1463 (`simd_shl` and `simd_shr` are used instead) --- src/intrinsics/llvm_x86.rs | 240 ------------------------------------- 1 file changed, 240 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index b990ed7f8bc..e62de6b6147 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -177,244 +177,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) }); } - "llvm.x86.sse2.psrli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrai.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrai.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.psrli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.psrai.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrli.q" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.q imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 64 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.q" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.q imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 64 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.pslli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.psrli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.psrai.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrai.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.pslli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } "llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => { let (a, b) = match args { [a, b] => (a, b), @@ -563,8 +325,6 @@ fn select4( // llvm.x86.avx2.vperm2i128 // llvm.x86.ssse3.pshuf.b.128 // llvm.x86.avx2.pshuf.b -// llvm.x86.avx2.psrli.w -// llvm.x86.sse2.psrli.w fn llvm_add_sub<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>,