Implement _mm256_permute2f128_ps and _mm256_permute2f128_pd intrinsics
This commit is contained in:
parent
f6a8c3afb5
commit
6a53acefd8
@ -172,8 +172,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
}
|
||||
}
|
||||
}
|
||||
"llvm.x86.avx2.vperm2i128" => {
|
||||
"llvm.x86.avx2.vperm2i128"
|
||||
| "llvm.x86.avx.vperm2f128.ps.256"
|
||||
| "llvm.x86.avx.vperm2f128.pd.256" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd
|
||||
let (a, b, imm8) = match args {
|
||||
[a, b, imm8] => (a, b, imm8),
|
||||
_ => bug!("wrong number of args for intrinsic {intrinsic}"),
|
||||
@ -182,19 +186,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
let b = codegen_operand(fx, b);
|
||||
let imm8 = codegen_operand(fx, imm8).load_scalar(fx);
|
||||
|
||||
let a_0 = a.value_lane(fx, 0).load_scalar(fx);
|
||||
let a_1 = a.value_lane(fx, 1).load_scalar(fx);
|
||||
let a_low = fx.bcx.ins().iconcat(a_0, a_1);
|
||||
let a_2 = a.value_lane(fx, 2).load_scalar(fx);
|
||||
let a_3 = a.value_lane(fx, 3).load_scalar(fx);
|
||||
let a_high = fx.bcx.ins().iconcat(a_2, a_3);
|
||||
let a_low = a.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
|
||||
let a_high = a.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
|
||||
|
||||
let b_0 = b.value_lane(fx, 0).load_scalar(fx);
|
||||
let b_1 = b.value_lane(fx, 1).load_scalar(fx);
|
||||
let b_low = fx.bcx.ins().iconcat(b_0, b_1);
|
||||
let b_2 = b.value_lane(fx, 2).load_scalar(fx);
|
||||
let b_3 = b.value_lane(fx, 3).load_scalar(fx);
|
||||
let b_high = fx.bcx.ins().iconcat(b_2, b_3);
|
||||
let b_low = b.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
|
||||
let b_high = b.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
|
||||
|
||||
fn select4(
|
||||
fx: &mut FunctionCx<'_, '_, '_>,
|
||||
@ -219,16 +215,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
|
||||
let control0 = imm8;
|
||||
let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
|
||||
let (res_0, res_1) = fx.bcx.ins().isplit(res_low);
|
||||
|
||||
let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
|
||||
let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
|
||||
let (res_2, res_3) = fx.bcx.ins().isplit(res_high);
|
||||
|
||||
ret.place_lane(fx, 0).to_ptr().store(fx, res_0, MemFlags::trusted());
|
||||
ret.place_lane(fx, 1).to_ptr().store(fx, res_1, MemFlags::trusted());
|
||||
ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted());
|
||||
ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted());
|
||||
ret.place_typed_lane(fx, fx.tcx.types.u128, 0).to_ptr().store(
|
||||
fx,
|
||||
res_low,
|
||||
MemFlags::trusted(),
|
||||
);
|
||||
ret.place_typed_lane(fx, fx.tcx.types.u128, 1).to_ptr().store(
|
||||
fx,
|
||||
res_high,
|
||||
MemFlags::trusted(),
|
||||
);
|
||||
}
|
||||
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
|
||||
let a = match args {
|
||||
|
@ -243,6 +243,34 @@ impl<'tcx> CValue<'tcx> {
|
||||
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let lane_layout = fx.layout_of(lane_ty);
|
||||
assert!(lane_idx < lane_count);
|
||||
|
||||
match self.0 {
|
||||
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
|
||||
CValueInner::ByRef(ptr, None) => {
|
||||
let field_offset = lane_layout.size * lane_idx;
|
||||
let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
|
||||
CValue::by_ref(field_ptr, lane_layout)
|
||||
}
|
||||
CValueInner::ByRef(_, Some(_)) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`CValue::value_field`] except using the passed type as lane type instead of the one
|
||||
/// specified by the vector type.
|
||||
pub(crate) fn value_typed_lane(
|
||||
self,
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
lane_ty: Ty<'tcx>,
|
||||
lane_idx: u64,
|
||||
) -> CValue<'tcx> {
|
||||
let layout = self.1;
|
||||
assert!(layout.ty.is_simd());
|
||||
let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let lane_layout = fx.layout_of(lane_ty);
|
||||
assert!(
|
||||
(lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
|
||||
);
|
||||
|
||||
match self.0 {
|
||||
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
|
||||
CValueInner::ByRef(ptr, None) => {
|
||||
@ -734,6 +762,34 @@ impl<'tcx> CPlace<'tcx> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`CPlace::place_field`] except using the passed type as lane type instead of the one
|
||||
/// specified by the vector type.
|
||||
pub(crate) fn place_typed_lane(
|
||||
self,
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
lane_ty: Ty<'tcx>,
|
||||
lane_idx: u64,
|
||||
) -> CPlace<'tcx> {
|
||||
let layout = self.layout();
|
||||
assert!(layout.ty.is_simd());
|
||||
let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let lane_layout = fx.layout_of(lane_ty);
|
||||
assert!(
|
||||
(lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
|
||||
);
|
||||
|
||||
match self.inner {
|
||||
CPlaceInner::Var(_, _) => unreachable!(),
|
||||
CPlaceInner::VarPair(_, _, _) => unreachable!(),
|
||||
CPlaceInner::Addr(ptr, None) => {
|
||||
let field_offset = lane_layout.size * lane_idx;
|
||||
let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
|
||||
CPlace::for_ptr(field_ptr, lane_layout)
|
||||
}
|
||||
CPlaceInner::Addr(_, Some(_)) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn place_index(
|
||||
self,
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
|
Loading…
x
Reference in New Issue
Block a user