Implement _mm256_permute2f128_ps and _mm256_permute2f128_pd intrinsics
This commit is contained in:
parent
f6a8c3afb5
commit
6a53acefd8
@ -172,8 +172,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"llvm.x86.avx2.vperm2i128" => {
|
"llvm.x86.avx2.vperm2i128"
|
||||||
|
| "llvm.x86.avx.vperm2f128.ps.256"
|
||||||
|
| "llvm.x86.avx.vperm2f128.pd.256" => {
|
||||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd
|
||||||
let (a, b, imm8) = match args {
|
let (a, b, imm8) = match args {
|
||||||
[a, b, imm8] => (a, b, imm8),
|
[a, b, imm8] => (a, b, imm8),
|
||||||
_ => bug!("wrong number of args for intrinsic {intrinsic}"),
|
_ => bug!("wrong number of args for intrinsic {intrinsic}"),
|
||||||
@ -182,19 +186,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
|||||||
let b = codegen_operand(fx, b);
|
let b = codegen_operand(fx, b);
|
||||||
let imm8 = codegen_operand(fx, imm8).load_scalar(fx);
|
let imm8 = codegen_operand(fx, imm8).load_scalar(fx);
|
||||||
|
|
||||||
let a_0 = a.value_lane(fx, 0).load_scalar(fx);
|
let a_low = a.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
|
||||||
let a_1 = a.value_lane(fx, 1).load_scalar(fx);
|
let a_high = a.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
|
||||||
let a_low = fx.bcx.ins().iconcat(a_0, a_1);
|
|
||||||
let a_2 = a.value_lane(fx, 2).load_scalar(fx);
|
|
||||||
let a_3 = a.value_lane(fx, 3).load_scalar(fx);
|
|
||||||
let a_high = fx.bcx.ins().iconcat(a_2, a_3);
|
|
||||||
|
|
||||||
let b_0 = b.value_lane(fx, 0).load_scalar(fx);
|
let b_low = b.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
|
||||||
let b_1 = b.value_lane(fx, 1).load_scalar(fx);
|
let b_high = b.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
|
||||||
let b_low = fx.bcx.ins().iconcat(b_0, b_1);
|
|
||||||
let b_2 = b.value_lane(fx, 2).load_scalar(fx);
|
|
||||||
let b_3 = b.value_lane(fx, 3).load_scalar(fx);
|
|
||||||
let b_high = fx.bcx.ins().iconcat(b_2, b_3);
|
|
||||||
|
|
||||||
fn select4(
|
fn select4(
|
||||||
fx: &mut FunctionCx<'_, '_, '_>,
|
fx: &mut FunctionCx<'_, '_, '_>,
|
||||||
@ -219,16 +215,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
|||||||
|
|
||||||
let control0 = imm8;
|
let control0 = imm8;
|
||||||
let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
|
let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
|
||||||
let (res_0, res_1) = fx.bcx.ins().isplit(res_low);
|
|
||||||
|
|
||||||
let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
|
let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
|
||||||
let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
|
let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
|
||||||
let (res_2, res_3) = fx.bcx.ins().isplit(res_high);
|
|
||||||
|
|
||||||
ret.place_lane(fx, 0).to_ptr().store(fx, res_0, MemFlags::trusted());
|
ret.place_typed_lane(fx, fx.tcx.types.u128, 0).to_ptr().store(
|
||||||
ret.place_lane(fx, 1).to_ptr().store(fx, res_1, MemFlags::trusted());
|
fx,
|
||||||
ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted());
|
res_low,
|
||||||
ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted());
|
MemFlags::trusted(),
|
||||||
|
);
|
||||||
|
ret.place_typed_lane(fx, fx.tcx.types.u128, 1).to_ptr().store(
|
||||||
|
fx,
|
||||||
|
res_high,
|
||||||
|
MemFlags::trusted(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
|
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
|
||||||
let a = match args {
|
let a = match args {
|
||||||
|
@ -243,6 +243,34 @@ impl<'tcx> CValue<'tcx> {
|
|||||||
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||||
let lane_layout = fx.layout_of(lane_ty);
|
let lane_layout = fx.layout_of(lane_ty);
|
||||||
assert!(lane_idx < lane_count);
|
assert!(lane_idx < lane_count);
|
||||||
|
|
||||||
|
match self.0 {
|
||||||
|
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
|
||||||
|
CValueInner::ByRef(ptr, None) => {
|
||||||
|
let field_offset = lane_layout.size * lane_idx;
|
||||||
|
let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
|
||||||
|
CValue::by_ref(field_ptr, lane_layout)
|
||||||
|
}
|
||||||
|
CValueInner::ByRef(_, Some(_)) => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like [`CValue::value_field`] except using the passed type as lane type instead of the one
|
||||||
|
/// specified by the vector type.
|
||||||
|
pub(crate) fn value_typed_lane(
|
||||||
|
self,
|
||||||
|
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||||
|
lane_ty: Ty<'tcx>,
|
||||||
|
lane_idx: u64,
|
||||||
|
) -> CValue<'tcx> {
|
||||||
|
let layout = self.1;
|
||||||
|
assert!(layout.ty.is_simd());
|
||||||
|
let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||||
|
let lane_layout = fx.layout_of(lane_ty);
|
||||||
|
assert!(
|
||||||
|
(lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
|
||||||
|
);
|
||||||
|
|
||||||
match self.0 {
|
match self.0 {
|
||||||
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
|
CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
|
||||||
CValueInner::ByRef(ptr, None) => {
|
CValueInner::ByRef(ptr, None) => {
|
||||||
@ -734,6 +762,34 @@ impl<'tcx> CPlace<'tcx> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Like [`CPlace::place_field`] except using the passed type as lane type instead of the one
|
||||||
|
/// specified by the vector type.
|
||||||
|
pub(crate) fn place_typed_lane(
|
||||||
|
self,
|
||||||
|
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||||
|
lane_ty: Ty<'tcx>,
|
||||||
|
lane_idx: u64,
|
||||||
|
) -> CPlace<'tcx> {
|
||||||
|
let layout = self.layout();
|
||||||
|
assert!(layout.ty.is_simd());
|
||||||
|
let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||||
|
let lane_layout = fx.layout_of(lane_ty);
|
||||||
|
assert!(
|
||||||
|
(lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
|
||||||
|
);
|
||||||
|
|
||||||
|
match self.inner {
|
||||||
|
CPlaceInner::Var(_, _) => unreachable!(),
|
||||||
|
CPlaceInner::VarPair(_, _, _) => unreachable!(),
|
||||||
|
CPlaceInner::Addr(ptr, None) => {
|
||||||
|
let field_offset = lane_layout.size * lane_idx;
|
||||||
|
let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
|
||||||
|
CPlace::for_ptr(field_ptr, lane_layout)
|
||||||
|
}
|
||||||
|
CPlaceInner::Addr(_, Some(_)) => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn place_index(
|
pub(crate) fn place_index(
|
||||||
self,
|
self,
|
||||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user