diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index c1694760998..e9b7daf1492 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -51,6 +51,21 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }); } + _ if intrinsic.starts_with("llvm.fma.v") => { + intrinsic_args!(fx, args => (x,y,z); intrinsic); + + simd_trio_for_each_lane( + fx, + x, + y, + z, + ret, + &|fx, _lane_ty, _res_lane_ty, lane_x, lane_y, lane_z| { + fx.bcx.ins().fma(lane_x, lane_y, lane_z) + }, + ); + } + _ => { fx.tcx .sess diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 83d5d53624e..58b31a53432 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -132,6 +132,35 @@ fn simd_pair_for_each_lane<'tcx>( } } +fn simd_trio_for_each_lane<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + x: CValue<'tcx>, + y: CValue<'tcx>, + z: CValue<'tcx>, + ret: CPlace<'tcx>, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value, Value) -> Value, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + assert_eq!(lane_count, ret_lane_count); + + for lane_idx in 0..lane_count { + let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx); + let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx); + let z_lane = z.value_lane(fx, lane_idx).load_scalar(fx); + + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane, z_lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); + } +} + fn simd_reduce<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>,