From d2eeed4ff577ee35693a32ae95f043f57c267cb3 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Mon, 23 Nov 2020 11:45:41 +0100
Subject: [PATCH] Implement more simd_reduce_* intrinsics

---
Reviewer notes (this region is ignored when the patch is applied):

* src/intrinsics/mod.rs gains two helpers.
  - `simd_reduce` loads lane 0 of `val`, folds lanes `1..lane_count` into it
    with the caller-supplied closure `f` (which also receives the lane
    layout), and writes the result to `ret` using the lane layout. It asserts
    that the lane layout equals `ret.layout()`; the inline code it replaces
    only compared the `.ty` of the two layouts.
  - `simd_reduce_bool` asserts that `ret` has type `bool`, masks every lane
    with `band_imm(lane, 1)` before folding it with `f`, and writes the
    result to `ret` using `ret.layout()`.
* src/intrinsics/simd.rs: the `simd_reduce_{add,mul}_{ordered,unordered}`
  arms now call `simd_reduce` (fadd/fmul for floating-point lanes, iadd/imul
  otherwise). New `simd_reduce_all` / `simd_reduce_any` arms call
  `simd_reduce_bool` with `band` / `bor`. The stale TODO comment for the
  add/mul reductions is removed.
* NOTE(review): in `simd_reduce_bool` the folded value is built from the
  loaded lane values and then wrapped with `ret.layout()` without any visible
  narrowing step. Presumably this relies on the lane type matching the bool
  representation -- confirm for vectors whose lanes are wider.

 src/intrinsics/mod.rs  | 42 ++++++++++++++++++++++++++++++++++++
 src/intrinsics/simd.rs | 49 +++++++++++++++++++-----------------------
 2 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index f9d72178524..3563aa250a9 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -263,6 +263,48 @@ fn simd_pair_for_each_lane<'tcx, M: Module>(
     }
 }
 
+fn simd_reduce<'tcx, M: Module>(
+    fx: &mut FunctionCx<'_, 'tcx, M>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, TyAndLayout<'tcx>, Value, Value) -> Value,
+) {
+    let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
+    assert_eq!(lane_layout, ret.layout());
+
+    let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    for lane_idx in 1..lane_count {
+        let lane = val
+            .value_field(fx, mir::Field::new(lane_idx.into()))
+            .load_scalar(fx);
+        res_val = f(fx, lane_layout, res_val, lane);
+    }
+    let res = CValue::by_val(res_val, lane_layout);
+    ret.write_cvalue(fx, res);
+}
+
+fn simd_reduce_bool<'tcx, M: Module>(
+    fx: &mut FunctionCx<'_, 'tcx, M>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, Value, Value) -> Value,
+) {
+    let (_lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
+    assert!(ret.layout().ty.is_bool());
+
+    let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
+    for lane_idx in 1..lane_count {
+        let lane = val
+            .value_field(fx, mir::Field::new(lane_idx.into()))
+            .load_scalar(fx);
+        let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
+        res_val = f(fx, res_val, lane);
+    }
+    let res = CValue::by_val(res_val, ret.layout());
+    ret.write_cvalue(fx, res);
+}
+
 fn bool_to_zero_or_max_uint<'tcx>(
     fx: &mut FunctionCx<'_, 'tcx, impl Module>,
     layout: TyAndLayout<'tcx>,
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 20a02e14bb2..0b85309f4c5 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -233,45 +233,40 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, v.layout());
-            assert_eq!(lane_layout.ty, ret.layout().ty);
-
-            let mut res_val = v.value_field(fx, mir::Field::new(0)).load_scalar(fx);
-            for lane_idx in 1..lane_count {
-                let lane = v.value_field(fx, mir::Field::new(lane_idx.into())).load_scalar(fx);
-                res_val = if lane_layout.ty.is_floating_point() {
-                    fx.bcx.ins().fadd(res_val, lane)
+            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+                if lane_layout.ty.is_floating_point() {
+                    fx.bcx.ins().fadd(a, b)
                 } else {
-                    fx.bcx.ins().iadd(res_val, lane)
-                };
-            }
-            let res = CValue::by_val(res_val, lane_layout);
-            ret.write_cvalue(fx, res);
+                    fx.bcx.ins().iadd(a, b)
+                }
+            });
         };
 
         simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, v.layout());
-            assert_eq!(lane_layout.ty, ret.layout().ty);
-
-            let mut res_val = v.value_field(fx, mir::Field::new(0)).load_scalar(fx);
-            for lane_idx in 1..lane_count {
-                let lane = v.value_field(fx, mir::Field::new(lane_idx.into())).load_scalar(fx);
-                res_val = if lane_layout.ty.is_floating_point() {
-                    fx.bcx.ins().fmul(res_val, lane)
+            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+                if lane_layout.ty.is_floating_point() {
+                    fx.bcx.ins().fmul(a, b)
                 } else {
-                    fx.bcx.ins().imul(res_val, lane)
-                };
-            }
-            let res = CValue::by_val(res_val, lane_layout);
-            ret.write_cvalue(fx, res);
+                    fx.bcx.ins().imul(a, b)
+                }
+            });
+        };
+
+        simd_reduce_all, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
+        };
+
+        simd_reduce_any, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
         // simd_fabs
         // simd_saturating_add
         // simd_bitmask
         // simd_select
-        // simd_reduce_{add,mul}_{,un}ordered
         // simd_rem
     }
 }