Pass Ty instead of TyAndLayout to the closures of various SIMD helpers

This reduces the total number of LLVM IR lines for SIMD-related
functions from 9604 to 9467.
This commit is contained in:
bjorn3 2022-01-09 19:07:15 +01:00
parent 2633024850
commit b7cda373d5
3 changed files with 75 additions and 110 deletions

View File

@ -73,17 +73,17 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
kind => unreachable!("kind {:?}", kind),
};
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind() {
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
let res_lane = match lane_ty.kind() {
ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
_ => unreachable!("{:?}", lane_ty),
};
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
});
};
"llvm.x86.sse2.psrli.d", (c a, o imm8) {
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
_ => fx.bcx.ins().iconst(types::I32, 0),
@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
};
"llvm.x86.sse2.pslli.d", (c a, o imm8) {
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
_ => fx.bcx.ins().iconst(types::I32, 0),

View File

@ -108,12 +108,7 @@ fn simd_for_each_lane<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
val: CValue<'tcx>,
ret: CPlace<'tcx>,
f: &dyn Fn(
&mut FunctionCx<'_, '_, 'tcx>,
TyAndLayout<'tcx>,
TyAndLayout<'tcx>,
Value,
) -> Value,
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value) -> Value,
) {
let layout = val.layout();
@ -126,7 +121,7 @@ fn simd_for_each_lane<'tcx>(
for lane_idx in 0..lane_count {
let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lane);
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
@ -138,13 +133,7 @@ fn simd_pair_for_each_lane<'tcx>(
x: CValue<'tcx>,
y: CValue<'tcx>,
ret: CPlace<'tcx>,
f: &dyn Fn(
&mut FunctionCx<'_, '_, 'tcx>,
TyAndLayout<'tcx>,
TyAndLayout<'tcx>,
Value,
Value,
) -> Value,
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value,
) {
assert_eq!(x.layout(), y.layout());
let layout = x.layout();
@ -159,7 +148,7 @@ fn simd_pair_for_each_lane<'tcx>(
let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);
let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane);
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
@ -171,7 +160,7 @@ fn simd_reduce<'tcx>(
val: CValue<'tcx>,
acc: Option<Value>,
ret: CPlace<'tcx>,
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Value, Value) -> Value,
) {
let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
let lane_layout = fx.layout_of(lane_ty);
@ -181,7 +170,7 @@ fn simd_reduce<'tcx>(
if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
for lane_idx in start_lane..lane_count {
let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
res_val = f(fx, lane_layout, res_val, lane);
res_val = f(fx, lane_layout.ty, res_val, lane);
}
let res = CValue::by_val(res_val, lane_layout);
ret.write_cvalue(fx, res);
@ -215,10 +204,10 @@ fn simd_reduce_bool<'tcx>(
fn bool_to_zero_or_max_uint<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
layout: TyAndLayout<'tcx>,
ty: Ty<'tcx>,
val: Value,
) -> Value {
let ty = fx.clif_type(layout.ty).unwrap();
let ty = fx.clif_type(ty).unwrap();
let int_ty = match ty {
types::F32 => types::I32,

View File

@ -17,76 +17,52 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span:
macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) {
// FIXME use vector instructions when possible
simd_pair_for_each_lane(
$fx,
$x,
$y,
$ret,
&|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind() {
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
let res_lane = match lane_ty.kind() {
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
_ => unreachable!("{:?}", lane_ty),
};
let ty = fx.clif_type(res_lane_layout.ty).unwrap();
let ty = fx.clif_type(res_lane_ty).unwrap();
let res_lane = fx.bcx.ins().bint(ty, res_lane);
fx.bcx.ins().ineg(res_lane)
},
);
let res_lane = fx.bcx.ins().bint(ty, res_lane);
fx.bcx.ins().ineg(res_lane)
});
}
macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) {
// FIXME use vector instructions when possible
simd_pair_for_each_lane(
$fx,
$x,
$y,
$ret,
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
match lane_layout.ty.kind() {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
}
},
);
simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
match lane_ty.kind() {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
_ => unreachable!("{:?}", lane_ty),
}
});
}
macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) {
// FIXME use vector instructions when possible
simd_pair_for_each_lane(
$fx,
$x,
$y,
$ret,
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
match lane_layout.ty.kind() {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
}
},
);
simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
match lane_ty.kind() {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
_ => unreachable!("{:?}", lane_ty),
}
});
}
macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
// FIXME use vector instructions when possible
simd_pair_for_each_lane(
$fx,
$x,
$y,
$ret,
&|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
match lane_layout.ty.kind() {
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
}
},
);
simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
match lane_ty.kind() {
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
_ => unreachable!("{:?}", lane_ty),
}
});
}
pub(super) fn codegen_simd_intrinsic_call<'tcx>(
@ -105,13 +81,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_cast, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| {
let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| {
let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();
let from_signed = type_sign(lane_layout.ty);
let to_signed = type_sign(ret_lane_layout.ty);
let from_signed = type_sign(lane_ty);
let to_signed = type_sign(ret_lane_ty);
clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed)
clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed)
});
};
@ -277,8 +253,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_neg, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
match lane_layout.ty.kind() {
simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
match lane_ty.kind() {
ty::Int(_) => fx.bcx.ins().ineg(lane),
ty::Float(_) => fx.bcx.ins().fneg(lane),
_ => unreachable!(),
@ -288,14 +264,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_fabs, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
fx.bcx.ins().fabs(lane)
});
};
simd_fsqrt, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
fx.bcx.ins().sqrt(lane)
});
};
@ -318,8 +294,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
};
simd_rem, (c x, c y) {
validate_simd_type(fx, intrinsic, span, x.layout().ty);
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
match lane_layout.ty.kind() {
simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
match lane_ty.kind() {
ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
ty::Float(FloatTy::F32) => fx.lib_call(
@ -334,7 +310,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
vec![AbiParam::new(types::F64)],
&[x_lane, y_lane],
)[0],
_ => unreachable!("{:?}", lane_layout.ty),
_ => unreachable!("{:?}", lane_ty),
}
});
};
@ -393,8 +369,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_round, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
match lane_layout.ty.kind() {
simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
match lane_ty.kind() {
ty::Float(FloatTy::F32) => fx.lib_call(
"roundf",
vec![AbiParam::new(types::F32)],
@ -407,33 +383,33 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
vec![AbiParam::new(types::F64)],
&[lane],
)[0],
_ => unreachable!("{:?}", lane_layout.ty),
_ => unreachable!("{:?}", lane_ty),
}
});
};
simd_ceil, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
fx.bcx.ins().ceil(lane)
});
};
simd_floor, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
fx.bcx.ins().floor(lane)
});
};
simd_trunc, (c a) {
validate_simd_type(fx, intrinsic, span, a.layout().ty);
simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
fx.bcx.ins().trunc(lane)
});
};
simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
if lane_layout.ty.is_floating_point() {
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
if lane_ty.is_floating_point() {
fx.bcx.ins().fadd(a, b)
} else {
fx.bcx.ins().iadd(a, b)
@ -443,8 +419,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
if lane_layout.ty.is_floating_point() {
simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
if lane_ty.is_floating_point() {
fx.bcx.ins().fmul(a, b)
} else {
fx.bcx.ins().imul(a, b)
@ -464,23 +440,23 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_reduce_and, (c v) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b));
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b));
};
simd_reduce_or, (c v) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b));
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b));
};
simd_reduce_xor, (c v) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
};
simd_reduce_min, (c v) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
let lt = match layout.ty.kind() {
simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
let lt = match ty.kind() {
ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::LessThan, a, b),
@ -492,8 +468,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_reduce_max, (c v) {
validate_simd_type(fx, intrinsic, span, v.layout().ty);
simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
let gt = match layout.ty.kind() {
simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
let gt = match ty.kind() {
ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::GreaterThan, a, b),