Implement all llvm intrinsics necessary for the image crate

Fixes #1379
This commit is contained in:
bjorn3 2023-10-23 14:10:22 +00:00
parent efd3081083
commit a558968dbe

View File

@ -344,6 +344,109 @@ fn select4(
fx.bcx.ins().sshr(a_lane, saturated_count)
});
}
"llvm.x86.sse2.psad.bw" => {
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.u8);
assert_eq!(ret_lane_ty, fx.tcx.types.u64);
assert_eq!(lane_count, ret_lane_count * 8);
let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
for out_lane_idx in 0..lane_count / 8 {
let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
}
let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.ssse3.pmadd.ub.sw.128" => {
intrinsic_args!(fx, args => (a, b); intrinsic);
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.u8);
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count, ret_lane_count * 2);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for out_lane_idx in 0..lane_count / 2 {
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.sse2.pmadd.wd" => {
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i16);
assert_eq!(ret_lane_ty, fx.tcx.types.i32);
assert_eq!(lane_count, ret_lane_count * 2);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
for out_lane_idx in 0..lane_count / 2 {
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
let res_lane = fx.bcx.ins().iadd(mul0, mul1);
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
_ => {
fx.tcx
.sess