Implement all LLVM intrinsics necessary for the image crate
Fixes #1379
This commit is contained in:
parent
efd3081083
commit
a558968dbe
@ -344,6 +344,109 @@ fn select4(
|
||||
fx.bcx.ins().sshr(a_lane, saturated_count)
|
||||
});
|
||||
}
|
||||
"llvm.x86.sse2.psad.bw" => {
    // PSADBW: for every group of 8 unsigned bytes, sum the absolute
    // differences |a[i] - b[i]| and store the sum in one u64 output lane.
    intrinsic_args!(fx, args => (a, b); intrinsic);

    assert_eq!(a.layout(), b.layout());
    let layout = a.layout();

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lane_ty, fx.tcx.types.u8);
    assert_eq!(ret_lane_ty, fx.tcx.types.u64);
    assert_eq!(lane_count, ret_lane_count * 8);

    let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
    for out_lane_idx in 0..lane_count / 8 {
        let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);

        // Every output lane accumulates all 8 input bytes of its group.
        for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
            // Widen the unsigned bytes before subtracting: an 8-bit `isub`
            // followed by `iabs` would treat differences >= 128 as negative
            // (e.g. 255 - 0 would yield 1 instead of 255).
            let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
            let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
            let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
            let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);

            // The difference is in -255..=255, which fits in a signed 16-bit
            // value, so `iabs` yields the true absolute difference.
            let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
            let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
            let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
            lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
        }

        let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);

        ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
    }
}
|
||||
"llvm.x86.ssse3.pmadd.ub.sw.128" => {
    // Multiplies each byte of `a` (zero-extended) with the matching byte of
    // `b` (sign-extended) and adds adjacent products with signed saturation,
    // producing one i16 lane per pair of input bytes.
    intrinsic_args!(fx, args => (a, b); intrinsic);

    let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lane_ty, fx.tcx.types.u8);
    assert_eq!(ret_lane_ty, fx.tcx.types.i16);
    assert_eq!(lane_count, ret_lane_count * 2);

    let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
    for out_lane_idx in 0..lane_count / 2 {
        let even_idx = out_lane_idx * 2;
        let odd_idx = even_idx + 1;

        // First element of the pair: zero-extend `a`, sign-extend `b`.
        let a_even = {
            let raw = a.value_lane(fx, even_idx).load_scalar(fx);
            fx.bcx.ins().uextend(types::I16, raw)
        };
        let b_even = {
            let raw = b.value_lane(fx, even_idx).load_scalar(fx);
            fx.bcx.ins().sextend(types::I16, raw)
        };

        // Second element of the pair, widened the same way.
        let a_odd = {
            let raw = a.value_lane(fx, odd_idx).load_scalar(fx);
            fx.bcx.ins().uextend(types::I16, raw)
        };
        let b_odd = {
            let raw = b.value_lane(fx, odd_idx).load_scalar(fx);
            fx.bcx.ins().sextend(types::I16, raw)
        };

        let product_even = fx.bcx.ins().imul(a_even, b_even);
        let product_odd = fx.bcx.ins().imul(a_odd, b_odd);

        let (wrapped_sum, overflowed) = fx.bcx.ins().sadd_overflow(product_even, product_odd);

        // On overflow the sign of the second addend selects which bound to
        // clamp to.
        let odd_is_non_negative =
            fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, product_odd, 0);

        let lower_bound = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
        let upper_bound = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));

        let clamped = fx.bcx.ins().select(odd_is_non_negative, upper_bound, lower_bound);
        let lane_value = fx.bcx.ins().select(overflowed, clamped, wrapped_sum);

        let lane_cvalue = CValue::by_val(lane_value, ret_lane_layout);
        ret.place_lane(fx, out_lane_idx).write_cvalue(fx, lane_cvalue);
    }
}
|
||||
"llvm.x86.sse2.pmadd.wd" => {
    // PMADDWD: multiply corresponding signed 16-bit lanes of `a` and `b`,
    // then add each adjacent pair of 32-bit products into one i32 lane.
    intrinsic_args!(fx, args => (a, b); intrinsic);

    assert_eq!(a.layout(), b.layout());
    let layout = a.layout();

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lane_ty, fx.tcx.types.i16);
    assert_eq!(ret_lane_ty, fx.tcx.types.i32);
    assert_eq!(lane_count, ret_lane_count * 2);

    let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
    for out_lane_idx in 0..lane_count / 2 {
        // Both operands are signed words, so both must be sign-extended;
        // zero-extending `a` would turn negative inputs into large positive
        // values and corrupt the product.
        let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
        let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
        let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
        let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);

        let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
        let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
        let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
        let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);

        let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
        let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);

        // Plain wrapping add: the only overflowing case (all four inputs
        // equal to i16::MIN) wraps to 0x8000_0000.
        let res_lane = fx.bcx.ins().iadd(mul0, mul1);
        let res_lane = CValue::by_val(res_lane, ret_lane_layout);

        ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
    }
}
|
||||
_ => {
|
||||
fx.tcx
|
||||
.sess
|
||||
|
Loading…
Reference in New Issue
Block a user