Auto merge of #116609 - eduardosm:bump-stdarch, r=workingjubilee
Bump stdarch submodule and remove special handling for LLVM intrinsics that are no longer needed Bumps stdarch to pull https://github.com/rust-lang/stdarch/pull/1477, which reimplemented some functions with portable SIMD intrinsics instead of arch specific LLVM intrinsics. Handling of those LLVM intrinsics is removed from cranelift codegen and miri. cc `@RalfJung` `@bjorn3`
This commit is contained in:
commit
3089c315b1
@ -32,41 +32,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64)));
|
||||
}
|
||||
|
||||
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
|
||||
"llvm.x86.sse2.pmovmskb.128"
|
||||
| "llvm.x86.avx2.pmovmskb"
|
||||
| "llvm.x86.sse.movmsk.ps"
|
||||
| "llvm.x86.sse2.movmsk.pd" => {
|
||||
intrinsic_args!(fx, args => (a); intrinsic);
|
||||
|
||||
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
|
||||
let lane_ty = fx.clif_type(lane_ty).unwrap();
|
||||
assert!(lane_count <= 32);
|
||||
|
||||
let mut res = fx.bcx.ins().iconst(types::I32, 0);
|
||||
|
||||
for lane in (0..lane_count).rev() {
|
||||
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
|
||||
|
||||
// cast float to int
|
||||
let a_lane = match lane_ty {
|
||||
types::F32 => codegen_bitcast(fx, types::I32, a_lane),
|
||||
types::F64 => codegen_bitcast(fx, types::I64, a_lane),
|
||||
_ => a_lane,
|
||||
};
|
||||
|
||||
// extract sign bit of an int
|
||||
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1));
|
||||
|
||||
// shift sign bit into result
|
||||
let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false);
|
||||
res = fx.bcx.ins().ishl_imm(res, 1);
|
||||
res = fx.bcx.ins().bor(res, a_lane_sign);
|
||||
}
|
||||
|
||||
let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32));
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
"llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
|
||||
let (x, y, kind) = match args {
|
||||
[x, y, kind] => (x, y, kind),
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 333e9e9977188d0748327e9b5be0f3f412063174
|
||||
Subproject commit f4528dd6e85d97bb802240d7cd048b6e1bf72540
|
@ -209,25 +209,6 @@ fn emulate_x86_sse_intrinsic(
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_movemask_ps function.
|
||||
// Returns a scalar integer where the i-th bit is the highest
|
||||
// bit of the i-th component of `op`.
|
||||
// https://www.felixcloutier.com/x86/movmskps
|
||||
"movmsk.ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
|
||||
let mut res = 0;
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||
let op = op.to_u32()?;
|
||||
|
||||
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
|
||||
res |= (op >> 31) << i;
|
||||
}
|
||||
|
||||
this.write_scalar(Scalar::from_u32(res), dest)?;
|
||||
}
|
||||
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
||||
}
|
||||
Ok(EmulateForeignItemResult::NeedsJumping)
|
||||
|
@ -1,8 +1,4 @@
|
||||
use rustc_apfloat::{
|
||||
ieee::{Double, Single},
|
||||
Float as _,
|
||||
};
|
||||
use rustc_middle::mir;
|
||||
use rustc_apfloat::ieee::Double;
|
||||
use rustc_middle::ty::layout::LayoutOf as _;
|
||||
use rustc_middle::ty::Ty;
|
||||
use rustc_span::Symbol;
|
||||
@ -39,49 +35,6 @@ fn emulate_x86_sse2_intrinsic(
|
||||
// Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
|
||||
// vectors.
|
||||
match unprefixed_name {
|
||||
// Used to implement the _mm_avg_epu8 and _mm_avg_epu16 functions.
|
||||
// Averages packed unsigned 8/16-bit integers in `left` and `right`.
|
||||
"pavg.b" | "pavg.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let right = this.read_immediate(&this.project_index(&right, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Widen the operands to avoid overflow
|
||||
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
|
||||
let left = this.int_to_int_or_float(&left, twice_wide)?;
|
||||
let right = this.int_to_int_or_float(&right, twice_wide)?;
|
||||
|
||||
// Calculate left + right + 1
|
||||
let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?;
|
||||
let added = this.wrapping_binary_op(
|
||||
mir::BinOp::Add,
|
||||
&added,
|
||||
&ImmTy::from_uint(1u32, twice_wide),
|
||||
)?;
|
||||
|
||||
// Calculate (left + right + 1) / 2
|
||||
let divided = this.wrapping_binary_op(
|
||||
mir::BinOp::Div,
|
||||
&added,
|
||||
&ImmTy::from_uint(2u32, twice_wide),
|
||||
)?;
|
||||
|
||||
// Narrow back to the original type
|
||||
let res = this.int_to_int_or_float(÷d, dest.layout)?;
|
||||
this.write_immediate(*res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_madd_epi16 function.
|
||||
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
|
||||
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
|
||||
@ -118,70 +71,6 @@ fn emulate_x86_sse2_intrinsic(
|
||||
this.write_scalar(Scalar::from_i32(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
|
||||
"pmulh.w" | "pmulhu.w" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let right = this.read_immediate(&this.project_index(&right, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Widen the operands to avoid overflow
|
||||
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
|
||||
let left = this.int_to_int_or_float(&left, twice_wide)?;
|
||||
let right = this.int_to_int_or_float(&right, twice_wide)?;
|
||||
|
||||
// Multiply
|
||||
let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
|
||||
// Keep the high half
|
||||
let high = this.wrapping_binary_op(
|
||||
mir::BinOp::Shr,
|
||||
&multiplied,
|
||||
&ImmTy::from_uint(dest.layout.size.bits(), twice_wide),
|
||||
)?;
|
||||
|
||||
// Narrow back to the original type
|
||||
let res = this.int_to_int_or_float(&high, dest.layout)?;
|
||||
this.write_immediate(*res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_mul_epu32 function.
|
||||
// Multiplies the the low unsigned 32-bit integers from each packed
|
||||
// 64-bit element and stores the result as 64-bit unsigned integers.
|
||||
"pmulu.dq" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// left and right are u32x4, dest is u64x2
|
||||
assert_eq!(left_len, 4);
|
||||
assert_eq!(right_len, 4);
|
||||
assert_eq!(dest_len, 2);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op_i = i.checked_mul(2).unwrap();
|
||||
let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// The multiplication will not overflow because stripping the
|
||||
// operands are expanded from 32-bit to 64-bit.
|
||||
let res = u64::from(left).checked_mul(u64::from(right)).unwrap();
|
||||
this.write_scalar(Scalar::from_u64(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_sad_epu8 function.
|
||||
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
|
||||
// and `b`, then horizontally sum each consecutive 8 differences to produce
|
||||
@ -370,25 +259,6 @@ enum ShiftOp {
|
||||
this.write_scalar(Scalar::from_u64(res), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtepi32_ps function.
|
||||
// Converts packed i32 to packed f32.
|
||||
// FIXME: Can we get rid of this intrinsic and just use simd_as?
|
||||
"cvtdq2ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = Scalar::from_f32(Single::from_i128(op.into()).value);
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions.
|
||||
// Converts packed f32 to packed i32.
|
||||
"cvtps2dq" | "cvttps2dq" => {
|
||||
@ -652,31 +522,6 @@ enum ShiftOp {
|
||||
};
|
||||
this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cvtpd_ps and _mm_cvtps_pd functions.
|
||||
// Converts packed f32/f64 to packed f64/f32.
|
||||
"cvtpd2ps" | "cvtps2pd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
// For cvtpd2ps: op is f64x2, dest is f32x4
|
||||
// For cvtps2pd: op is f32x4, dest is f64x2
|
||||
// In either case, the two first values are converted
|
||||
for i in 0..op_len.min(dest_len) {
|
||||
let op = this.read_immediate(&this.project_index(&op, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = this.float_to_float_or_int(&op, dest.layout)?;
|
||||
this.write_immediate(*res, &dest)?;
|
||||
}
|
||||
// For f32 -> f64, ignore the remaining
|
||||
// For f64 -> f32, fill the remaining with zeros
|
||||
for i in op_len..dest_len {
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions.
|
||||
// Converts packed f64 to packed i32.
|
||||
"cvtpd2dq" | "cvttpd2dq" => {
|
||||
@ -772,25 +617,6 @@ enum ShiftOp {
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_movemask_pd function.
|
||||
// Returns a scalar integer where the i-th bit is the highest
|
||||
// bit of the i-th component of `op`.
|
||||
// https://www.felixcloutier.com/x86/movmskpd
|
||||
"movmsk.pd" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
|
||||
let mut res = 0;
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||
let op = op.to_u64()?;
|
||||
|
||||
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
|
||||
res |= (op >> 63) << i;
|
||||
}
|
||||
|
||||
this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
|
||||
}
|
||||
// Used to implement the `_mm_pause` function.
|
||||
// The intrinsic is used to hint the processor that the code is in a spin-loop.
|
||||
"pause" => {
|
||||
|
@ -22,32 +22,6 @@ fn emulate_x86_sse3_intrinsic(
|
||||
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
|
||||
|
||||
match unprefixed_name {
|
||||
// Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
|
||||
// Alternatingly add and subtract floating point (f32 or f64) from
|
||||
// `left` and `right`
|
||||
"addsub.ps" | "addsub.pd" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let right = this.read_immediate(&this.project_index(&right, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
// Even elements are subtracted and odd elements are added.
|
||||
let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
|
||||
let res = this.wrapping_binary_op(op, &left, &right)?;
|
||||
|
||||
this.write_immediate(*res, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_h{add,sub}_p{s,d} functions.
|
||||
// Horizontally add/subtract adjacent floating point values
|
||||
// in `left` and `right`.
|
||||
|
@ -117,12 +117,12 @@ unsafe fn test_mm_mul_epu32() {
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn test_mm_sad_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm_setr_epi8(
|
||||
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
);
|
||||
let a = _mm_setr_epi8(
|
||||
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
);
|
||||
let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
|
||||
let r = _mm_sad_epu8(a, b);
|
||||
let e = _mm_setr_epi64x(1020, 614);
|
||||
|
Loading…
Reference in New Issue
Block a user