Auto merge of #3075 - eduardosm:x86-addcarry-subborrow, r=RalfJung

Move `llvm.x86.*` shims into `shims::x86` and implement `_addcarry_u32` and `_subborrow_u{32,64}`

This PR moves all `llvm.x86.*` shims into `shims::x86` and adds `llvm.x86.addcarry.32`, `llvm.x86.subborrow.32` and `llvm.x86.subborrow.64`.

Additionally, it fixes the input carry semantics of `llvm.x86.addcarry.64`, the only add-with-carry shim that existed before this PR. The input carry is an 8-bit value that is interpreted as 1 when it is non-zero.

https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
This commit is contained in:
bors 2023-09-25 06:10:43 +00:00
commit d644603b80
7 changed files with 2084 additions and 1925 deletions

View File

@ -22,7 +22,7 @@
};
use super::backtrace::EvalContextExt as _;
use crate::helpers::{convert::Truncate, target_os_is_unix};
use crate::helpers::target_os_is_unix;
use crate::*;
/// Returned by `emulate_foreign_item_by_name`.
@ -981,30 +981,7 @@ fn emulate_foreign_item_by_name(
throw_unsup_format!("unsupported `llvm.prefetch` type argument: {}", ty);
}
}
"llvm.x86.addcarry.64" if this.tcx.sess.target.arch == "x86_64" => {
// Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum.
let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let c_in = this.read_scalar(c_in)?.to_u8()?;
let a = this.read_scalar(a)?.to_u64()?;
let b = this.read_scalar(b)?.to_u64()?;
#[allow(clippy::arithmetic_side_effects)]
// adding two u64 and a u8 cannot wrap in a u128
let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b);
#[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64
let (c_out, sum) = ((wide_sum >> 64).truncate::<u8>(), wide_sum.truncate::<u64>());
let c_out_field = this.project_field(dest, 0)?;
this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?;
let sum_field = this.project_field(dest, 1)?;
this.write_scalar(Scalar::from_u64(sum), &sum_field)?;
}
"llvm.x86.sse2.pause"
if this.tcx.sess.target.arch == "x86" || this.tcx.sess.target.arch == "x86_64" =>
{
let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
this.yield_active_thread();
}
// FIXME: Move these to an `arm` submodule.
"llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
let [arg] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let arg = this.read_scalar(arg)?.to_i32()?;
@ -1055,13 +1032,11 @@ fn emulate_foreign_item_by_name(
}
}
name if name.starts_with("llvm.x86.sse.") => {
return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("llvm.x86.sse2.") => {
return shims::x86::sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
name if name.starts_with("llvm.x86.")
&& (this.tcx.sess.target.arch == "x86"
|| this.tcx.sess.target.arch == "x86_64") =>
{
return shims::x86::EvalContextExt::emulate_x86_intrinsic(
this, link_name, abi, args, dest,
);
}

View File

@ -1,11 +1,98 @@
use rustc_middle::mir;
use rustc_span::Symbol;
use rustc_target::abi::Size;
use rustc_target::spec::abi::Abi;
use crate::*;
use helpers::bool_to_simd_element;
use shims::foreign_items::EmulateByNameResult;
pub(super) mod sse;
pub(super) mod sse2;
mod sse;
mod sse2;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
crate::MiriInterpCxExt<'mir, 'tcx>
{
fn emulate_x86_intrinsic(
&mut self,
link_name: Symbol,
abi: Abi,
args: &[OpTy<'tcx, Provenance>],
dest: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
let this = self.eval_context_mut();
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
match unprefixed_name {
// Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
// Computes a + b with input and output carry. The input carry is an 8-bit
// value, which is interpreted as 1 if it is non-zero. The output carry is
// an 8-bit value that will be 0 or 1.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
"addcarry.32" | "addcarry.64" => {
if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
return Ok(EmulateByNameResult::NotSupported);
}
let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
let a = this.read_immediate(a)?;
let b = this.read_immediate(b)?;
let (sum, overflow1) = this.overflowing_binary_op(mir::BinOp::Add, &a, &b)?;
let (sum, overflow2) = this.overflowing_binary_op(
mir::BinOp::Add,
&sum,
&ImmTy::from_uint(c_in, a.layout),
)?;
let c_out = overflow1 | overflow2;
this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
}
// Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
// Computes a - b with input and output borrow. The input borrow is an 8-bit
// value, which is interpreted as 1 if it is non-zero. The output borrow is
// an 8-bit value that will be 0 or 1.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
"subborrow.32" | "subborrow.64" => {
if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
return Ok(EmulateByNameResult::NotSupported);
}
let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
let a = this.read_immediate(a)?;
let b = this.read_immediate(b)?;
let (sub, overflow1) = this.overflowing_binary_op(mir::BinOp::Sub, &a, &b)?;
let (sub, overflow2) = this.overflowing_binary_op(
mir::BinOp::Sub,
&sub,
&ImmTy::from_uint(b_in, a.layout),
)?;
let b_out = overflow1 | overflow2;
this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
}
name if name.starts_with("sse.") => {
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse2.") => {
return sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
this, link_name, abi, args, dest,
);
}
_ => return Ok(EmulateByNameResult::NotSupported),
}
Ok(EmulateByNameResult::NeedsJumping)
}
}
/// Floating point comparison operation
///

View File

@ -10,7 +10,9 @@
use shims::foreign_items::EmulateByNameResult;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
crate::MiriInterpCxExt<'mir, 'tcx>
{
fn emulate_x86_sse_intrinsic(
&mut self,
link_name: Symbol,

View File

@ -13,7 +13,9 @@
use shims::foreign_items::EmulateByNameResult;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
crate::MiriInterpCxExt<'mir, 'tcx>
{
fn emulate_x86_sse2_intrinsic(
&mut self,
link_name: Symbol,
@ -753,6 +755,12 @@ enum ShiftOp {
this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
}
// Used to implement the `_mm_pause` function.
// The intrinsic is used to hint the processor that the code is in a spin-loop.
"pause" => {
let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
this.yield_active_thread();
}
_ => return Ok(EmulateByNameResult::NotSupported),
}
Ok(EmulateByNameResult::NeedsJumping)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,51 @@
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86 as arch;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64 as arch;

    /// Safe wrapper around `_addcarry_u32`; returns `(carry_out, sum)`.
    fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
        let mut total = 0;
        // SAFETY: There are no safety requirements for calling `_addcarry_u32`.
        // It's just unsafe for API consistency with other intrinsics.
        let carry = unsafe { arch::_addcarry_u32(c_in, a, b, &mut total) };
        (carry, total)
    }

    /// Safe wrapper around `_subborrow_u32`; returns `(borrow_out, difference)`.
    fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) {
        let mut diff = 0;
        // SAFETY: There are no safety requirements for calling `_subborrow_u32`.
        // It's just unsafe for API consistency with other intrinsics.
        let borrow = unsafe { arch::_subborrow_u32(b_in, a, b, &mut diff) };
        (borrow, diff)
    }

    /// Checks the 32-bit add-with-carry and subtract-with-borrow intrinsics
    /// against their expected results.
    pub fn main() {
        // Each entry is `((carry_in, a, b), (carry_out, sum))`.
        let adc_cases: [((u8, u32, u32), (u8, u32)); 8] = [
            ((0, 1, 1), (0, 2)),
            ((1, 1, 1), (0, 3)),
            ((2, 1, 1), (0, 3)), // any non-zero carry acts as 1!
            ((u8::MAX, 1, 1), (0, 3)),
            ((0, u32::MAX, u32::MAX), (1, u32::MAX - 1)),
            ((1, u32::MAX, u32::MAX), (1, u32::MAX)),
            ((2, u32::MAX, u32::MAX), (1, u32::MAX)),
            ((u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX)),
        ];
        for &((c_in, a, b), expected) in adc_cases.iter() {
            assert_eq!(adc(c_in, a, b), expected, "adc({}, {}, {})", c_in, a, b);
        }

        // Each entry is `((borrow_in, a, b), (borrow_out, difference))`.
        let sbb_cases: [((u8, u32, u32), (u8, u32)); 12] = [
            ((0, 1, 1), (0, 0)),
            ((1, 1, 1), (1, u32::MAX)),
            ((2, 1, 1), (1, u32::MAX)), // any non-zero borrow acts as 1!
            ((u8::MAX, 1, 1), (1, u32::MAX)),
            ((0, 2, 1), (0, 1)),
            ((1, 2, 1), (0, 0)),
            ((2, 2, 1), (0, 0)),
            ((u8::MAX, 2, 1), (0, 0)),
            ((0, 1, 2), (1, u32::MAX)),
            ((1, 1, 2), (1, u32::MAX - 1)),
            ((2, 1, 2), (1, u32::MAX - 1)),
            ((u8::MAX, 1, 2), (1, u32::MAX - 1)),
        ];
        for &((b_in, a, b), expected) in sbb_cases.iter() {
            assert_eq!(sbb(b_in, a, b), expected, "sbb({}, {}, {})", b_in, a, b);
        }
    }
}
#[cfg(target_arch = "x86_64")]
mod x86_64 {
use core::arch::x86_64 as arch;
@ -10,13 +58,42 @@ fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) {
(c_out, sum)
}
/// Safe wrapper around `_subborrow_u64`; returns `(borrow_out, difference)`.
fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) {
    // The intrinsic writes the difference through an out-pointer.
    let mut diff = 0;
    // SAFETY: There are no safety requirements for calling `_subborrow_u64`.
    // It's just unsafe for API consistency with other intrinsics.
    let borrow = unsafe { arch::_subborrow_u64(b_in, a, b, &mut diff) };
    (borrow, diff)
}
/// Checks the 64-bit add-with-carry and subtract-with-borrow intrinsics
/// against their expected results.
pub fn main() {
    // Add with carry: the result is `(carry_out, sum)`.
    assert_eq!(adc(0, 1, 1), (0, 2));
    assert_eq!(adc(1, 1, 1), (0, 3));
    assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
    assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
    assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
    assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
    assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
    // A carry-in of 3 is still just "1", and the carry-out is only ever 0 or 1,
    // so this is `(1, u64::MAX)` rather than the arithmetic `(2, 1)`.
    assert_eq!(adc(3, u64::MAX, u64::MAX), (1, u64::MAX));
    assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));

    // Subtract with borrow: the result is `(borrow_out, difference)`.
    assert_eq!(sbb(0, 1, 1), (0, 0));
    assert_eq!(sbb(1, 1, 1), (1, u64::MAX));
    assert_eq!(sbb(2, 1, 1), (1, u64::MAX)); // any non-zero borrow acts as 1!
    assert_eq!(sbb(u8::MAX, 1, 1), (1, u64::MAX));
    assert_eq!(sbb(0, 2, 1), (0, 1));
    assert_eq!(sbb(1, 2, 1), (0, 0));
    assert_eq!(sbb(2, 2, 1), (0, 0));
    assert_eq!(sbb(u8::MAX, 2, 1), (0, 0));
    assert_eq!(sbb(0, 1, 2), (1, u64::MAX));
    assert_eq!(sbb(1, 1, 2), (1, u64::MAX - 1));
    assert_eq!(sbb(2, 1, 2), (1, u64::MAX - 1));
    assert_eq!(sbb(u8::MAX, 1, 2), (1, u64::MAX - 1));
}
}
/// Runs every intrinsic test module that is compiled in for the current target.
fn main() {
    // The 32-bit intrinsics are tested on both x86 and x86_64.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        x86::main();
    }
    // The 64-bit intrinsics are tested on x86_64 only.
    #[cfg(target_arch = "x86_64")]
    {
        x86_64::main();
    }
}