Auto merge of #58304 - gnzlbg:simd_saturated, r=nagisa

Add generic simd saturated add/sub intrinsics

r? @eddyb
bors 2019-02-24 04:16:12 +00:00
commit 7cb3ee453b
26 changed files with 894 additions and 8 deletions
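
For context, the two new platform intrinsics perform element-wise saturating addition and subtraction on SIMD vectors of signed or unsigned integers. A minimal usage sketch (hypothetical user code, mirroring the tests added in this commit):

#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]

#[repr(simd)]
#[derive(Copy, Clone, PartialEq, Debug)]
struct u8x4(u8, u8, u8, u8);

extern "platform-intrinsic" {
    fn simd_saturating_add<T>(x: T, y: T) -> T;
    fn simd_saturating_sub<T>(x: T, y: T) -> T;
}

fn main() {
    let x = u8x4(250, 250, 250, 250);
    let y = u8x4(10, 10, 10, 10);
    unsafe {
        // Each lane clamps at u8::max_value() instead of wrapping.
        assert_eq!(simd_saturating_add(x, y), u8x4(255, 255, 255, 255));
        // Each lane clamps at zero instead of wrapping.
        assert_eq!(simd_saturating_sub(y, x), u8x4(0, 0, 0, 0));
    }
}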


@@ -515,14 +515,24 @@ macro_rules! mk_struct {
 let t_f32 = self.type_f32();
 let t_f64 = self.type_f64();
-let t_v2f32 = self.type_vector(t_f32, 2);
-let t_v4f32 = self.type_vector(t_f32, 4);
-let t_v8f32 = self.type_vector(t_f32, 8);
-let t_v16f32 = self.type_vector(t_f32, 16);
+macro_rules! vector_types {
+    ($id_out:ident: $elem_ty:ident, $len:expr) => {
+        let $id_out = self.type_vector($elem_ty, $len);
+    };
+    ($($id_out:ident: $elem_ty:ident, $len:expr;)*) => {
+        $(vector_types!($id_out: $elem_ty, $len);)*
+    }
+}
+vector_types! {
+    t_v2f32: t_f32, 2;
+    t_v4f32: t_f32, 4;
+    t_v8f32: t_f32, 8;
+    t_v16f32: t_f32, 16;
-let t_v2f64 = self.type_vector(t_f64, 2);
-let t_v4f64 = self.type_vector(t_f64, 4);
-let t_v8f64 = self.type_vector(t_f64, 8);
+    t_v2f64: t_f64, 2;
+    t_v4f64: t_f64, 4;
+    t_v8f64: t_f64, 8;
+}
 ifn!("llvm.memset.p0i8.i16", fn(i8p, t_i8, t_i16, t_i32, i1) -> void);
 ifn!("llvm.memset.p0i8.i32", fn(i8p, t_i8, t_i32, t_i32, i1) -> void);


@@ -1848,7 +1848,52 @@ macro_rules! arith {
simd_xor: Uint, Int => xor;
simd_fmax: Float => maxnum;
simd_fmin: Float => minnum;
}
if name == "simd_saturating_add" || name == "simd_saturating_sub" {
let lhs = args[0].immediate();
let rhs = args[1].immediate();
let is_add = name == "simd_saturating_add";
let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
let (signed, elem_width, elem_ty) = match in_elem.sty {
ty::Int(i) =>
(
true,
i.bit_width().unwrap_or(ptr_bits),
bx.cx.type_int_from_ty(i)
),
ty::Uint(i) =>
(
false,
i.bit_width().unwrap_or(ptr_bits),
bx.cx.type_uint_from_ty(i)
),
_ => {
return_error!(
"expected element type `{}` of vector type `{}` \
to be a signed or unsigned integer type",
arg_tys[0].simd_type(tcx).sty, arg_tys[0]
);
}
};
let llvm_intrinsic = &format!(
"llvm.{}{}.sat.v{}i{}",
if signed { 's' } else { 'u' },
if is_add { "add" } else { "sub" },
in_len, elem_width
);
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
let f = bx.declare_cfn(
&llvm_intrinsic,
bx.type_func(&[vec_ty, vec_ty], vec_ty)
);
llvm::SetUnnamedAddr(f, false);
let v = bx.call(f, &[lhs, rhs], None);
return Ok(v);
}
span_bug!(span, "unknown SIMD intrinsic");
}
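
For a 16-lane vector of u8, for example, the format string above produces the name llvm.uadd.sat.v16i8, which is exactly what the codegen test below checks for. A standalone sketch of the name construction (illustration only, not part of this commit):

fn main() {
    let (signed, is_add, in_len, elem_width) = (false, true, 16, 8);
    let llvm_intrinsic = format!(
        "llvm.{}{}.sat.v{}i{}",
        if signed { 's' } else { 'u' },
        if is_add { "add" } else { "sub" },
        in_len, elem_width
    );
    assert_eq!(llvm_intrinsic, "llvm.uadd.sat.v16i8");
}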


@@ -410,7 +410,8 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 "simd_add" | "simd_sub" | "simd_mul" | "simd_rem" |
 "simd_div" | "simd_shl" | "simd_shr" |
 "simd_and" | "simd_or" | "simd_xor" |
-"simd_fmin" | "simd_fmax" | "simd_fpow" => {
+"simd_fmin" | "simd_fmax" | "simd_fpow" |
+"simd_saturating_add" | "simd_saturating_sub" => {
     (1, vec![param(0), param(0)], param(0))
 }
 "simd_fsqrt" | "simd_fsin" | "simd_fcos" | "simd_fexp" | "simd_fexp2" |


@@ -0,0 +1,685 @@
// compile-flags: -C no-prepopulate-passes
// ignore-tidy-linelength
#![crate_type = "lib"]
#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]
#![deny(unused)]
// signed integer types
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x2(i8, i8);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x4(i8, i8, i8, i8);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x8(
i8, i8, i8, i8, i8, i8, i8, i8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x16(
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x32(
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i8x64(
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i16x2(i16, i16);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i16x4(i16, i16, i16, i16);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i16x8(
i16, i16, i16, i16, i16, i16, i16, i16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i16x16(
i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i16x32(
i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i32x2(i32, i32);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i32x4(i32, i32, i32, i32);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i32x8(
i32, i32, i32, i32, i32, i32, i32, i32,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i32x16(
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i64x2(i64, i64);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i64x4(i64, i64, i64, i64);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i64x8(
i64, i64, i64, i64, i64, i64, i64, i64,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i128x2(i128, i128);
#[repr(simd)] #[derive(Copy, Clone)] pub struct i128x4(i128, i128, i128, i128);
// unsigned integer types
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x2(u8, u8);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x4(u8, u8, u8, u8);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x8(
u8, u8, u8, u8, u8, u8, u8, u8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x16(
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x32(
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u8x64(
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u16x2(u16, u16);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u16x4(u16, u16, u16, u16);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u16x8(
u16, u16, u16, u16, u16, u16, u16, u16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u16x16(
u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u16x32(
u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u32x2(u32, u32);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u32x4(u32, u32, u32, u32);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u32x8(
u32, u32, u32, u32, u32, u32, u32, u32,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u32x16(
u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u64x2(u64, u64);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u64x4(u64, u64, u64, u64);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u64x8(
u64, u64, u64, u64, u64, u64, u64, u64,
);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u128x2(u128, u128);
#[repr(simd)] #[derive(Copy, Clone)] pub struct u128x4(u128, u128, u128, u128);
extern "platform-intrinsic" {
fn simd_saturating_add<T>(x: T, y: T) -> T;
fn simd_saturating_sub<T>(x: T, y: T) -> T;
}
// CHECK-LABEL: @sadd_i8x2
#[no_mangle]
pub unsafe fn sadd_i8x2(x: i8x2, y: i8x2) -> i8x2 {
// CHECK: %{{[0-9]+}} = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %{{[0-9]+}}, <2 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i8x4
#[no_mangle]
pub unsafe fn sadd_i8x4(x: i8x4, y: i8x4) -> i8x4 {
// CHECK: %{{[0-9]+}} = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %{{[0-9]+}}, <4 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i8x8
#[no_mangle]
pub unsafe fn sadd_i8x8(x: i8x8, y: i8x8) -> i8x8 {
// CHECK: %{{[0-9]+}} = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %{{[0-9]+}}, <8 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i8x16
#[no_mangle]
pub unsafe fn sadd_i8x16(x: i8x16, y: i8x16) -> i8x16 {
// CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i8x32
#[no_mangle]
pub unsafe fn sadd_i8x32(x: i8x32, y: i8x32) -> i8x32 {
// CHECK: %{{[0-9]+}} = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i8x64
#[no_mangle]
pub unsafe fn sadd_i8x64(x: i8x64, y: i8x64) -> i8x64 {
// CHECK: %{{[0-9]+}} = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i16x2
#[no_mangle]
pub unsafe fn sadd_i16x2(x: i16x2, y: i16x2) -> i16x2 {
// CHECK: %{{[0-9]+}} = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %{{[0-9]+}}, <2 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i16x4
#[no_mangle]
pub unsafe fn sadd_i16x4(x: i16x4, y: i16x4) -> i16x4 {
// CHECK: %{{[0-9]+}} = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %{{[0-9]+}}, <4 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i16x8
#[no_mangle]
pub unsafe fn sadd_i16x8(x: i16x8, y: i16x8) -> i16x8 {
// CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i16x16
#[no_mangle]
pub unsafe fn sadd_i16x16(x: i16x16, y: i16x16) -> i16x16 {
// CHECK: %{{[0-9]+}} = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i16x32
#[no_mangle]
pub unsafe fn sadd_i16x32(x: i16x32, y: i16x32) -> i16x32 {
// CHECK: %{{[0-9]+}} = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i32x2
#[no_mangle]
pub unsafe fn sadd_i32x2(x: i32x2, y: i32x2) -> i32x2 {
// CHECK: %{{[0-9]+}} = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %{{[0-9]+}}, <2 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i32x4
#[no_mangle]
pub unsafe fn sadd_i32x4(x: i32x4, y: i32x4) -> i32x4 {
// CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i32x8
#[no_mangle]
pub unsafe fn sadd_i32x8(x: i32x8, y: i32x8) -> i32x8 {
// CHECK: %{{[0-9]+}} = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %{{[0-9]+}}, <8 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i32x16
#[no_mangle]
pub unsafe fn sadd_i32x16(x: i32x16, y: i32x16) -> i32x16 {
// CHECK: %{{[0-9]+}} = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i64x2
#[no_mangle]
pub unsafe fn sadd_i64x2(x: i64x2, y: i64x2) -> i64x2 {
// CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i64x4
#[no_mangle]
pub unsafe fn sadd_i64x4(x: i64x4, y: i64x4) -> i64x4 {
// CHECK: %{{[0-9]+}} = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %{{[0-9]+}}, <4 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i64x8
#[no_mangle]
pub unsafe fn sadd_i64x8(x: i64x8, y: i64x8) -> i64x8 {
// CHECK: %{{[0-9]+}} = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i128x2
#[no_mangle]
pub unsafe fn sadd_i128x2(x: i128x2, y: i128x2) -> i128x2 {
// CHECK: %{{[0-9]+}} = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %{{[0-9]+}}, <2 x i128> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @sadd_i128x4
#[no_mangle]
pub unsafe fn sadd_i128x4(x: i128x4, y: i128x4) -> i128x4 {
// CHECK: %{{[0-9]+}} = call <4 x i128> @llvm.sadd.sat.v4i128(<4 x i128> %{{[0-9]+}}, <4 x i128> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x2
#[no_mangle]
pub unsafe fn uadd_u8x2(x: u8x2, y: u8x2) -> u8x2 {
// CHECK: %{{[0-9]+}} = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %{{[0-9]+}}, <2 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x4
#[no_mangle]
pub unsafe fn uadd_u8x4(x: u8x4, y: u8x4) -> u8x4 {
// CHECK: %{{[0-9]+}} = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %{{[0-9]+}}, <4 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x8
#[no_mangle]
pub unsafe fn uadd_u8x8(x: u8x8, y: u8x8) -> u8x8 {
// CHECK: %{{[0-9]+}} = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %{{[0-9]+}}, <8 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x16
#[no_mangle]
pub unsafe fn uadd_u8x16(x: u8x16, y: u8x16) -> u8x16 {
// CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x32
#[no_mangle]
pub unsafe fn uadd_u8x32(x: u8x32, y: u8x32) -> u8x32 {
// CHECK: %{{[0-9]+}} = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u8x64
#[no_mangle]
pub unsafe fn uadd_u8x64(x: u8x64, y: u8x64) -> u8x64 {
// CHECK: %{{[0-9]+}} = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u16x2
#[no_mangle]
pub unsafe fn uadd_u16x2(x: u16x2, y: u16x2) -> u16x2 {
// CHECK: %{{[0-9]+}} = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %{{[0-9]+}}, <2 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u16x4
#[no_mangle]
pub unsafe fn uadd_u16x4(x: u16x4, y: u16x4) -> u16x4 {
// CHECK: %{{[0-9]+}} = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %{{[0-9]+}}, <4 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u16x8
#[no_mangle]
pub unsafe fn uadd_u16x8(x: u16x8, y: u16x8) -> u16x8 {
// CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u16x16
#[no_mangle]
pub unsafe fn uadd_u16x16(x: u16x16, y: u16x16) -> u16x16 {
// CHECK: %{{[0-9]+}} = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u16x32
#[no_mangle]
pub unsafe fn uadd_u16x32(x: u16x32, y: u16x32) -> u16x32 {
// CHECK: %{{[0-9]+}} = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u32x2
#[no_mangle]
pub unsafe fn uadd_u32x2(x: u32x2, y: u32x2) -> u32x2 {
// CHECK: %{{[0-9]+}} = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %{{[0-9]+}}, <2 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u32x4
#[no_mangle]
pub unsafe fn uadd_u32x4(x: u32x4, y: u32x4) -> u32x4 {
// CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u32x8
#[no_mangle]
pub unsafe fn uadd_u32x8(x: u32x8, y: u32x8) -> u32x8 {
// CHECK: %{{[0-9]+}} = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %{{[0-9]+}}, <8 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u32x16
#[no_mangle]
pub unsafe fn uadd_u32x16(x: u32x16, y: u32x16) -> u32x16 {
// CHECK: %{{[0-9]+}} = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u64x2
#[no_mangle]
pub unsafe fn uadd_u64x2(x: u64x2, y: u64x2) -> u64x2 {
// CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u64x4
#[no_mangle]
pub unsafe fn uadd_u64x4(x: u64x4, y: u64x4) -> u64x4 {
// CHECK: %{{[0-9]+}} = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %{{[0-9]+}}, <4 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u64x8
#[no_mangle]
pub unsafe fn uadd_u64x8(x: u64x8, y: u64x8) -> u64x8 {
// CHECK: %{{[0-9]+}} = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u128x2
#[no_mangle]
pub unsafe fn uadd_u128x2(x: u128x2, y: u128x2) -> u128x2 {
// CHECK: %{{[0-9]+}} = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %{{[0-9]+}}, <2 x i128> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @uadd_u128x4
#[no_mangle]
pub unsafe fn uadd_u128x4(x: u128x4, y: u128x4) -> u128x4 {
// CHECK: %{{[0-9]+}} = call <4 x i128> @llvm.uadd.sat.v4i128(<4 x i128> %{{[0-9]+}}, <4 x i128> %{{[0-9]+}})
simd_saturating_add(x, y)
}
// CHECK-LABEL: @ssub_i8x2
#[no_mangle]
pub unsafe fn ssub_i8x2(x: i8x2, y: i8x2) -> i8x2 {
// CHECK: %{{[0-9]+}} = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %{{[0-9]+}}, <2 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i8x4
#[no_mangle]
pub unsafe fn ssub_i8x4(x: i8x4, y: i8x4) -> i8x4 {
// CHECK: %{{[0-9]+}} = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %{{[0-9]+}}, <4 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i8x8
#[no_mangle]
pub unsafe fn ssub_i8x8(x: i8x8, y: i8x8) -> i8x8 {
// CHECK: %{{[0-9]+}} = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %{{[0-9]+}}, <8 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i8x16
#[no_mangle]
pub unsafe fn ssub_i8x16(x: i8x16, y: i8x16) -> i8x16 {
// CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i8x32
#[no_mangle]
pub unsafe fn ssub_i8x32(x: i8x32, y: i8x32) -> i8x32 {
// CHECK: %{{[0-9]+}} = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i8x64
#[no_mangle]
pub unsafe fn ssub_i8x64(x: i8x64, y: i8x64) -> i8x64 {
// CHECK: %{{[0-9]+}} = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i16x2
#[no_mangle]
pub unsafe fn ssub_i16x2(x: i16x2, y: i16x2) -> i16x2 {
// CHECK: %{{[0-9]+}} = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %{{[0-9]+}}, <2 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i16x4
#[no_mangle]
pub unsafe fn ssub_i16x4(x: i16x4, y: i16x4) -> i16x4 {
// CHECK: %{{[0-9]+}} = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %{{[0-9]+}}, <4 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i16x8
#[no_mangle]
pub unsafe fn ssub_i16x8(x: i16x8, y: i16x8) -> i16x8 {
// CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i16x16
#[no_mangle]
pub unsafe fn ssub_i16x16(x: i16x16, y: i16x16) -> i16x16 {
// CHECK: %{{[0-9]+}} = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i16x32
#[no_mangle]
pub unsafe fn ssub_i16x32(x: i16x32, y: i16x32) -> i16x32 {
// CHECK: %{{[0-9]+}} = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i32x2
#[no_mangle]
pub unsafe fn ssub_i32x2(x: i32x2, y: i32x2) -> i32x2 {
// CHECK: %{{[0-9]+}} = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %{{[0-9]+}}, <2 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i32x4
#[no_mangle]
pub unsafe fn ssub_i32x4(x: i32x4, y: i32x4) -> i32x4 {
// CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i32x8
#[no_mangle]
pub unsafe fn ssub_i32x8(x: i32x8, y: i32x8) -> i32x8 {
// CHECK: %{{[0-9]+}} = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %{{[0-9]+}}, <8 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i32x16
#[no_mangle]
pub unsafe fn ssub_i32x16(x: i32x16, y: i32x16) -> i32x16 {
// CHECK: %{{[0-9]+}} = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i64x2
#[no_mangle]
pub unsafe fn ssub_i64x2(x: i64x2, y: i64x2) -> i64x2 {
// CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i64x4
#[no_mangle]
pub unsafe fn ssub_i64x4(x: i64x4, y: i64x4) -> i64x4 {
// CHECK: %{{[0-9]+}} = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %{{[0-9]+}}, <4 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i64x8
#[no_mangle]
pub unsafe fn ssub_i64x8(x: i64x8, y: i64x8) -> i64x8 {
// CHECK: %{{[0-9]+}} = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i128x2
#[no_mangle]
pub unsafe fn ssub_i128x2(x: i128x2, y: i128x2) -> i128x2 {
// CHECK: %{{[0-9]+}} = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %{{[0-9]+}}, <2 x i128> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @ssub_i128x4
#[no_mangle]
pub unsafe fn ssub_i128x4(x: i128x4, y: i128x4) -> i128x4 {
// CHECK: %{{[0-9]+}} = call <4 x i128> @llvm.ssub.sat.v4i128(<4 x i128> %{{[0-9]+}}, <4 x i128> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x2
#[no_mangle]
pub unsafe fn usub_u8x2(x: u8x2, y: u8x2) -> u8x2 {
// CHECK: %{{[0-9]+}} = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %{{[0-9]+}}, <2 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x4
#[no_mangle]
pub unsafe fn usub_u8x4(x: u8x4, y: u8x4) -> u8x4 {
// CHECK: %{{[0-9]+}} = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %{{[0-9]+}}, <4 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x8
#[no_mangle]
pub unsafe fn usub_u8x8(x: u8x8, y: u8x8) -> u8x8 {
// CHECK: %{{[0-9]+}} = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %{{[0-9]+}}, <8 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x16
#[no_mangle]
pub unsafe fn usub_u8x16(x: u8x16, y: u8x16) -> u8x16 {
// CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x32
#[no_mangle]
pub unsafe fn usub_u8x32(x: u8x32, y: u8x32) -> u8x32 {
// CHECK: %{{[0-9]+}} = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u8x64
#[no_mangle]
pub unsafe fn usub_u8x64(x: u8x64, y: u8x64) -> u8x64 {
// CHECK: %{{[0-9]+}} = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u16x2
#[no_mangle]
pub unsafe fn usub_u16x2(x: u16x2, y: u16x2) -> u16x2 {
// CHECK: %{{[0-9]+}} = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %{{[0-9]+}}, <2 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u16x4
#[no_mangle]
pub unsafe fn usub_u16x4(x: u16x4, y: u16x4) -> u16x4 {
// CHECK: %{{[0-9]+}} = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %{{[0-9]+}}, <4 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u16x8
#[no_mangle]
pub unsafe fn usub_u16x8(x: u16x8, y: u16x8) -> u16x8 {
// CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u16x16
#[no_mangle]
pub unsafe fn usub_u16x16(x: u16x16, y: u16x16) -> u16x16 {
// CHECK: %{{[0-9]+}} = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u16x32
#[no_mangle]
pub unsafe fn usub_u16x32(x: u16x32, y: u16x32) -> u16x32 {
// CHECK: %{{[0-9]+}} = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u32x2
#[no_mangle]
pub unsafe fn usub_u32x2(x: u32x2, y: u32x2) -> u32x2 {
// CHECK: %{{[0-9]+}} = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %{{[0-9]+}}, <2 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u32x4
#[no_mangle]
pub unsafe fn usub_u32x4(x: u32x4, y: u32x4) -> u32x4 {
// CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u32x8
#[no_mangle]
pub unsafe fn usub_u32x8(x: u32x8, y: u32x8) -> u32x8 {
// CHECK: %{{[0-9]+}} = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %{{[0-9]+}}, <8 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u32x16
#[no_mangle]
pub unsafe fn usub_u32x16(x: u32x16, y: u32x16) -> u32x16 {
// CHECK: %{{[0-9]+}} = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u64x2
#[no_mangle]
pub unsafe fn usub_u64x2(x: u64x2, y: u64x2) -> u64x2 {
// CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u64x4
#[no_mangle]
pub unsafe fn usub_u64x4(x: u64x4, y: u64x4) -> u64x4 {
// CHECK: %{{[0-9]+}} = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %{{[0-9]+}}, <4 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u64x8
#[no_mangle]
pub unsafe fn usub_u64x8(x: u64x8, y: u64x8) -> u64x8 {
// CHECK: %{{[0-9]+}} = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u128x2
#[no_mangle]
pub unsafe fn usub_u128x2(x: u128x2, y: u128x2) -> u128x2 {
// CHECK: %{{[0-9]+}} = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %{{[0-9]+}}, <2 x i128> %{{[0-9]+}})
simd_saturating_sub(x, y)
}
// CHECK-LABEL: @usub_u128x4
#[no_mangle]
pub unsafe fn usub_u128x4(x: u128x4, y: u128x4) -> u128x4 {
// CHECK: %{{[0-9]+}} = call <4 x i128> @llvm.usub.sat.v4i128(<4 x i128> %{{[0-9]+}}, <4 x i128> %{{[0-9]+}})
simd_saturating_sub(x, y)
}


@@ -0,0 +1,92 @@
// run-pass
// ignore-emscripten
// min-llvm-version 8.0
#![allow(non_camel_case_types)]
#![feature(repr_simd, platform_intrinsics)]
#[repr(simd)]
#[derive(Copy, Clone, PartialEq, Debug)]
struct u32x4(pub u32, pub u32, pub u32, pub u32);
#[repr(simd)]
#[derive(Copy, Clone, PartialEq, Debug)]
struct i32x4(pub i32, pub i32, pub i32, pub i32);
extern "platform-intrinsic" {
fn simd_saturating_add<T>(x: T, y: T) -> T;
fn simd_saturating_sub<T>(x: T, y: T) -> T;
}
fn main() {
// unsigned
{
const M: u32 = u32::max_value();
let a = u32x4(1, 2, 3, 4);
let b = u32x4(2, 4, 6, 8);
let m = u32x4(M, M, M, M);
let m1 = u32x4(M - 1, M - 1, M - 1, M - 1);
let z = u32x4(0, 0, 0, 0);
unsafe {
assert_eq!(simd_saturating_add(z, z), z);
assert_eq!(simd_saturating_add(z, a), a);
assert_eq!(simd_saturating_add(b, z), b);
assert_eq!(simd_saturating_add(a, a), b);
assert_eq!(simd_saturating_add(a, m), m);
assert_eq!(simd_saturating_add(m, b), m);
assert_eq!(simd_saturating_add(m1, a), m);
assert_eq!(simd_saturating_sub(b, z), b);
assert_eq!(simd_saturating_sub(b, a), a);
assert_eq!(simd_saturating_sub(a, a), z);
assert_eq!(simd_saturating_sub(a, b), z);
assert_eq!(simd_saturating_sub(a, m1), z);
assert_eq!(simd_saturating_sub(b, m1), z);
}
}
// signed
{
const MIN: i32 = i32::min_value();
const MAX: i32 = i32::max_value();
let a = i32x4(1, 2, 3, 4);
let b = i32x4(2, 4, 6, 8);
let c = i32x4(-1, -2, -3, -4);
let d = i32x4(-2, -4, -6, -8);
let max = i32x4(MAX, MAX, MAX, MAX);
let max1 = i32x4(MAX - 1, MAX - 1, MAX - 1, MAX - 1);
let min = i32x4(MIN, MIN, MIN, MIN);
let min1 = i32x4(MIN + 1, MIN + 1, MIN + 1, MIN + 1);
let z = i32x4(0, 0, 0, 0);
unsafe {
assert_eq!(simd_saturating_add(z, z), z);
assert_eq!(simd_saturating_add(z, a), a);
assert_eq!(simd_saturating_add(b, z), b);
assert_eq!(simd_saturating_add(a, a), b);
assert_eq!(simd_saturating_add(a, max), max);
assert_eq!(simd_saturating_add(max, b), max);
assert_eq!(simd_saturating_add(max1, a), max);
assert_eq!(simd_saturating_add(min1, z), min1);
assert_eq!(simd_saturating_add(min, z), min);
assert_eq!(simd_saturating_add(min1, c), min);
assert_eq!(simd_saturating_add(min, c), min);
assert_eq!(simd_saturating_add(min1, d), min);
assert_eq!(simd_saturating_add(min, d), min);
assert_eq!(simd_saturating_sub(b, z), b);
assert_eq!(simd_saturating_sub(b, a), a);
assert_eq!(simd_saturating_sub(a, a), z);
assert_eq!(simd_saturating_sub(a, b), c);
assert_eq!(simd_saturating_sub(z, max), min1);
assert_eq!(simd_saturating_sub(min1, z), min1);
assert_eq!(simd_saturating_sub(min1, a), min);
assert_eq!(simd_saturating_sub(min1, b), min);
}
}
}
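
Lane-wise, the intrinsics are expected to behave like the scalar saturating_add/saturating_sub methods on the integer types; a scalar sketch of the same boundary cases exercised above:

fn main() {
    assert_eq!(u32::max_value().saturating_add(2), u32::max_value());
    assert_eq!(1u32.saturating_sub(3), 0);
    assert_eq!(i32::max_value().saturating_add(1), i32::max_value());
    assert_eq!(i32::min_value().saturating_sub(1), i32::min_value());
}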


@@ -0,0 +1,38 @@
// ignore-emscripten
// ignore-tidy-linelength
#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct i32x4(pub i32, pub i32, pub i32, pub i32);
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct x4<T>(pub T, pub T, pub T, pub T);
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct f32x4(pub f32, pub f32, pub f32, pub f32);
extern "platform-intrinsic" {
fn simd_saturating_add<T>(x: T, y: T) -> T;
fn simd_saturating_sub<T>(x: T, y: T) -> T;
}
fn main() {
let x = i32x4(0, 0, 0, 0);
let y = x4(0_usize, 0, 0, 0);
let z = f32x4(0.0, 0.0, 0.0, 0.0);
unsafe {
simd_saturating_add(x, x);
simd_saturating_add(y, y);
simd_saturating_sub(x, x);
simd_saturating_sub(y, y);
simd_saturating_add(z, z);
//~^ ERROR expected element type `f32` of vector type `f32x4` to be a signed or unsigned integer type
simd_saturating_sub(z, z);
//~^ ERROR expected element type `f32` of vector type `f32x4` to be a signed or unsigned integer type
}
}


@@ -0,0 +1,15 @@
error[E0511]: invalid monomorphization of `simd_saturating_add` intrinsic: expected element type `f32` of vector type `f32x4` to be a signed or unsigned integer type
--> $DIR/simd-intrinsic-generic-arithmetic-saturating.rs:33:9
|
LL | simd_saturating_add(z, z);
| ^^^^^^^^^^^^^^^^^^^^^^^^^
error[E0511]: invalid monomorphization of `simd_saturating_sub` intrinsic: expected element type `f32` of vector type `f32x4` to be a signed or unsigned integer type
--> $DIR/simd-intrinsic-generic-arithmetic-saturating.rs:35:9
|
LL | simd_saturating_sub(z, z);
| ^^^^^^^^^^^^^^^^^^^^^^^^^
error: aborting due to 2 previous errors
For more information about this error, try `rustc --explain E0511`.