diff --git a/src/librustc_llvm/ffi.rs b/src/librustc_llvm/ffi.rs index 0ec5700f5f3..c0cdd212770 100644 --- a/src/librustc_llvm/ffi.rs +++ b/src/librustc_llvm/ffi.rs @@ -621,6 +621,7 @@ extern "C" { pub fn LLVMConstIntGetSExtValue(ConstantVal: ValueRef) -> c_longlong; pub fn LLVMRustConstInt128Get(ConstantVal: ValueRef, SExt: bool, high: *mut u64, low: *mut u64) -> bool; + pub fn LLVMConstRealGetDouble (ConstantVal: ValueRef, losesInfo: *mut Bool) -> f64; // Operations on composite constants @@ -1201,6 +1202,46 @@ extern "C" { Name: *const c_char) -> ValueRef; + pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef, + Acc: ValueRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef, + Acc: ValueRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef, + Src: ValueRef) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef, + Src: ValueRef, + IsSigned: bool) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef, + Src: ValueRef, + IsSigned: bool) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef, + Src: ValueRef, + IsNaN: bool) + -> ValueRef; + pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef, + Src: ValueRef, + IsNaN: bool) + -> ValueRef; + pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef; pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef; pub fn LLVMBuildPtrDiff(B: BuilderRef, @@ -1567,6 +1608,7 @@ extern "C" { pub fn LLVMRustWriteValueToString(value_ref: ValueRef, s: RustStringRef); pub fn LLVMIsAConstantInt(value_ref: ValueRef) -> ValueRef; + pub fn LLVMIsAConstantFP(value_ref: ValueRef) -> ValueRef; pub fn LLVMRustPassKind(Pass: PassRef) -> PassKind; pub fn LLVMRustFindAndCreatePass(Pass: *const c_char) -> PassRef; diff --git a/src/librustc_trans/builder.rs b/src/librustc_trans/builder.rs index d4e05a18e3a..3f5a9a54ff1 100644 --- a/src/librustc_trans/builder.rs +++ b/src/librustc_trans/builder.rs @@ -955,6 +955,147 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { } } + pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fadd_fast"); + unsafe { + // FIXME: add a non-fast math version once + // https://bugs.llvm.org/show_bug.cgi?id=36732 + // is fixed. + let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFAdd is not available in LLVM version < 5.0"); + } + llvm::LLVMRustSetHasUnsafeAlgebra(instr); + instr + } + } + pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmul_fast"); + unsafe { + // FIXME: add a non-fast math version once + // https://bugs.llvm.org/show_bug.cgi?id=36732 + // is fixed. + let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFMul is not available in LLVM version < 5.0"); + } + llvm::LLVMRustSetHasUnsafeAlgebra(instr); + instr + } + } + pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.add"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceAdd(self.llbuilder, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceAdd is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.mul"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceMul(self.llbuilder, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceMul is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.and"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceAnd(self.llbuilder, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceAnd is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.or"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceOr(self.llbuilder, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceOr is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.xor"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceXor is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_fmin(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmin"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, true); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFMin is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_fmax(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmax"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, true); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFMax is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmin_fast"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, false); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFMin is not available in LLVM version < 5.0"); + } + llvm::LLVMRustSetHasUnsafeAlgebra(instr); + instr + } + } + pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmax_fast"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, false); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceFMax is not available in LLVM version < 5.0"); + } + llvm::LLVMRustSetHasUnsafeAlgebra(instr); + instr + } + } + pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef { + self.count_insn("vector.reduce.min"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceMin is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef { + self.count_insn("vector.reduce.max"); + unsafe { + let instr = llvm::LLVMRustBuildVectorReduceMax(self.llbuilder, src, is_signed); + if instr.is_null() { + bug!("LLVMRustBuildVectorReduceMax is not available in LLVM version < 5.0"); + } + instr + } + } + pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef { self.count_insn("extractvalue"); assert_eq!(idx as c_uint as u64, idx); diff --git a/src/librustc_trans/common.rs b/src/librustc_trans/common.rs index c71c0cc0ebf..e83e73c8ae7 100644 --- a/src/librustc_trans/common.rs +++ b/src/librustc_trans/common.rs @@ -269,6 +269,19 @@ pub fn const_get_elt(v: ValueRef, idx: u64) -> ValueRef { } } +pub fn const_get_real(v: ValueRef) -> Option<(f64, bool)> { + unsafe { + if is_const_real(v) { + let mut loses_info: llvm::Bool = ::std::mem::uninitialized(); + let r = llvm::LLVMConstRealGetDouble(v, &mut loses_info as *mut llvm::Bool); + let loses_info = if loses_info == 1 { true } else { false }; + Some((r, loses_info)) + } else { + None + } + } +} + pub fn const_to_uint(v: ValueRef) -> u64 { unsafe { llvm::LLVMConstIntGetZExtValue(v) @@ -281,6 +294,13 @@ pub fn is_const_integral(v: ValueRef) -> bool { } } +pub fn is_const_real(v: ValueRef) -> bool { + unsafe { + !llvm::LLVMIsAConstantFP(v).is_null() + } +} + + #[inline] fn hi_lo_to_u128(lo: u64, hi: u64) -> u128 { ((hi as u128) << 64) | (lo as u128) diff --git a/src/librustc_trans/intrinsic.rs b/src/librustc_trans/intrinsic.rs index 4cef7470c62..c3de9e0ffcc 100644 --- a/src/librustc_trans/intrinsic.rs +++ b/src/librustc_trans/intrinsic.rs @@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>( name, $($fmt)*)); } } - macro_rules! require { - ($cond: expr, $($fmt: tt)*) => { - if !$cond { + macro_rules! return_error { + ($($fmt: tt)*) => { + { emit_error!($($fmt)*); return Err(()); } } } + + macro_rules! require { + ($cond: expr, $($fmt: tt)*) => { + if !$cond { + return_error!($($fmt)*); + } + }; + } macro_rules! require_simd { ($ty: expr, $position: expr) => { require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty) @@ -1145,6 +1153,161 @@ fn generic_simd_intrinsic<'a, 'tcx>( return Ok(bx.extract_element(args[0].immediate(), args[1].immediate())) } + macro_rules! arith_red { + ($name:tt : $integer_reduce:ident, $float_reduce:ident, $ordered:expr) => { + if name == $name { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + return match in_elem.sty { + ty::TyInt(_) | ty::TyUint(_) => { + let r = bx.$integer_reduce(args[0].immediate()); + if $ordered { + // if overflow occurs, the result is the + // mathematical result modulo 2^n: + if name.contains("mul") { + Ok(bx.mul(args[1].immediate(), r)) + } else { + Ok(bx.add(args[1].immediate(), r)) + } + } else { + Ok(bx.$integer_reduce(args[0].immediate())) + } + }, + ty::TyFloat(f) => { + // ordered arithmetic reductions take an accumulator + let acc = if $ordered { + let acc = args[1].immediate(); + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36734 + // * if the accumulator of the fadd isn't 0, incorrect + // code is generated + // * if the accumulator of the fmul isn't 1, incorrect + // code is generated + match const_get_real(acc) { + None => return_error!("accumulator of {} is not a constant", $name), + Some((v, loses_info)) => { + if $name.contains("mul") && v != 1.0_f64 { + return_error!("accumulator of {} is not 1.0", $name); + } else if $name.contains("add") && v != 0.0_f64 { + return_error!("accumulator of {} is not 0.0", $name); + } else if loses_info { + return_error!("accumulator of {} loses information", $name); + } + } + } + acc + } else { + // unordered arithmetic reductions do not: + match f.bit_width() { + 32 => C_undef(Type::f32(bx.cx)), + 64 => C_undef(Type::f64(bx.cx)), + v => { + return_error!(r#" +unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#, + $name, in_ty, in_elem, v, ret_ty + ) + } + } + + }; + Ok(bx.$float_reduce(acc, args[0].immediate())) + } + _ => { + return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty + ) + }, + } + } + } + } + + arith_red!("simd_reduce_add_ordered": vector_reduce_add, vector_reduce_fadd_fast, true); + arith_red!("simd_reduce_mul_ordered": vector_reduce_mul, vector_reduce_fmul_fast, true); + arith_red!("simd_reduce_add_unordered": vector_reduce_add, vector_reduce_fadd_fast, false); + arith_red!("simd_reduce_mul_unordered": vector_reduce_mul, vector_reduce_fmul_fast, false); + + macro_rules! minmax_red { + ($name:tt: $int_red:ident, $float_red:ident) => { + if name == $name { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + return match in_elem.sty { + ty::TyInt(_i) => { + Ok(bx.$int_red(args[0].immediate(), true)) + }, + ty::TyUint(_u) => { + Ok(bx.$int_red(args[0].immediate(), false)) + }, + ty::TyFloat(_f) => { + Ok(bx.$float_red(args[0].immediate())) + } + _ => { + return_error!("unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty) + }, + } + } + + } + } + + minmax_red!("simd_reduce_min": vector_reduce_min, vector_reduce_fmin); + minmax_red!("simd_reduce_max": vector_reduce_max, vector_reduce_fmax); + + minmax_red!("simd_reduce_min_nanless": vector_reduce_min, vector_reduce_fmin_fast); + minmax_red!("simd_reduce_max_nanless": vector_reduce_max, vector_reduce_fmax_fast); + + macro_rules! bitwise_red { + ($name:tt : $red:ident, $boolean:expr) => { + if name == $name { + let input = if !$boolean { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + args[0].immediate() + } else { + match in_elem.sty { + ty::TyInt(_) | ty::TyUint(_) => {}, + _ => { + return_error!("unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty) + } + } + + // boolean reductions operate on vectors of i1s: + let i1 = Type::i1(bx.cx); + let i1xn = Type::vector(&i1, in_len as u64); + bx.trunc(args[0].immediate(), i1xn) + }; + return match in_elem.sty { + ty::TyInt(_) | ty::TyUint(_) => { + let r = bx.$red(input); + Ok( + if !$boolean { + r + } else { + bx.zext(r, Type::bool(bx.cx)) + } + ) + }, + _ => { + return_error!("unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty) + }, + } + } + } + } + + bitwise_red!("simd_reduce_and": vector_reduce_and, false); + bitwise_red!("simd_reduce_or": vector_reduce_or, false); + bitwise_red!("simd_reduce_xor": vector_reduce_xor, false); + bitwise_red!("simd_reduce_all": vector_reduce_and, true); + bitwise_red!("simd_reduce_any": vector_reduce_or, true); + if name == "simd_cast" { require_simd!(ret_ty, "return"); let out_len = ret_ty.simd_size(tcx); diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 59cf8a0358e..99707a4a3c0 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -361,6 +361,14 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, "simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)), "simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)), "simd_cast" => (2, vec![param(0)], param(1)), + "simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool), + "simd_reduce_add_ordered" | "simd_reduce_mul_ordered" + => (2, vec![param(0), param(1)], param(1)), + "simd_reduce_add_unordered" | "simd_reduce_mul_unordered" | + "simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" | + "simd_reduce_min" | "simd_reduce_max" | + "simd_reduce_min_nanless" | "simd_reduce_max_nanless" + => (2, vec![param(0)], param(1)), name if name.starts_with("simd_shuffle") => { match name["simd_shuffle".len()..].parse() { Ok(n) => { diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp index 27d5496f576..a5644d6f9e2 100644 --- a/src/rustllvm/RustWrapper.cpp +++ b/src/rustllvm/RustWrapper.cpp @@ -21,6 +21,8 @@ #if LLVM_VERSION_GE(5, 0) #include "llvm/ADT/Optional.h" +#else +#include #endif //===----------------------------------------------------------------------=== @@ -1395,3 +1397,98 @@ LLVMRustModuleCost(LLVMModuleRef M) { auto f = unwrap(M)->functions(); return std::distance(std::begin(f), std::end(f)); } + +// Vector reductions: +#if LLVM_VERSION_GE(5, 0) +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc),unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMul(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateFMulReduce(unwrap(Acc),unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceAdd(LLVMBuilderRef B, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateAddReduce(unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMul(LLVMBuilderRef B, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateMulReduce(unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceAnd(LLVMBuilderRef B, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateAndReduce(unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceOr(LLVMBuilderRef B, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateOrReduce(unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceXor(LLVMBuilderRef B, LLVMValueRef Src) { + return wrap(unwrap(B)->CreateXorReduce(unwrap(Src))); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMin(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) { + return wrap(unwrap(B)->CreateIntMinReduce(unwrap(Src), IsSigned)); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) { + return wrap(unwrap(B)->CreateIntMaxReduce(unwrap(Src), IsSigned)); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) { + return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Src), NoNaN)); +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) { + return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN)); +} + +#else + +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef, LLVMValueRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMul(LLVMBuilderRef, LLVMValueRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceAdd(LLVMBuilderRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMul(LLVMBuilderRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceAnd(LLVMBuilderRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceOr(LLVMBuilderRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceXor(LLVMBuilderRef, LLVMValueRef) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMin(LLVMBuilderRef, LLVMValueRef, bool) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceMax(LLVMBuilderRef, LLVMValueRef, bool) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMin(LLVMBuilderRef, LLVMValueRef, bool) { + return nullptr; +} +extern "C" LLVMValueRef +LLVMRustBuildVectorReduceFMax(LLVMBuilderRef, LLVMValueRef, bool) { + return nullptr; +} +#endif diff --git a/src/test/compile-fail/simd-intrinsic-generic-reduction.rs b/src/test/compile-fail/simd-intrinsic-generic-reduction.rs new file mode 100644 index 00000000000..57e4bb76a6c --- /dev/null +++ b/src/test/compile-fail/simd-intrinsic-generic-reduction.rs @@ -0,0 +1,82 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// min-llvm-version 5.0 +// ignore-emscripten + +// Test that the simd_reduce_{op} intrinsics produce ok-ish error +// messages when misused. + +#![feature(repr_simd, platform_intrinsics)] +#![allow(non_camel_case_types)] + +#[repr(simd)] +#[derive(Copy, Clone)] +pub struct f32x4(pub f32, pub f32, pub f32, pub f32); + +#[repr(simd)] +#[derive(Copy, Clone)] +pub struct u32x4(pub u32, pub u32, pub u32, pub u32); + + +extern "platform-intrinsic" { + fn simd_reduce_add_ordered(x: T, y: U) -> U; + fn simd_reduce_mul_ordered(x: T, y: U) -> U; + fn simd_reduce_and(x: T) -> U; + fn simd_reduce_or(x: T) -> U; + fn simd_reduce_xor(x: T) -> U; + fn simd_reduce_all(x: T) -> bool; + fn simd_reduce_any(x: T) -> bool; +} + +fn main() { + let x = u32x4(0, 0, 0, 0); + let z = f32x4(0.0, 0.0, 0.0, 0.0); + + unsafe { + simd_reduce_add_ordered(z, 0_f32); + simd_reduce_mul_ordered(z, 1_f32); + + simd_reduce_add_ordered(z, 2_f32); + //~^ ERROR accumulator of simd_reduce_add_ordered is not 0.0 + simd_reduce_mul_ordered(z, 3_f32); + //~^ ERROR accumulator of simd_reduce_mul_ordered is not 1.0 + + let _: f32 = simd_reduce_and(x); + //~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32` + let _: f32 = simd_reduce_or(x); + //~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32` + let _: f32 = simd_reduce_xor(x); + //~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32` + + let _: f32 = simd_reduce_and(z); + //~^ ERROR unsupported simd_reduce_and from `f32x4` with element `f32` to `f32` + let _: f32 = simd_reduce_or(z); + //~^ ERROR unsupported simd_reduce_or from `f32x4` with element `f32` to `f32` + let _: f32 = simd_reduce_xor(z); + //~^ ERROR unsupported simd_reduce_xor from `f32x4` with element `f32` to `f32` + + let _: bool = simd_reduce_all(z); + //~^ ERROR unsupported simd_reduce_all from `f32x4` with element `f32` to `bool` + let _: bool = simd_reduce_any(z); + //~^ ERROR unsupported simd_reduce_any from `f32x4` with element `f32` to `bool` + + foo(0_f32); + } +} + +#[inline(never)] +unsafe fn foo(x: f32) { + let z = f32x4(0.0, 0.0, 0.0, 0.0); + simd_reduce_add_ordered(z, x); + //~^ ERROR accumulator of simd_reduce_add_ordered is not a constant + simd_reduce_mul_ordered(z, x); + //~^ ERROR accumulator of simd_reduce_mul_ordered is not a constant +} diff --git a/src/test/run-pass/simd-intrinsic-generic-reduction.rs b/src/test/run-pass/simd-intrinsic-generic-reduction.rs new file mode 100644 index 00000000000..9a1214d3b35 --- /dev/null +++ b/src/test/run-pass/simd-intrinsic-generic-reduction.rs @@ -0,0 +1,172 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// min-llvm-version 5.0 +// ignore-emscripten + +// Test that the simd_reduce_{op} intrinsics produce the correct results. + +#![feature(repr_simd, platform_intrinsics)] +#[allow(non_camel_case_types)] + +#[repr(simd)] +#[derive(Copy, Clone)] +struct i32x4(pub i32, pub i32, pub i32, pub i32); + +#[repr(simd)] +#[derive(Copy, Clone)] +struct u32x4(pub u32, pub u32, pub u32, pub u32); + +#[repr(simd)] +#[derive(Copy, Clone)] +struct f32x4(pub f32, pub f32, pub f32, pub f32); + +#[repr(simd)] +#[derive(Copy, Clone)] +struct b8x4(pub i8, pub i8, pub i8, pub i8); + +#[repr(simd)] +#[derive(Copy, Clone)] +struct b8x16( + pub i8, pub i8, pub i8, pub i8, + pub i8, pub i8, pub i8, pub i8, + pub i8, pub i8, pub i8, pub i8, + pub i8, pub i8, pub i8, pub i8 +); + +extern "platform-intrinsic" { + fn simd_reduce_add_unordered(x: T) -> U; + fn simd_reduce_mul_unordered(x: T) -> U; + fn simd_reduce_add_ordered(x: T, acc: U) -> U; + fn simd_reduce_mul_ordered(x: T, acc: U) -> U; + fn simd_reduce_min(x: T) -> U; + fn simd_reduce_max(x: T) -> U; + fn simd_reduce_min_nanless(x: T) -> U; + fn simd_reduce_max_nanless(x: T) -> U; + fn simd_reduce_and(x: T) -> U; + fn simd_reduce_or(x: T) -> U; + fn simd_reduce_xor(x: T) -> U; + fn simd_reduce_all(x: T) -> bool; + fn simd_reduce_any(x: T) -> bool; +} + +fn main() { + unsafe { + let x = i32x4(1, -2, 3, 4); + let r: i32 = simd_reduce_add_unordered(x); + assert_eq!(r, 6_i32); + let r: i32 = simd_reduce_mul_unordered(x); + assert_eq!(r, -24_i32); + let r: i32 = simd_reduce_add_ordered(x, -1); + assert_eq!(r, 5_i32); + let r: i32 = simd_reduce_mul_ordered(x, -1); + assert_eq!(r, 24_i32); + + let r: i32 = simd_reduce_min(x); + assert_eq!(r, -2_i32); + let r: i32 = simd_reduce_max(x); + assert_eq!(r, 4_i32); + + let x = i32x4(-1, -1, -1, -1); + let r: i32 = simd_reduce_and(x); + assert_eq!(r, -1_i32); + let r: i32 = simd_reduce_or(x); + assert_eq!(r, -1_i32); + let r: i32 = simd_reduce_xor(x); + assert_eq!(r, 0_i32); + + let x = i32x4(-1, -1, 0, -1); + let r: i32 = simd_reduce_and(x); + assert_eq!(r, 0_i32); + let r: i32 = simd_reduce_or(x); + assert_eq!(r, -1_i32); + let r: i32 = simd_reduce_xor(x); + assert_eq!(r, -1_i32); + } + + unsafe { + let x = u32x4(1, 2, 3, 4); + let r: u32 = simd_reduce_add_unordered(x); + assert_eq!(r, 10_u32); + let r: u32 = simd_reduce_mul_unordered(x); + assert_eq!(r, 24_u32); + let r: u32 = simd_reduce_add_ordered(x, 1); + assert_eq!(r, 11_u32); + let r: u32 = simd_reduce_mul_ordered(x, 2); + assert_eq!(r, 48_u32); + + let r: u32 = simd_reduce_min(x); + assert_eq!(r, 1_u32); + let r: u32 = simd_reduce_max(x); + assert_eq!(r, 4_u32); + + let t = u32::max_value(); + let x = u32x4(t, t, t, t); + let r: u32 = simd_reduce_and(x); + assert_eq!(r, t); + let r: u32 = simd_reduce_or(x); + assert_eq!(r, t); + let r: u32 = simd_reduce_xor(x); + assert_eq!(r, 0_u32); + + let x = u32x4(t, t, 0, t); + let r: u32 = simd_reduce_and(x); + assert_eq!(r, 0_u32); + let r: u32 = simd_reduce_or(x); + assert_eq!(r, t); + let r: u32 = simd_reduce_xor(x); + assert_eq!(r, t); + } + + unsafe { + let x = f32x4(1., -2., 3., 4.); + let r: f32 = simd_reduce_add_unordered(x); + assert_eq!(r, 6_f32); + let r: f32 = simd_reduce_mul_unordered(x); + assert_eq!(r, -24_f32); + // FIXME: only works correctly for accumulator, 0: + // https://bugs.llvm.org/show_bug.cgi?id=36734 + let r: f32 = simd_reduce_add_ordered(x, 0.); + assert_eq!(r, 6_f32); + // FIXME: only works correctly for accumulator, 1: + // https://bugs.llvm.org/show_bug.cgi?id=36734 + let r: f32 = simd_reduce_mul_ordered(x, 1.); + assert_eq!(r, -24_f32); + + let r: f32 = simd_reduce_min(x); + assert_eq!(r, -2_f32); + let r: f32 = simd_reduce_max(x); + assert_eq!(r, 4_f32); + let r: f32 = simd_reduce_min_nanless(x); + assert_eq!(r, -2_f32); + let r: f32 = simd_reduce_max_nanless(x); + assert_eq!(r, 4_f32); + } + + unsafe { + let x = b8x4(!0, !0, !0, !0); + let r: bool = simd_reduce_all(x); + assert_eq!(r, true); + let r: bool = simd_reduce_any(x); + assert_eq!(r, true); + + let x = b8x4(!0, !0, 0, !0); + let r: bool = simd_reduce_all(x); + assert_eq!(r, false); + let r: bool = simd_reduce_any(x); + assert_eq!(r, true); + + let x = b8x4(0, 0, 0, 0); + let r: bool = simd_reduce_all(x); + assert_eq!(r, false); + let r: bool = simd_reduce_any(x); + assert_eq!(r, false); + } +}