simd: Implement missing reduction intrinsics
Implements the following simd reduction intrinsics:

- simd_reduce_add_ordered
- simd_reduce_mul_ordered
- simd_reduce_min_nanless
- simd_reduce_max_nanless
- simd_reduce_xor
- simd_reduce_any
- simd_reduce_all

Also fixes the ordering of simd_reduce_min and simd_reduce_max, which testing showed to be flipped. For the time being, simd_reduce_min_nanless and simd_reduce_max_nanless are identical to their non-nanless variants.

An attempt was made at a more optimal codegen solution based on vector_reduce_op. However, that approach ran into masking issues for floating-point vector types; it appears to be broken for the same reason that comparison operations such as simd_lt are broken for floating-point vector types. More investigation is required to determine the root cause and an appropriate fix.

This should be enough to pass the generic-reduction-pass.rs ui test with the 'master' feature enabled.

Signed-off-by: Andy Sadler <andrewsadler122@gmail.com>
This commit is contained in:
parent
b4626b3ca0
commit
4df874f73d
@ -40,7 +40,6 @@ src/test/ui/simd/intrinsic/generic-as.rs
|
||||
src/test/ui/simd/intrinsic/generic-bitmask-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-comparison-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-gather-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-reduction-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-select-pass.rs
|
||||
src/test/ui/simd/issue-17170.rs
|
||||
src/test/ui/simd/issue-39720.rs
|
||||
|
@ -11,6 +11,7 @@ src/test/ui/simd/intrinsic/generic-arithmetic-saturating-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-cast-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-cast-pointer-width.rs
|
||||
src/test/ui/simd/intrinsic/generic-elements-pass.rs
|
||||
src/test/ui/simd/intrinsic/generic-reduction-pass.rs
|
||||
src/test/ui/simd/intrinsic/inlining-issue67557-ice.rs
|
||||
src/test/ui/simd/intrinsic/inlining-issue67557.rs
|
||||
src/test/ui/simd/monomorphize-shuffle-index.rs
|
||||
|
@ -1460,15 +1460,47 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
/// Adds every lane of `src` onto `acc`, one lane at a time.
///
/// Lanes are read in increasing index order (0, 1, ...), and each one is
/// added to the running total with the `+` operator on `RValue`, so the
/// additions happen strictly left to right starting from `acc`.
///
/// Panics with "vector type" if the type of `src` is not a vector type.
#[cfg(feature="master")]
pub fn vector_reduce_fadd(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
    let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
    let lane_count = vector_type.get_num_units();

    let mut total = acc;
    for lane in 0..lane_count {
        // Build the constant lane index, then extract that lane as an rvalue.
        let index = self.context.new_rvalue_from_int(self.int_type, lane as _);
        let element = self.context.new_vector_access(None, src, index).to_rvalue();
        total = total + element;
    }
    total
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn vector_reduce_fadd(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
/// Not implemented: ignores both arguments and always panics through
/// `unimplemented!()`.
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
    unimplemented!()
}
|
||||
|
||||
/// Multiplies `acc` by every lane of `src`, one lane at a time.
///
/// Lanes are read in increasing index order (0, 1, ...), and the running
/// product is updated with the `*` operator on `RValue`, so the
/// multiplications happen strictly left to right starting from `acc`.
///
/// Panics with "vector type" if the type of `src` is not a vector type.
#[cfg(feature="master")]
pub fn vector_reduce_fmul(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
    let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
    let lane_count = vector_type.get_num_units();

    let mut product = acc;
    for lane in 0..lane_count {
        // Build the constant lane index, then extract that lane as an rvalue.
        let index = self.context.new_rvalue_from_int(self.int_type, lane as _);
        let element = self.context.new_vector_access(None, src, index).to_rvalue();
        product = product * element;
    }
    product
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn vector_reduce_fmul(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
// Inspired by Hacker's Delight min implementation.
|
||||
pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
self.vector_reduce(src, |a, b, context| {
|
||||
let differences_or_zeros = difference_or_zero(a, b, context);
|
||||
context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
|
||||
context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
|
||||
})
|
||||
}
|
||||
|
||||
@ -1476,10 +1508,51 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
self.vector_reduce(src, |a, b, context| {
|
||||
let differences_or_zeros = difference_or_zero(a, b, context);
|
||||
context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
|
||||
context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
|
||||
})
|
||||
}
|
||||
|
||||
/// Reduces the lanes of `src` to a single value by repeated
/// compare-and-select with `ComparisonOp::LessThan`.
///
/// The running value starts as lane 0. For each following lane, in
/// increasing index order, a `running < lane` comparison is built and
/// passed to `self.select(cmp, running, lane)` to produce the next
/// running value.
///
/// Panics with "vector type" if the type of `src` is not a vector type.
#[cfg(feature="master")]
pub fn vector_reduce_fmin(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
    let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
    let lane_count = vector_type.get_num_units();

    // Lane 0 seeds the reduction.
    let zero_index = self.context.new_rvalue_zero(self.int_type);
    let first = self.context.new_vector_access(None, src, zero_index).to_rvalue();

    (1..lane_count).fold(first, |running, lane| {
        let index = self.context.new_rvalue_from_int(self.int_type, lane as _);
        let candidate = self.context.new_vector_access(None, src, index).to_rvalue();
        let is_less = self.context.new_comparison(None, ComparisonOp::LessThan, running, candidate);
        self.select(is_less, running, candidate)
    })
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn vector_reduce_fmin(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
/// Reduces the lanes of `src` to a single value by repeated
/// compare-and-select with `ComparisonOp::GreaterThan`.
///
/// The running value starts as lane 0. For each following lane, in
/// increasing index order, a `running > lane` comparison is built and
/// passed to `self.select(cmp, running, lane)` to produce the next
/// running value.
///
/// Panics with "vector type" if the type of `src` is not a vector type.
#[cfg(feature="master")]
pub fn vector_reduce_fmax(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
    let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
    let lane_count = vector_type.get_num_units();

    // Lane 0 seeds the reduction.
    let zero_index = self.context.new_rvalue_zero(self.int_type);
    let first = self.context.new_vector_access(None, src, zero_index).to_rvalue();

    (1..lane_count).fold(first, |running, lane| {
        let index = self.context.new_rvalue_from_int(self.int_type, lane as _);
        let candidate = self.context.new_vector_access(None, src, index).to_rvalue();
        let is_greater = self.context.new_comparison(None, ComparisonOp::GreaterThan, running, candidate);
        self.select(is_greater, running, candidate)
    })
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn vector_reduce_fmax(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
|
||||
pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> {
|
||||
// cond is a vector of integers, not of bools.
|
||||
let cond_type = cond.get_type();
|
||||
|
@ -2,7 +2,7 @@
|
||||
use gccjit::{ToRValue, ComparisonOp, UnaryOp};
|
||||
use gccjit::{BinaryOp, RValue, Type};
|
||||
use rustc_codegen_ssa::base::compare_simd_types;
|
||||
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
|
||||
use rustc_codegen_ssa::common::{IntPredicate, TypeKind, span_invalid_monomorphization_error};
|
||||
use rustc_codegen_ssa::mir::operand::OperandRef;
|
||||
use rustc_codegen_ssa::mir::place::PlaceRef;
|
||||
use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
|
||||
@ -667,9 +667,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_add_ordered: BinaryOp::Plus,
|
||||
vector_reduce_fadd,
|
||||
true,
|
||||
add,
|
||||
0.0
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_mul_ordered: BinaryOp::Mult,
|
||||
vector_reduce_fmul,
|
||||
true,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
|
||||
|
||||
macro_rules! minmax_red {
|
||||
($name:ident: $reduction:ident) => {
|
||||
($name:ident: $int_red:ident, $float_red:ident) => {
|
||||
if name == sym::$name {
|
||||
require!(
|
||||
ret_ty == in_elem,
|
||||
@ -679,7 +694,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
ret_ty
|
||||
);
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())),
|
||||
ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
|
||||
ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
sym::$name,
|
||||
@ -692,8 +708,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
};
|
||||
}
|
||||
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max);
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
||||
// TODO(sadlerap): revisit these intrinsics to generate more optimal reductions
|
||||
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin);
|
||||
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax);
|
||||
|
||||
macro_rules! bitwise_red {
|
||||
($name:ident : $op:expr, $boolean:expr) => {
|
||||
@ -719,15 +738,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
),
|
||||
}
|
||||
|
||||
// boolean reductions operate on vectors of i1s:
|
||||
let i1 = bx.type_i1();
|
||||
let i1xn = bx.type_vector(i1, in_len as u64);
|
||||
bx.trunc(args[0].immediate(), i1xn)
|
||||
args[0].immediate()
|
||||
};
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {
|
||||
let r = bx.vector_reduce_op(input, $op);
|
||||
Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
|
||||
Ok(if !$boolean { r } else { bx.icmp(IntPredicate::IntNE, r, bx.context.new_rvalue_zero(r.get_type())) })
|
||||
}
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
@ -743,6 +759,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
|
||||
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
|
||||
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
|
||||
bitwise_red!(simd_reduce_xor: BinaryOp::BitwiseXor, false);
|
||||
bitwise_red!(simd_reduce_all: BinaryOp::BitwiseAnd, true);
|
||||
bitwise_red!(simd_reduce_any: BinaryOp::BitwiseOr, true);
|
||||
|
||||
unimplemented!("simd {}", name);
|
||||
}
|
||||
|
@ -247,10 +247,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
|
||||
self.context.new_array_type(None, ty, len)
|
||||
}
|
||||
|
||||
pub fn type_bool(&self) -> Type<'gcc> {
|
||||
self.context.new_type::<bool>()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec<Type<'gcc>>, bool) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user