From 4df874f73d329e2aa3ffa7800f26476499302ec6 Mon Sep 17 00:00:00 2001 From: Andy Sadler Date: Sun, 12 Jun 2022 01:06:58 -0500 Subject: [PATCH] simd: Implement missing reduction intrinsics Implements the following simd reduction intrinsics: - simd_reduce_add_ordered - simd_reduce_mul_ordered - simd_reduce_min_nanless - simd_reduce_max_nanless - simd_reduce_xor - simd_reduce_any - simd_reduce_all Also fixes the ordering of simd_reduce_min and simd_reduce_max, which were tested to be flipped. Both simd_reduce_min_nanless and simd_reduce_max_nanless are identical to their non-nanless variants for the time being. An attempt was made at a more optimal codegen solution based on vector_reduce_op. However, this approach ran into masking issues for floating-point vector types, which appears to be broken for the same reason that comparison operations such as simd_lt are broken for floating-point vector types. More investigation is required, however, to determine a root cause and appropriate fix. This should be enough to pass the generic-reduction-pass.rs ui tests with the 'master' feature enabled. Signed-off-by: Andy Sadler --- failing-ui-tests.txt | 1 - failing-ui-tests12.txt | 1 + src/builder.rs | 77 ++++++++++++++++++++++++++++++++++++++++-- src/intrinsic/simd.rs | 39 +++++++++++++++------ src/type_.rs | 4 --- 5 files changed, 105 insertions(+), 17 deletions(-) diff --git a/failing-ui-tests.txt b/failing-ui-tests.txt index 0e34110a060..6d36c963013 100644 --- a/failing-ui-tests.txt +++ b/failing-ui-tests.txt @@ -40,7 +40,6 @@ src/test/ui/simd/intrinsic/generic-as.rs src/test/ui/simd/intrinsic/generic-bitmask-pass.rs src/test/ui/simd/intrinsic/generic-comparison-pass.rs src/test/ui/simd/intrinsic/generic-gather-pass.rs -src/test/ui/simd/intrinsic/generic-reduction-pass.rs src/test/ui/simd/intrinsic/generic-select-pass.rs src/test/ui/simd/issue-17170.rs src/test/ui/simd/issue-39720.rs diff --git a/failing-ui-tests12.txt b/failing-ui-tests12.txt index 0a50c0a2ce1..d0d8a08421a 100644 --- a/failing-ui-tests12.txt +++ b/failing-ui-tests12.txt @@ -11,6 +11,7 @@ src/test/ui/simd/intrinsic/generic-arithmetic-saturating-pass.rs src/test/ui/simd/intrinsic/generic-cast-pass.rs src/test/ui/simd/intrinsic/generic-cast-pointer-width.rs src/test/ui/simd/intrinsic/generic-elements-pass.rs +src/test/ui/simd/intrinsic/generic-reduction-pass.rs src/test/ui/simd/intrinsic/inlining-issue67557-ice.rs src/test/ui/simd/intrinsic/inlining-issue67557.rs src/test/ui/simd/monomorphize-shuffle-index.rs diff --git a/src/builder.rs b/src/builder.rs index 41df7e647b5..899eff06412 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -1460,15 +1460,47 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { unimplemented!(); } + #[cfg(feature="master")] + pub fn vector_reduce_fadd(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_count = vector_type.get_num_units(); + (0..element_count).into_iter() + .map(|i| self.context + .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _)) + .to_rvalue()) + .fold(acc, |x, i| x + i) + } + + #[cfg(not(feature="master"))] + pub fn vector_reduce_fadd(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!(); + } + pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } + #[cfg(feature="master")] + pub fn vector_reduce_fmul(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_count = vector_type.get_num_units(); + (0..element_count).into_iter() + .map(|i| self.context + .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _)) + .to_rvalue()) + .fold(acc, |x, i| x * i) + } + + #[cfg(not(feature="master"))] + pub fn vector_reduce_fmul(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!() + } + // Inspired by Hacker's Delight min implementation. pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { self.vector_reduce(src, |a, b, context| { let differences_or_zeros = difference_or_zero(a, b, context); - context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros) + context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros) }) } @@ -1476,10 +1508,51 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { self.vector_reduce(src, |a, b, context| { let differences_or_zeros = difference_or_zero(a, b, context); - context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros) + context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros) }) } + #[cfg(feature="master")] + pub fn vector_reduce_fmin(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { + let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_count = vector_type.get_num_units(); + let mut acc = self.context.new_vector_access(None, src, self.context.new_rvalue_zero(self.int_type)).to_rvalue(); + for i in 1..element_count { + let elem = self.context + .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _)) + .to_rvalue(); + let cmp = self.context.new_comparison(None, ComparisonOp::LessThan, acc, elem); + acc = self.select(cmp, acc, elem); + } + acc + } + + #[cfg(not(feature="master"))] + pub fn vector_reduce_fmin(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!(); + } + + #[cfg(feature="master")] + pub fn vector_reduce_fmax(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { + let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_count = vector_type.get_num_units(); + let mut acc = self.context.new_vector_access(None, src, self.context.new_rvalue_zero(self.int_type)).to_rvalue(); + for i in 1..element_count { + let elem = self.context + .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _)) + .to_rvalue(); + let cmp = self.context.new_comparison(None, ComparisonOp::GreaterThan, acc, elem); + acc = self.select(cmp, acc, elem); + } + acc + } + + #[cfg(not(feature="master"))] + pub fn vector_reduce_fmax(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!(); + } + + pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> { // cond is a vector of integers, not of bools. let cond_type = cond.get_type(); diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index dbf6ee6d285..8aed06869a9 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -2,7 +2,7 @@ use gccjit::{ToRValue, ComparisonOp, UnaryOp}; use gccjit::{BinaryOp, RValue, Type}; use rustc_codegen_ssa::base::compare_simd_types; -use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error}; +use rustc_codegen_ssa::common::{IntPredicate, TypeKind, span_invalid_monomorphization_error}; use rustc_codegen_ssa::mir::operand::OperandRef; use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods}; @@ -667,9 +667,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, mul, 1.0 ); + arith_red!( + simd_reduce_add_ordered: BinaryOp::Plus, + vector_reduce_fadd, + true, + add, + 0.0 + ); + arith_red!( + simd_reduce_mul_ordered: BinaryOp::Mult, + vector_reduce_fmul, + true, + mul, + 1.0 + ); + macro_rules! minmax_red { - ($name:ident: $reduction:ident) => { + ($name:ident: $int_red:ident, $float_red:ident) => { if name == sym::$name { require!( ret_ty == in_elem, @@ -679,7 +694,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, ret_ty ); return match in_elem.kind() { - ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())), + ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())), + ty::Float(_) => Ok(bx.$float_red(args[0].immediate())), _ => return_error!( "unsupported {} from `{}` with element `{}` to `{}`", sym::$name, @@ -692,8 +708,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, }; } - minmax_red!(simd_reduce_min: vector_reduce_min); - minmax_red!(simd_reduce_max: vector_reduce_max); + minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin); + minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax); + // TODO(sadlerap): revisit these intrinsics to generate more optimal reductions + minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin); + minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax); macro_rules! bitwise_red { ($name:ident : $op:expr, $boolean:expr) => { @@ -719,15 +738,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, ), } - // boolean reductions operate on vectors of i1s: - let i1 = bx.type_i1(); - let i1xn = bx.type_vector(i1, in_len as u64); - bx.trunc(args[0].immediate(), i1xn) + args[0].immediate() }; return match in_elem.kind() { ty::Int(_) | ty::Uint(_) => { let r = bx.vector_reduce_op(input, $op); - Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) }) + Ok(if !$boolean { r } else { bx.icmp(IntPredicate::IntNE, r, bx.context.new_rvalue_zero(r.get_type())) }) } _ => return_error!( "unsupported {} from `{}` with element `{}` to `{}`", @@ -743,6 +759,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false); bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false); + bitwise_red!(simd_reduce_xor: BinaryOp::BitwiseXor, false); + bitwise_red!(simd_reduce_all: BinaryOp::BitwiseAnd, true); + bitwise_red!(simd_reduce_any: BinaryOp::BitwiseOr, true); unimplemented!("simd {}", name); } diff --git a/src/type_.rs b/src/type_.rs index c97e9586005..d7eca2a33df 100644 --- a/src/type_.rs +++ b/src/type_.rs @@ -247,10 +247,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { self.context.new_array_type(None, ty, len) } - - pub fn type_bool(&self) -> Type<'gcc> { - self.context.new_type::() - } } pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec>, bool) {