diff --git a/src/asm.rs b/src/asm.rs index 1cae78114c9..738c990fa82 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -595,7 +595,7 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister { InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg) | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) => "x", InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v", - InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => unimplemented!(), + InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => "Yk", InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => unimplemented!(), InlineAsmRegClass::X86( X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg, diff --git a/src/builder.rs b/src/builder.rs index f0b93c3d517..160a7df0315 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -7,6 +7,7 @@ use gccjit::{ BinaryOp, Block, ComparisonOp, + Context, Function, LValue, RValue, @@ -1380,6 +1381,85 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { pub fn shuffle_vector(&mut self, _v1: RValue<'gcc>, _v2: RValue<'gcc>, _mask: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } + + pub fn vector_reduce(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc> + where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc> + { + let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_count = vector_type.get_num_units(); + let mut vector_elements = vec![]; + for i in 0..element_count { + vector_elements.push(i); + } + let mask_type = self.context.new_vector_type(self.int_type, element_count as u64); + let mut shift = 1; + let mut res = src; + while shift < element_count { + let vector_elements: Vec<_> = + vector_elements.iter() + .map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32)) + .collect(); + let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements); + let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask); + shift *= 2; + res = op(res, shifted, &self.context); + } + self.context.new_vector_access(None, res, self.context.new_rvalue_zero(self.int_type)) + .to_rvalue() + } + + pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> { + self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b)) + } + + pub fn vector_reduce_fadd_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!(); + } + + pub fn vector_reduce_fmul_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + unimplemented!(); + } + + // Inspired by Hacker's Delight min implementation. + pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { + self.vector_reduce(src, |a, b, context| { + let differences_or_zeros = difference_or_zero(a, b, context); + context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros) + }) + } + + // Inspired by Hacker's Delight max implementation. + pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { + self.vector_reduce(src, |a, b, context| { + let differences_or_zeros = difference_or_zero(a, b, context); + context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros) + }) + } + + pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> { + // cond is a vector of integers, not of bools. + let vector_type = cond.get_type().dyncast_vector().expect("vector type"); + let num_units = vector_type.get_num_units(); + let vector_type = self.context.new_vector_type(self.int_type, num_units as u64); + let zeros = vec![self.context.new_rvalue_zero(self.int_type); num_units]; + let zeros = self.context.new_rvalue_from_vector(None, vector_type, &zeros); + + let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros); + let then_vals = masks & then_val; + + let ones = vec![self.context.new_rvalue_one(self.int_type); num_units]; + let ones = self.context.new_rvalue_from_vector(None, vector_type, &ones); + let inverted_masks = masks + ones; + let else_vals = inverted_masks & else_val; + + then_vals | else_vals + } +} + +fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> { + let difference = a - b; + let masks = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a); + difference & masks } impl<'a, 'gcc, 'tcx> StaticBuilderMethods for Builder<'a, 'gcc, 'tcx> { diff --git a/src/common.rs b/src/common.rs index 703e20947fe..e4a08da446b 100644 --- a/src/common.rs +++ b/src/common.rs @@ -117,8 +117,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { unimplemented!(); } - fn const_real(&self, _t: Type<'gcc>, _val: f64) -> RValue<'gcc> { - unimplemented!(); + fn const_real(&self, typ: Type<'gcc>, val: f64) -> RValue<'gcc> { + self.context.new_rvalue_from_double(typ, val) } fn const_str(&self, s: Symbol) -> (RValue<'gcc>, RValue<'gcc>) { diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index 9204fbdfaba..dccfb89409d 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; -use gccjit::{RValue, Type, ToRValue}; +use gccjit::{BinaryOp, RValue, Type, ToRValue}; use rustc_codegen_ssa::base::compare_simd_types; use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error}; use rustc_codegen_ssa::mir::operand::OperandRef; @@ -222,6 +222,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue()); } + if name == sym::simd_select { + let m_elem_ty = in_elem; + let m_len = in_len; + require_simd!(arg_tys[1], "argument"); + let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx()); + require!( + m_len == v_len, + "mismatched lengths: mask length `{}` != other vector length `{}`", + m_len, + v_len + ); + match m_elem_ty.kind() { + ty::Int(_) => {} + _ => return_error!("mask element type is `{}`, expected `i_`", m_elem_ty), + } + return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate())); + } + if name == sym::simd_cast { require_simd!(ret_ty, "return"); let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); @@ -543,7 +561,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, } macro_rules! arith_red { - ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident, + ($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident, $identity:expr) => { if name == sym::$name { require!( @@ -555,36 +573,25 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, ); return match in_elem.kind() { ty::Int(_) | ty::Uint(_) => { - let r = bx.$integer_reduce(args[0].immediate()); + let r = bx.vector_reduce_op(args[0].immediate(), $vec_op); if $ordered { // if overflow occurs, the result is the // mathematical result modulo 2^n: Ok(bx.$op(args[1].immediate(), r)) - } else { - Ok(bx.$integer_reduce(args[0].immediate())) + } + else { + Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op)) } } - ty::Float(f) => { - let acc = if $ordered { + ty::Float(_) => { + if $ordered { // ordered arithmetic reductions take an accumulator - args[1].immediate() - } else { - // unordered arithmetic reductions use the identity accumulator - match f.bit_width() { - 32 => bx.const_real(bx.type_f32(), $identity), - 64 => bx.const_real(bx.type_f64(), $identity), - v => return_error!( - r#" -unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#, - sym::$name, - in_ty, - in_elem, - v, - ret_ty - ), - } - }; - Ok(bx.$float_reduce(acc, args[0].immediate())) + let acc = args[1].immediate(); + Ok(bx.$float_reduce(acc, args[0].immediate())) + } + else { + Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op)) + } } _ => return_error!( "unsupported {} from `{}` with element `{}` to `{}`", @@ -598,14 +605,96 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#, }; } - // TODO: use a recursive algorithm a-la Hacker's Delight. arith_red!( - simd_reduce_add_unordered: vector_reduce_add, + simd_reduce_add_unordered: BinaryOp::Plus, vector_reduce_fadd_fast, false, add, - 0.0 + 0.0 // TODO: Use this argument. ); + arith_red!( + simd_reduce_mul_unordered: BinaryOp::Mult, + vector_reduce_fmul_fast, + false, + mul, + 1.0 + ); + + macro_rules! minmax_red { + ($name:ident: $reduction:ident) => { + if name == sym::$name { + require!( + ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, + in_ty, + ret_ty + ); + return match in_elem.kind() { + ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())), + _ => return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + sym::$name, + in_ty, + in_elem, + ret_ty + ), + }; + } + }; + } + + minmax_red!(simd_reduce_min: vector_reduce_min); + minmax_red!(simd_reduce_max: vector_reduce_max); + + macro_rules! bitwise_red { + ($name:ident : $op:expr, $boolean:expr) => { + if name == sym::$name { + let input = if !$boolean { + require!( + ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, + in_ty, + ret_ty + ); + args[0].immediate() + } else { + match in_elem.kind() { + ty::Int(_) | ty::Uint(_) => {} + _ => return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + sym::$name, + in_ty, + in_elem, + ret_ty + ), + } + + // boolean reductions operate on vectors of i1s: + let i1 = bx.type_i1(); + let i1xn = bx.type_vector(i1, in_len as u64); + bx.trunc(args[0].immediate(), i1xn) + }; + return match in_elem.kind() { + ty::Int(_) | ty::Uint(_) => { + let r = bx.vector_reduce_op(input, $op); + Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) }) + } + _ => return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + sym::$name, + in_ty, + in_elem, + ret_ty + ), + }; + } + }; + } + + bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false); + bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false); unimplemented!("simd {}", name); } diff --git a/src/type_.rs b/src/type_.rs index db2b5ea8ab2..002b95db36d 100644 --- a/src/type_.rs +++ b/src/type_.rs @@ -247,6 +247,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { self.context.new_array_type(None, ty, len) } + + pub fn type_bool(&self) -> Type<'gcc> { + self.context.new_type::() + } } pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec>, bool) {