Add more SIMD
This commit is contained in:
parent
4636c59df5
commit
ddc152b04d
@ -595,7 +595,7 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg)
|
||||
| InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) => "x",
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v",
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => unimplemented!(),
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => "Yk",
|
||||
InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => unimplemented!(),
|
||||
InlineAsmRegClass::X86(
|
||||
X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg,
|
||||
|
@ -7,6 +7,7 @@ use gccjit::{
|
||||
BinaryOp,
|
||||
Block,
|
||||
ComparisonOp,
|
||||
Context,
|
||||
Function,
|
||||
LValue,
|
||||
RValue,
|
||||
@ -1380,6 +1381,85 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
pub fn shuffle_vector(&mut self, _v1: RValue<'gcc>, _v2: RValue<'gcc>, _mask: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
|
||||
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
|
||||
{
|
||||
let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
|
||||
let element_count = vector_type.get_num_units();
|
||||
let mut vector_elements = vec![];
|
||||
for i in 0..element_count {
|
||||
vector_elements.push(i);
|
||||
}
|
||||
let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
|
||||
let mut shift = 1;
|
||||
let mut res = src;
|
||||
while shift < element_count {
|
||||
let vector_elements: Vec<_> =
|
||||
vector_elements.iter()
|
||||
.map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
|
||||
.collect();
|
||||
let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
|
||||
let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
|
||||
shift *= 2;
|
||||
res = op(res, shifted, &self.context);
|
||||
}
|
||||
self.context.new_vector_access(None, res, self.context.new_rvalue_zero(self.int_type))
|
||||
.to_rvalue()
|
||||
}
|
||||
|
||||
pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> {
|
||||
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
|
||||
}
|
||||
|
||||
pub fn vector_reduce_fadd_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
pub fn vector_reduce_fmul_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
// Inspired by Hacker's Delight min implementation.
|
||||
pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
self.vector_reduce(src, |a, b, context| {
|
||||
let differences_or_zeros = difference_or_zero(a, b, context);
|
||||
context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
|
||||
})
|
||||
}
|
||||
|
||||
// Inspired by Hacker's Delight max implementation.
|
||||
pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
self.vector_reduce(src, |a, b, context| {
|
||||
let differences_or_zeros = difference_or_zero(a, b, context);
|
||||
context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> {
|
||||
// cond is a vector of integers, not of bools.
|
||||
let vector_type = cond.get_type().dyncast_vector().expect("vector type");
|
||||
let num_units = vector_type.get_num_units();
|
||||
let vector_type = self.context.new_vector_type(self.int_type, num_units as u64);
|
||||
let zeros = vec![self.context.new_rvalue_zero(self.int_type); num_units];
|
||||
let zeros = self.context.new_rvalue_from_vector(None, vector_type, &zeros);
|
||||
|
||||
let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
|
||||
let then_vals = masks & then_val;
|
||||
|
||||
let ones = vec![self.context.new_rvalue_one(self.int_type); num_units];
|
||||
let ones = self.context.new_rvalue_from_vector(None, vector_type, &ones);
|
||||
let inverted_masks = masks + ones;
|
||||
let else_vals = inverted_masks & else_val;
|
||||
|
||||
then_vals | else_vals
|
||||
}
|
||||
}
|
||||
|
||||
fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
|
||||
let difference = a - b;
|
||||
let masks = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
|
||||
difference & masks
|
||||
}
|
||||
|
||||
impl<'a, 'gcc, 'tcx> StaticBuilderMethods for Builder<'a, 'gcc, 'tcx> {
|
||||
|
@ -117,8 +117,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
fn const_real(&self, _t: Type<'gcc>, _val: f64) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
fn const_real(&self, typ: Type<'gcc>, val: f64) -> RValue<'gcc> {
|
||||
self.context.new_rvalue_from_double(typ, val)
|
||||
}
|
||||
|
||||
fn const_str(&self, s: Symbol) -> (RValue<'gcc>, RValue<'gcc>) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use gccjit::{RValue, Type, ToRValue};
|
||||
use gccjit::{BinaryOp, RValue, Type, ToRValue};
|
||||
use rustc_codegen_ssa::base::compare_simd_types;
|
||||
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
|
||||
use rustc_codegen_ssa::mir::operand::OperandRef;
|
||||
@ -222,6 +222,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
|
||||
}
|
||||
|
||||
if name == sym::simd_select {
|
||||
let m_elem_ty = in_elem;
|
||||
let m_len = in_len;
|
||||
require_simd!(arg_tys[1], "argument");
|
||||
let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
m_len == v_len,
|
||||
"mismatched lengths: mask length `{}` != other vector length `{}`",
|
||||
m_len,
|
||||
v_len
|
||||
);
|
||||
match m_elem_ty.kind() {
|
||||
ty::Int(_) => {}
|
||||
_ => return_error!("mask element type is `{}`, expected `i_`", m_elem_ty),
|
||||
}
|
||||
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
|
||||
}
|
||||
|
||||
if name == sym::simd_cast {
|
||||
require_simd!(ret_ty, "return");
|
||||
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
|
||||
@ -543,7 +561,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
}
|
||||
|
||||
macro_rules! arith_red {
|
||||
($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
|
||||
($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident,
|
||||
$identity:expr) => {
|
||||
if name == sym::$name {
|
||||
require!(
|
||||
@ -555,36 +573,25 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
);
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {
|
||||
let r = bx.$integer_reduce(args[0].immediate());
|
||||
let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
|
||||
if $ordered {
|
||||
// if overflow occurs, the result is the
|
||||
// mathematical result modulo 2^n:
|
||||
Ok(bx.$op(args[1].immediate(), r))
|
||||
} else {
|
||||
Ok(bx.$integer_reduce(args[0].immediate()))
|
||||
}
|
||||
else {
|
||||
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
|
||||
}
|
||||
}
|
||||
ty::Float(f) => {
|
||||
let acc = if $ordered {
|
||||
ty::Float(_) => {
|
||||
if $ordered {
|
||||
// ordered arithmetic reductions take an accumulator
|
||||
args[1].immediate()
|
||||
} else {
|
||||
// unordered arithmetic reductions use the identity accumulator
|
||||
match f.bit_width() {
|
||||
32 => bx.const_real(bx.type_f32(), $identity),
|
||||
64 => bx.const_real(bx.type_f64(), $identity),
|
||||
v => return_error!(
|
||||
r#"
|
||||
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
|
||||
sym::$name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
v,
|
||||
ret_ty
|
||||
),
|
||||
}
|
||||
};
|
||||
Ok(bx.$float_reduce(acc, args[0].immediate()))
|
||||
let acc = args[1].immediate();
|
||||
Ok(bx.$float_reduce(acc, args[0].immediate()))
|
||||
}
|
||||
else {
|
||||
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
|
||||
}
|
||||
}
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
@ -598,14 +605,96 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
|
||||
};
|
||||
}
|
||||
|
||||
// TODO: use a recursive algorithm a-la Hacker's Delight.
|
||||
arith_red!(
|
||||
simd_reduce_add_unordered: vector_reduce_add,
|
||||
simd_reduce_add_unordered: BinaryOp::Plus,
|
||||
vector_reduce_fadd_fast,
|
||||
false,
|
||||
add,
|
||||
0.0
|
||||
0.0 // TODO: Use this argument.
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_mul_unordered: BinaryOp::Mult,
|
||||
vector_reduce_fmul_fast,
|
||||
false,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
|
||||
macro_rules! minmax_red {
|
||||
($name:ident: $reduction:ident) => {
|
||||
if name == sym::$name {
|
||||
require!(
|
||||
ret_ty == in_elem,
|
||||
"expected return type `{}` (element of input `{}`), found `{}`",
|
||||
in_elem,
|
||||
in_ty,
|
||||
ret_ty
|
||||
);
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())),
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
sym::$name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty
|
||||
),
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max);
|
||||
|
||||
macro_rules! bitwise_red {
|
||||
($name:ident : $op:expr, $boolean:expr) => {
|
||||
if name == sym::$name {
|
||||
let input = if !$boolean {
|
||||
require!(
|
||||
ret_ty == in_elem,
|
||||
"expected return type `{}` (element of input `{}`), found `{}`",
|
||||
in_elem,
|
||||
in_ty,
|
||||
ret_ty
|
||||
);
|
||||
args[0].immediate()
|
||||
} else {
|
||||
match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {}
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
sym::$name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty
|
||||
),
|
||||
}
|
||||
|
||||
// boolean reductions operate on vectors of i1s:
|
||||
let i1 = bx.type_i1();
|
||||
let i1xn = bx.type_vector(i1, in_len as u64);
|
||||
bx.trunc(args[0].immediate(), i1xn)
|
||||
};
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {
|
||||
let r = bx.vector_reduce_op(input, $op);
|
||||
Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
|
||||
}
|
||||
_ => return_error!(
|
||||
"unsupported {} from `{}` with element `{}` to `{}`",
|
||||
sym::$name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty
|
||||
),
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
|
||||
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
|
||||
|
||||
unimplemented!("simd {}", name);
|
||||
}
|
||||
|
@ -247,6 +247,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
|
||||
self.context.new_array_type(None, ty, len)
|
||||
}
|
||||
|
||||
pub fn type_bool(&self) -> Type<'gcc> {
|
||||
self.context.new_type::<bool>()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec<Type<'gcc>>, bool) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user