Add more SIMD intrinsics
This commit is contained in:
parent
17f3dbf656
commit
cb36d78d7b
@ -116,6 +116,10 @@ fn module_codegen(
|
|||||||
context.add_command_line_option("-mavx");
|
context.add_command_line_option("-mavx");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*for feature in tcx.sess.opts.cg.target_feature.split(',') {
|
||||||
|
println!("Feature: {}", feature);
|
||||||
|
}*/
|
||||||
|
|
||||||
for arg in &tcx.sess.opts.cg.llvm_args {
|
for arg in &tcx.sess.opts.cg.llvm_args {
|
||||||
context.add_command_line_option(arg);
|
context.add_command_line_option(arg);
|
||||||
}
|
}
|
||||||
@ -218,6 +222,7 @@ fn module_codegen(
|
|||||||
|
|
||||||
// ... and now that we have everything pre-defined, fill out those definitions.
|
// ... and now that we have everything pre-defined, fill out those definitions.
|
||||||
for &(mono_item, _) in &mono_items {
|
for &(mono_item, _) in &mono_items {
|
||||||
|
//println!("{:?}", mono_item);
|
||||||
mono_item.define::<Builder<'_, '_, '_>>(&cx);
|
mono_item.define::<Builder<'_, '_, '_>>(&cx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,6 +270,8 @@ fn check_ptr_call<'b>(
|
|||||||
actual_val.dereference(self.location).to_rvalue()
|
actual_val.dereference(self.location).to_rvalue()
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// FIXME: this condition seems wrong: it will pass when neither type is
|
||||||
|
// a vector.
|
||||||
assert!(
|
assert!(
|
||||||
(!expected_ty.is_vector() || actual_ty.is_vector())
|
(!expected_ty.is_vector() || actual_ty.is_vector())
|
||||||
&& (expected_ty.is_vector() || !actual_ty.is_vector()),
|
&& (expected_ty.is_vector() || !actual_ty.is_vector()),
|
||||||
@ -283,6 +285,7 @@ fn check_ptr_call<'b>(
|
|||||||
);
|
);
|
||||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||||
// TODO: remove bitcast now that vector types can be compared?
|
// TODO: remove bitcast now that vector types can be compared?
|
||||||
|
println!("Name: {}", func_name);
|
||||||
self.bitcast(actual_val, expected_ty)
|
self.bitcast(actual_val, expected_ty)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -168,7 +168,23 @@ fn declare_raw_fn<'gcc>(
|
|||||||
variadic: bool,
|
variadic: bool,
|
||||||
) -> Function<'gcc> {
|
) -> Function<'gcc> {
|
||||||
if name.starts_with("llvm.") {
|
if name.starts_with("llvm.") {
|
||||||
let intrinsic = llvm::intrinsic(name, cx);
|
let intrinsic = match name {
|
||||||
|
"llvm.fma.f16" => {
|
||||||
|
let param1 = cx.context.new_parameter(None, cx.double_type, "x");
|
||||||
|
let param2 = cx.context.new_parameter(None, cx.double_type, "y");
|
||||||
|
let param3 = cx.context.new_parameter(None, cx.double_type, "z");
|
||||||
|
cx.context.new_function(
|
||||||
|
None,
|
||||||
|
FunctionType::Extern,
|
||||||
|
cx.double_type,
|
||||||
|
&[param1, param2, param3],
|
||||||
|
"fma",
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
_ => llvm::intrinsic(name, cx),
|
||||||
|
};
|
||||||
|
|
||||||
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
|
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
|
||||||
return intrinsic;
|
return intrinsic;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use gccjit::CType;
|
||||||
use gccjit::{Function, FunctionPtrType, RValue, ToRValue, UnaryOp};
|
use gccjit::{Function, FunctionPtrType, RValue, ToRValue, UnaryOp};
|
||||||
use rustc_codegen_ssa::traits::BuilderMethods;
|
use rustc_codegen_ssa::traits::BuilderMethods;
|
||||||
|
|
||||||
@ -320,7 +321,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
|
|||||||
| "__builtin_ia32_vpmadd52luq512_mask"
|
| "__builtin_ia32_vpmadd52luq512_mask"
|
||||||
| "__builtin_ia32_vpmadd52huq256_mask"
|
| "__builtin_ia32_vpmadd52huq256_mask"
|
||||||
| "__builtin_ia32_vpmadd52luq256_mask"
|
| "__builtin_ia32_vpmadd52luq256_mask"
|
||||||
| "__builtin_ia32_vpmadd52huq128_mask" => {
|
| "__builtin_ia32_vpmadd52huq128_mask"
|
||||||
|
| "__builtin_ia32_vfmaddsubph128_mask"
|
||||||
|
| "__builtin_ia32_vfmaddsubph256_mask" => {
|
||||||
let mut new_args = args.to_vec();
|
let mut new_args = args.to_vec();
|
||||||
let arg4_type = gcc_func.get_param_type(3);
|
let arg4_type = gcc_func.get_param_type(3);
|
||||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||||
@ -440,6 +443,19 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
|
|||||||
new_args.push(last_arg);
|
new_args.push(last_arg);
|
||||||
args = new_args.into();
|
args = new_args.into();
|
||||||
}
|
}
|
||||||
|
// NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
|
||||||
|
"__builtin_ia32_vfmaddsh3_mask" => {
|
||||||
|
let new_args = args.to_vec();
|
||||||
|
let arg1_type = gcc_func.get_param_type(0);
|
||||||
|
let arg2_type = gcc_func.get_param_type(1);
|
||||||
|
let arg3_type = gcc_func.get_param_type(2);
|
||||||
|
let arg5_type = gcc_func.get_param_type(4);
|
||||||
|
let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 8]);
|
||||||
|
let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 8]);
|
||||||
|
let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 8]);
|
||||||
|
let arg5 = builder.context.new_rvalue_from_int(arg5_type, 4);
|
||||||
|
args = vec![a, b, c, new_args[3], arg5].into();
|
||||||
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -452,7 +468,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
|
|||||||
let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type);
|
let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type);
|
||||||
args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into();
|
args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into();
|
||||||
}
|
}
|
||||||
// NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
|
// NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
|
||||||
// FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC
|
// FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC
|
||||||
// intrinsic to avoid this.
|
// intrinsic to avoid this.
|
||||||
"__builtin_ia32_vfmaddss3_round" => {
|
"__builtin_ia32_vfmaddss3_round" => {
|
||||||
@ -550,6 +566,25 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
|
|||||||
]
|
]
|
||||||
.into();
|
.into();
|
||||||
}
|
}
|
||||||
|
"__builtin_ia32_rndscalesh_mask_round" => {
|
||||||
|
let new_args = args.to_vec();
|
||||||
|
args = vec![
|
||||||
|
new_args[0],
|
||||||
|
new_args[1],
|
||||||
|
new_args[4],
|
||||||
|
new_args[2],
|
||||||
|
new_args[3],
|
||||||
|
new_args[5],
|
||||||
|
]
|
||||||
|
.into();
|
||||||
|
}
|
||||||
|
"fma" => {
|
||||||
|
let mut new_args = args.to_vec();
|
||||||
|
new_args[0] = builder.context.new_cast(None, new_args[0], builder.double_type);
|
||||||
|
new_args[1] = builder.context.new_cast(None, new_args[1], builder.double_type);
|
||||||
|
new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
|
||||||
|
args = new_args.into();
|
||||||
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -566,7 +601,9 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
|
|||||||
orig_args: &[RValue<'gcc>],
|
orig_args: &[RValue<'gcc>],
|
||||||
) -> RValue<'gcc> {
|
) -> RValue<'gcc> {
|
||||||
match func_name {
|
match func_name {
|
||||||
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
|
"__builtin_ia32_vfmaddss3_round"
|
||||||
|
| "__builtin_ia32_vfmaddsd3_round"
|
||||||
|
| "__builtin_ia32_vfmaddsh3_mask" => {
|
||||||
#[cfg(feature = "master")]
|
#[cfg(feature = "master")]
|
||||||
{
|
{
|
||||||
let zero = builder.context.new_rvalue_zero(builder.int_type);
|
let zero = builder.context.new_rvalue_zero(builder.int_type);
|
||||||
@ -625,6 +662,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
|
|||||||
&[random_number, success_variable.to_rvalue()],
|
&[random_number, success_variable.to_rvalue()],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
"fma" => {
|
||||||
|
let f16_type = builder.context.new_c_type(CType::Float16);
|
||||||
|
return_value = builder.context.new_cast(None, return_value, f16_type);
|
||||||
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1165,6 +1206,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||||||
"llvm.x86.avx512.mask.store.q.128" => "__builtin_ia32_movdqa64store128_mask",
|
"llvm.x86.avx512.mask.store.q.128" => "__builtin_ia32_movdqa64store128_mask",
|
||||||
"llvm.x86.avx512.mask.store.ps.128" => "__builtin_ia32_storeaps128_mask",
|
"llvm.x86.avx512.mask.store.ps.128" => "__builtin_ia32_storeaps128_mask",
|
||||||
"llvm.x86.avx512.mask.store.pd.128" => "__builtin_ia32_storeapd128_mask",
|
"llvm.x86.avx512.mask.store.pd.128" => "__builtin_ia32_storeapd128_mask",
|
||||||
|
"llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask",
|
||||||
|
"llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask",
|
||||||
|
"llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask",
|
||||||
|
|
||||||
// TODO: support the tile builtins:
|
// TODO: support the tile builtins:
|
||||||
"llvm.x86.ldtilecfg" => "__builtin_trap",
|
"llvm.x86.ldtilecfg" => "__builtin_trap",
|
||||||
|
Loading…
Reference in New Issue
Block a user