Support more SIMD intrinsics

This commit is contained in:
Antoni Boucher 2022-05-02 21:50:22 -04:00
parent ace3250da8
commit 6bfe2b0b05
5 changed files with 141 additions and 5 deletions

View File

@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
return Cow::Borrowed(args);
}
let func_name = format!("{:?}", func_ptr);
let casted_args: Vec<_> = param_types
.into_iter()
.zip(args.iter())
.enumerate()
.map(|(index, (expected_ty, &actual_val))| {
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
// last argument type check.
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
match &*func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
| "__builtin_ia32_sqrtpd512_mask" => {
if index == args.len() - 1 {
return actual_val;
}
},
_ => (),
}
let actual_ty = actual_val.get_type();
if expected_ty != actual_ty {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
let args = self.check_ptr_call("call", func_ptr, args);
let mut args = self.check_ptr_call("call", func_ptr, args);
// gccjit requires to use the result of functions, even when it's not used.
// That's why we assign the result to a local or call add_eval().
@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
if return_type != void_type {
unsafe { RETURN_VALUE_COUNT += 1 };
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
// arguments here.
if gcc_func.get_param_count() != args.len() {
let func_name = format!("{:?}", func_ptr);
match &*func_name {
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
| "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
=> {
// TODO: refactor by separating those intrinsics outside of this branch.
let add_before_last_arg =
match &*func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
_ => false,
};
let new_first_arg_is_zero =
match &*func_name {
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
_ => false
};
let arg3_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
_ => 2,
};
let mut new_args = args.to_vec();
let arg3_type = gcc_func.get_param_type(arg3_index);
let first_arg =
if new_first_arg_is_zero {
let vector_type = arg3_type.dyncast_vector().expect("vector type");
let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
let num_units = vector_type.get_num_units();
self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
}
else {
self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
};
if add_before_last_arg {
new_args.insert(new_args.len() - 1, first_arg);
}
else {
new_args.push(first_arg);
}
let arg4_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
_ => 3,
};
let arg4_type = gcc_func.get_param_type(arg4_index);
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
if add_before_last_arg {
new_args.insert(new_args.len() - 1, minus_one);
}
else {
new_args.push(minus_one);
}
args = new_args.into();
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
let mut new_args = args.to_vec();
if args.len() == 3 {
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 maps to
// the same GCC intrinsic, but the former has 3 parameters and the
// latter has 4 so it doesn't require this additional argument.
let arg4_type = gcc_func.get_param_type(3);
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
}
let arg5_type = gcc_func.get_param_type(4);
new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
args = new_args.into();
},
_ => (),
}
}
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
result.to_rvalue()
}

View File

@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
pub tls_model: gccjit::TlsModel,
@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
current_func: RefCell::new(None),
normal_function_addresses: Default::default(),
functions: RefCell::new(functions),
intrinsics: RefCell::new(FxHashMap::default()),
tls_model,
@ -315,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
let func = get_fn(self, instance);
let func = self.rvalue_as_function(func);
let func_name = self.tcx.symbol_name(instance).name;
let func =
if self.intrinsics.borrow().contains_key(func_name) {
self.intrinsics.borrow()[func_name].clone()
}
else {
let func = get_fn(self, instance);
self.rvalue_as_function(func)
};
let ptr = func.get_address(None);
// TODO(antoyo): don't do this twice: i.e. in declare_fn and here.

View File

@ -11,6 +11,7 @@ use crate::intrinsic::llvm;
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
if self.globals.borrow().contains_key(name) {
// TODO: use [] instead of .get().expect()?
let typ = self.globals.borrow().get(name).expect("global").get_type();
let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
if is_tls {
@ -103,7 +104,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
/// update the declaration and return existing Value instead.
fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
if name.starts_with("llvm.") {
return llvm::intrinsic(name, cx);
let intrinsic = llvm::intrinsic(name, cx);
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
return intrinsic;
}
let func =
if cx.functions.borrow().contains_key(name) {

View File

@ -4275,5 +4275,8 @@ match name {
"llvm.xcore.getid" => "__builtin_getid",
"llvm.xcore.getps" => "__builtin_getps",
"llvm.xcore.setps" => "__builtin_setps",
_ => unimplemented!("***** unsupported LLVM intrinsic {}", name),
_ => {
println!("***** unsupported LLVM intrinsic {}", name);
""
},
}

View File

@ -21,6 +21,24 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask",
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",