Support more SIMD intrinsics
This commit is contained in:
parent
ace3250da8
commit
6bfe2b0b05
104
src/builder.rs
104
src/builder.rs
@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
return Cow::Borrowed(args);
|
||||
}
|
||||
|
||||
let func_name = format!("{:?}", func_ptr);
|
||||
|
||||
let casted_args: Vec<_> = param_types
|
||||
.into_iter()
|
||||
.zip(args.iter())
|
||||
.enumerate()
|
||||
.map(|(index, (expected_ty, &actual_val))| {
|
||||
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
|
||||
// last argument type check.
|
||||
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
|
||||
| "__builtin_ia32_sqrtpd512_mask" => {
|
||||
if index == args.len() - 1 {
|
||||
return actual_val;
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let actual_ty = actual_val.get_type();
|
||||
if expected_ty != actual_ty {
|
||||
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
|
||||
@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
}
|
||||
|
||||
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
|
||||
let args = self.check_ptr_call("call", func_ptr, args);
|
||||
let mut args = self.check_ptr_call("call", func_ptr, args);
|
||||
|
||||
// gccjit requires to use the result of functions, even when it's not used.
|
||||
// That's why we assign the result to a local or call add_eval().
|
||||
@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
if return_type != void_type {
|
||||
unsafe { RETURN_VALUE_COUNT += 1 };
|
||||
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
|
||||
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
|
||||
// arguments here.
|
||||
if gcc_func.get_param_count() != args.len() {
|
||||
let func_name = format!("{:?}", func_ptr);
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
|
||||
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
|
||||
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
|
||||
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
|
||||
| "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
|
||||
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
|
||||
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
|
||||
=> {
|
||||
// TODO: refactor by separating those intrinsics outside of this branch.
|
||||
let add_before_last_arg =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
|
||||
_ => false,
|
||||
};
|
||||
let new_first_arg_is_zero =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
|
||||
_ => false
|
||||
};
|
||||
let arg3_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
|
||||
_ => 2,
|
||||
};
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(arg3_index);
|
||||
let first_arg =
|
||||
if new_first_arg_is_zero {
|
||||
let vector_type = arg3_type.dyncast_vector().expect("vector type");
|
||||
let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
|
||||
}
|
||||
else {
|
||||
self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
|
||||
};
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, first_arg);
|
||||
}
|
||||
else {
|
||||
new_args.push(first_arg);
|
||||
}
|
||||
let arg4_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
|
||||
_ => 3,
|
||||
};
|
||||
let arg4_type = gcc_func.get_param_type(arg4_index);
|
||||
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, minus_one);
|
||||
}
|
||||
else {
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
|
||||
result.to_rvalue()
|
||||
}
|
||||
|
@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
|
||||
pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
|
||||
|
||||
pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
|
||||
pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
|
||||
|
||||
pub tls_model: gccjit::TlsModel,
|
||||
|
||||
@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
current_func: RefCell::new(None),
|
||||
normal_function_addresses: Default::default(),
|
||||
functions: RefCell::new(functions),
|
||||
intrinsics: RefCell::new(FxHashMap::default()),
|
||||
|
||||
tls_model,
|
||||
|
||||
@ -315,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
|
||||
}
|
||||
|
||||
fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
|
||||
let func = get_fn(self, instance);
|
||||
let func = self.rvalue_as_function(func);
|
||||
let func_name = self.tcx.symbol_name(instance).name;
|
||||
|
||||
let func =
|
||||
if self.intrinsics.borrow().contains_key(func_name) {
|
||||
self.intrinsics.borrow()[func_name].clone()
|
||||
}
|
||||
else {
|
||||
let func = get_fn(self, instance);
|
||||
self.rvalue_as_function(func)
|
||||
};
|
||||
let ptr = func.get_address(None);
|
||||
|
||||
// TODO(antoyo): don't do this twice: i.e. in declare_fn and here.
|
||||
|
@ -11,6 +11,7 @@ use crate::intrinsic::llvm;
|
||||
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
|
||||
if self.globals.borrow().contains_key(name) {
|
||||
// TODO: use [] instead of .get().expect()?
|
||||
let typ = self.globals.borrow().get(name).expect("global").get_type();
|
||||
let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
|
||||
if is_tls {
|
||||
@ -103,7 +104,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
/// update the declaration and return existing Value instead.
|
||||
fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
|
||||
if name.starts_with("llvm.") {
|
||||
return llvm::intrinsic(name, cx);
|
||||
let intrinsic = llvm::intrinsic(name, cx);
|
||||
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
|
||||
return intrinsic;
|
||||
}
|
||||
let func =
|
||||
if cx.functions.borrow().contains_key(name) {
|
||||
|
@ -4275,5 +4275,8 @@ match name {
|
||||
"llvm.xcore.getid" => "__builtin_getid",
|
||||
"llvm.xcore.getps" => "__builtin_getps",
|
||||
"llvm.xcore.setps" => "__builtin_setps",
|
||||
_ => unimplemented!("***** unsupported LLVM intrinsic {}", name),
|
||||
_ => {
|
||||
println!("***** unsupported LLVM intrinsic {}", name);
|
||||
""
|
||||
},
|
||||
}
|
||||
|
@ -21,6 +21,24 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
||||
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
|
||||
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
|
||||
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
|
||||
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
|
||||
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
|
||||
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
|
||||
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
|
||||
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
|
||||
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
|
||||
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
|
||||
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
|
||||
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
|
||||
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
|
||||
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
|
||||
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
|
||||
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
|
||||
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
|
||||
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
|
||||
// The above doc points to unknown builtins for the following, so override them:
|
||||
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
|
||||
|
Loading…
x
Reference in New Issue
Block a user