Add more SIMD

This commit is contained in:
Antoni Boucher 2022-06-04 23:03:03 -04:00
parent df85771b34
commit fe606eb444
2 changed files with 44 additions and 7 deletions

View File

@ -275,19 +275,19 @@ fn function_call(&mut self, func: RValue<'gcc>, args: &[RValue<'gcc>], _funclet:
}
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
let args = self.check_ptr_call("call", func_ptr, args);
let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
let func_name = format!("{:?}", func_ptr);
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args.into(), &func_name);
let args = self.check_ptr_call("call", func_ptr, &*args);
// gccjit requires to use the result of functions, even when it's not used.
// That's why we assign the result to a local or call add_eval().
let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
let return_type = gcc_func.get_return_type();
let void_type = self.context.new_type::<()>();
let current_func = self.block.get_function();
if return_type != void_type {
unsafe { RETURN_VALUE_COUNT += 1 };
let func_name = format!("{:?}", func_ptr);
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
let return_value = self.cx.context.new_call_through_ptr(None, func_ptr, &args);
let return_value = llvm::adjust_intrinsic_return_value(&self, return_value, &func_name, &args);
let result = current_func.new_local(None, return_value.get_type(), &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));

View File

@ -49,6 +49,10 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
| "__builtin_ia32_prorvd256_mask" | "__builtin_ia32_prorvd128_mask" | "__builtin_ia32_prolvq256_mask"
| "__builtin_ia32_prolvq128_mask" | "__builtin_ia32_prorvq256_mask" | "__builtin_ia32_prorvq128_mask"
| "__builtin_ia32_permvardi256_mask" | "__builtin_ia32_permvardf512_mask" | "__builtin_ia32_permvardf256_mask"
| "__builtin_ia32_pmulhuw512_mask" | "__builtin_ia32_pmulhw512_mask" | "__builtin_ia32_pmulhrsw512_mask"
| "__builtin_ia32_pmaxuw512_mask" | "__builtin_ia32_pmaxub512_mask" | "__builtin_ia32_pmaxsw512_mask"
| "__builtin_ia32_pmaxsb512_mask" | "__builtin_ia32_pminuw512_mask" | "__builtin_ia32_pminub512_mask"
| "__builtin_ia32_pminsw512_mask" | "__builtin_ia32_pminsb512_mask"
=> {
let mut new_args = args.to_vec();
let arg3_type = gcc_func.get_param_type(2);
@ -63,8 +67,22 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
args = new_args.into();
},
"__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask"
| "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask"
| "__builtin_ia32_vpconflictsi_512_mask" | "__builtin_ia32_vpconflictsi_256_mask"
| "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => {
let mut new_args = args.to_vec();
// Remove last arg as it doesn't seem to be used in GCC and is always false.
new_args.pop();
let arg2_type = gcc_func.get_param_type(1);
let vector_type = arg2_type.dyncast_vector().expect("vector type");
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
let num_units = vector_type.get_num_units();
let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]);
new_args.push(first_arg);
let arg3_type = gcc_func.get_param_type(2);
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
new_args.push(minus_one);
args = new_args.into();
},
"__builtin_ia32_vpconflictsi_512_mask" | "__builtin_ia32_vpconflictsi_256_mask"
| "__builtin_ia32_vpconflictsi_128_mask" | "__builtin_ia32_vpconflictdi_512_mask"
| "__builtin_ia32_vpconflictdi_256_mask" | "__builtin_ia32_vpconflictdi_128_mask" => {
let mut new_args = args.to_vec();
@ -177,7 +195,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
},
"__builtin_ia32_vpermt2varqi512_mask" | "__builtin_ia32_vpermt2varqi256_mask"
| "__builtin_ia32_vpermt2varqi128_mask" => {
let mut new_args = args.to_vec();
let new_args = args.to_vec();
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
args = vec![new_args[1], new_args[0], new_args[2], minus_one].into();
@ -282,6 +300,12 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
},
// NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => return true,
"__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask"
| "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => {
if index == args_len - 1 {
return true;
}
},
_ => (),
}
@ -418,6 +442,14 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512.pmultishift.qb.512" => "__builtin_ia32_vpmultishiftqb512_mask",
"llvm.x86.avx512.pmultishift.qb.256" => "__builtin_ia32_vpmultishiftqb256_mask",
"llvm.x86.avx512.pmultishift.qb.128" => "__builtin_ia32_vpmultishiftqb128_mask",
"llvm.ctpop.v16i16" => "__builtin_ia32_vpopcountw_v16hi",
"llvm.ctpop.v8i16" => "__builtin_ia32_vpopcountw_v8hi",
"llvm.ctpop.v64i8" => "__builtin_ia32_vpopcountb_v64qi",
"llvm.ctpop.v32i8" => "__builtin_ia32_vpopcountb_v32qi",
"llvm.ctpop.v16i8" => "__builtin_ia32_vpopcountb_v16qi",
"llvm.x86.avx512.mask.vpshufbitqmb.512" => "__builtin_ia32_vpshufbitqmb512_mask",
"llvm.x86.avx512.mask.vpshufbitqmb.256" => "__builtin_ia32_vpshufbitqmb256_mask",
"llvm.x86.avx512.mask.vpshufbitqmb.128" => "__builtin_ia32_vpshufbitqmb128_mask",
// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
@ -506,6 +538,11 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512bf16.dpbf16ps.128" => "__builtin_ia32_dpbf16ps_v4sf",
"llvm.x86.avx512bf16.dpbf16ps.256" => "__builtin_ia32_dpbf16ps_v8sf",
"llvm.x86.avx512bf16.dpbf16ps.512" => "__builtin_ia32_dpbf16ps_v16sf",
"llvm.x86.pclmulqdq.512" => "__builtin_ia32_vpclmulqdq_v8di",
"llvm.x86.pclmulqdq.256" => "__builtin_ia32_vpclmulqdq_v4di",
"llvm.x86.avx512.pmulhu.w.512" => "__builtin_ia32_pmulhuw512_mask",
"llvm.x86.avx512.pmulh.w.512" => "__builtin_ia32_pmulhw512_mask",
"llvm.x86.avx512.pmul.hr.sw.512" => "__builtin_ia32_pmulhrsw512_mask",
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
_ => include!("archs.rs"),