Add more SIMD

This commit is contained in:
Antoni Boucher 2022-06-09 21:11:30 -04:00
parent 2ba5845c52
commit ee4755afdb
5 changed files with 24 additions and 8 deletions

View File

@ -127,6 +127,10 @@ To get the `rustc` command to run in `gdb`, add the `--verbose` flag to `cargo b
* Build the stage2 compiler (`rustup toolchain link debug-current build/x86_64-unknown-linux-gnu/stage2`).
* Clean and rebuild the codegen with `debug-current` in the file `rust-toolchain`.
### How to use [mem-trace](https://github.com/antoyo/mem-trace)
`rustc` needs to be built without `jemalloc` so that `mem-trace` can overload `malloc`: since `jemalloc` is linked statically, an `LD_PRELOAD`-ed library won't have a chance to intercept the calls to `malloc`.
### How to build a cross-compiling libgccjit
#### Building libgccjit

View File

@ -81,11 +81,17 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
// TODO(antoyo): only add the following cli argument if the feature is supported.
context.add_command_line_option("-msse2");
context.add_command_line_option("-mavx2");
context.add_command_line_option("-msha");
context.add_command_line_option("-mpclmul");
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
// Only add if the CPU supports it.
//context.add_command_line_option("-mavx512f");
/*context.add_command_line_option("-mavx512f");
context.add_command_line_option("-msha");
context.add_command_line_option("-mpclmul");
context.add_command_line_option("-mfma");
context.add_command_line_option("-mfma4");
context.add_command_line_option("-mavx512vpopcntdq");
context.add_command_line_option("-mavx512vl");
context.add_command_line_option("-m64");
context.add_command_line_option("-mbmi");*/
for arg in &tcx.sess.opts.cg.llvm_args {
context.add_command_line_option(arg);
}

View File

@ -213,7 +213,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
let actual_ty = actual_val.get_type();
if expected_ty != actual_ty {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() {
self.context.new_cast(None, actual_val, expected_ty)
}
else if on_stack_param_indices.contains(&index) {
@ -1390,18 +1390,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
let element_type = vector_type.get_element_type();
let mask_element_type = self.type_ix(element_type.get_size() as u64 * 8);
let element_count = vector_type.get_num_units();
let mut vector_elements = vec![];
for i in 0..element_count {
vector_elements.push(i);
}
let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
let mask_type = self.context.new_vector_type(mask_element_type, element_count as u64);
let mut shift = 1;
let mut res = src;
while shift < element_count {
let vector_elements: Vec<_> =
vector_elements.iter()
.map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
.map(|i| self.context.new_rvalue_from_int(mask_element_type, ((i + shift) % element_count) as i32))
.collect();
let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);

View File

@ -288,7 +288,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc,
match func_name {
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
let zero = builder.context.new_rvalue_zero(builder.int_type);
return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
#[cfg(feature="master")]
{
return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
}
},
"__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
// Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 point to the same GCC builtin,

View File

@ -216,7 +216,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
bx.llbb().add_assignment(None, variable, vector);
let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
// TODO: si simd_insert est constant, utiliser BIT_REF…
// TODO: if simd_insert is constant, use BIT_REF.
bx.llbb().add_assignment(None, lvalue, value);
return Ok(variable.to_rvalue());
}
@ -252,6 +252,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
}
#[cfg(feature="master")]
if name == sym::simd_cast {
require_simd!(ret_ty, "return");
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());