Add more SIMD

2022-06-09 21:11:30 -04:00 · 2022-06-09 21:11:30 -04:00 · ee4755afdb
commit ee4755afdb
parent 2ba5845c52
5 changed files with 24 additions and 8 deletions
--- a/Readme.md
+++ b/Readme.md
@ -127,6 +127,10 @@ To get the `rustc` command to run in `gdb`, add the `--verbose` flag to `cargo b
 * Build the stage2 compiler (`rustup toolchain link debug-current build/x86_64-unknown-linux-gnu/stage2`).
 * Clean and rebuild the codegen with `debug-current` in the file `rust-toolchain`.

+### How to use [mem-trace](https://github.com/antoyo/mem-trace)
+
+`rustc` needs to be built without `jemalloc` so that `mem-trace` can overload `malloc`: since `jemalloc` is linked statically, an `LD_PRELOAD`-ed library won't have a chance to intercept the calls to `malloc`.
+
 ### How to build a cross-compiling libgccjit

 #### Building libgccjit
--- a/src/base.rs
+++ b/src/base.rs
@ -81,11 +81,17 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
        // TODO(antoyo): only add the following cli argument if the feature is supported.
        context.add_command_line_option("-msse2");
        context.add_command_line_option("-mavx2");
-        context.add_command_line_option("-msha");
-        context.add_command_line_option("-mpclmul");
        // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
        // Only add if the CPU supports it.
-        //context.add_command_line_option("-mavx512f");
+        /*context.add_command_line_option("-mavx512f");
+        context.add_command_line_option("-msha");
+        context.add_command_line_option("-mpclmul");
+        context.add_command_line_option("-mfma");
+        context.add_command_line_option("-mfma4");
+        context.add_command_line_option("-mavx512vpopcntdq");
+        context.add_command_line_option("-mavx512vl");
+        context.add_command_line_option("-m64");
+        context.add_command_line_option("-mbmi");*/
        for arg in &tcx.sess.opts.cg.llvm_args {
            context.add_command_line_option(arg);
        }
--- a/src/builder.rs
+++ b/src/builder.rs
@ -213,7 +213,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {

                let actual_ty = actual_val.get_type();
                if expected_ty != actual_ty {
-                    if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
+                    if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() {
                        self.context.new_cast(None, actual_val, expected_ty)
                    }
                    else if on_stack_param_indices.contains(&index) {
@ -1390,18 +1390,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
    {
        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_type = vector_type.get_element_type();
+        let mask_element_type = self.type_ix(element_type.get_size() as u64 * 8);
        let element_count = vector_type.get_num_units();
        let mut vector_elements = vec![];
        for i in 0..element_count {
            vector_elements.push(i);
        }
-        let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
+        let mask_type = self.context.new_vector_type(mask_element_type, element_count as u64);
        let mut shift = 1;
        let mut res = src;
        while shift < element_count {
            let vector_elements: Vec<_> =
                vector_elements.iter()
-                    .map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
+                    .map(|i| self.context.new_rvalue_from_int(mask_element_type, ((i + shift) % element_count) as i32))
                    .collect();
            let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
            let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@ -288,7 +288,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc,
    match func_name {
        "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
            let zero = builder.context.new_rvalue_zero(builder.int_type);
-            return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+            #[cfg(feature="master")]
+            {
+                return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+            }
        },
        "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
            // Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 points to the same GCC builtin,
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@ -216,7 +216,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
        let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
        bx.llbb().add_assignment(None, variable, vector);
        let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
-        // TODO: si simd_insert est constant, utiliser BIT_REF…
+        // TODO: if simd_insert is constant, use BIT_REF.
        bx.llbb().add_assignment(None, lvalue, value);
        return Ok(variable.to_rvalue());
    }
@ -252,6 +252,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
        return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
    }

+    #[cfg(feature="master")]
    if name == sym::simd_cast {
        require_simd!(ret_ty, "return");
        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());