Merge pull request #382 from sadlerap/impl-generic-arithmetic-pass

simd: implement missing intrinsics from simd/generic-arithmetic-pass.rs
2023-12-19 13:00:35 -05:00 · 2023-12-19 13:00:35 -05:00 · db494375ab
commit db494375ab
parent 0a67e9cd60 17b2c46c88
5 changed files with 214 additions and 5 deletions
--- a/failing-ui-tests.txt
+++ b/failing-ui-tests.txt
@ -13,7 +13,6 @@ tests/ui/sepcomp/sepcomp-extern.rs
 tests/ui/sepcomp/sepcomp-fns-backwards.rs
 tests/ui/sepcomp/sepcomp-fns.rs
 tests/ui/sepcomp/sepcomp-statics.rs
 tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
 tests/ui/asm/x86_64/may_unwind.rs
 tests/ui/backtrace.rs
 tests/ui/catch-unwind-bang.rs
@ -57,7 +56,6 @@ tests/ui/coroutine/panic-safe.rs
 tests/ui/issues/issue-14875.rs
 tests/ui/issues/issue-29948.rs
 tests/ui/panics/nested_panic_caught.rs
 tests/ui/simd/intrinsic/generic-bswap-byte.rs
 tests/ui/const_prop/ice-issue-111353.rs
 tests/ui/process/println-with-broken-pipe.rs
 tests/ui/panic-runtime/lto-abort.rs
--- a/failing-ui-tests12.txt
+++ b/failing-ui-tests12.txt
@ -9,6 +9,7 @@ tests/ui/packed/packed-struct-vec.rs
 tests/ui/packed/packed-tuple-struct-layout.rs
 tests/ui/simd/array-type.rs
 tests/ui/simd/intrinsic/float-minmax-pass.rs
 tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
 tests/ui/simd/intrinsic/generic-arithmetic-saturating-pass.rs
 tests/ui/simd/intrinsic/generic-as.rs
 tests/ui/simd/intrinsic/generic-cast-pass.rs
--- a/src/builder.rs
+++ b/src/builder.rs
@ -606,12 +606,29 @@ fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
        //     ../../../gcc/gcc/cfgexpand.cc:6069
        // 0x7f0101bf9194 execute
        //     ../../../gcc/gcc/cfgexpand.cc:6795
-        if a.get_type().is_compatible_with(self.cx.float_type) {
+        let a_type = a.get_type();
        let a_type_unqualified = a_type.unqualified();
        if a_type.is_compatible_with(self.cx.float_type) {
            let fmodf = self.context.get_builtin_function("fmodf");
            // FIXME(antoyo): this seems to produce the wrong result.
            return self.context.new_call(None, fmodf, &[a, b]);
        }
-        assert_eq!(a.get_type().unqualified(), self.cx.double_type);
+        else if let Some(vector_type) = a_type_unqualified.dyncast_vector() {
            assert_eq!(a_type_unqualified, b.get_type().unqualified());
            let num_units = vector_type.get_num_units();
            let new_elements: Vec<_> = (0..num_units)
                .map(|i| {
                    let index = self.context.new_rvalue_from_long(self.cx.type_u32(), i as _);
                    let x = self.extract_element(a, index).to_rvalue();
                    let y = self.extract_element(b, index).to_rvalue();
                    self.frem(x, y)
                })
                .collect();
            return self.context.new_rvalue_from_vector(None, a_type, &new_elements)
        }
        assert_eq!(a_type_unqualified, self.cx.double_type);
        let fmod = self.context.get_builtin_function("fmod");
        return self.context.new_call(None, fmod, &[a, b]);
--- a/src/int.rs
+++ b/src/int.rs
@ -48,7 +48,7 @@ pub fn gcc_not(&self, a: RValue<'gcc>) -> RValue<'gcc> {
    pub fn gcc_neg(&self, a: RValue<'gcc>) -> RValue<'gcc> {
        let a_type = a.get_type();
-        if self.is_native_int_type(a_type) {
+        if self.is_native_int_type(a_type) || a_type.is_vector() {
            self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a)
        }
        else {
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@ -1,3 +1,5 @@
 use std::iter::FromIterator;
 use gccjit::ToRValue;
 use gccjit::{BinaryOp, RValue, Type};
 #[cfg(feature = "master")]
@ -21,6 +23,8 @@
 use crate::builder::Builder;
 #[cfg(feature = "master")]
 use crate::context::CodegenCx;
 #[cfg(not(feature = "master"))]
 use crate::common::SignType;
 pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
    bx: &mut Builder<'a, 'gcc, 'tcx>,
@ -156,6 +160,195 @@ macro_rules! require_simd {
        return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op));
    }
    // Reverses the byte order inside every lane of `vector` and returns a value of the same
    // vector type.
    //
    // The input is reinterpreted as a vector of `u8`, the bytes of each lane-sized chunk are
    // emitted in reverse order, and the result is bitcast back to the original vector type.
    // Vectors whose lanes are a single byte wide are returned unchanged.
    //
    // Everything is derived from the `vector` argument itself (rather than from `args[0]` and
    // `in_len` of the enclosing intrinsic call), so the closure stays correct for whatever vector
    // it is handed.
    let simd_bswap = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> {
        let v_type = vector.get_type();
        let vector_type = v_type.unqualified().dyncast_vector().expect("vector type");
        let elem_type = vector_type.get_element_type();
        let elem_size_bytes = elem_type.get_size();
        if elem_size_bytes == 1 {
            // Swapping the bytes of a one-byte lane is a no-op.
            return vector;
        }

        let num_lanes = vector_type.get_num_units() as u64;
        let type_size_bytes = elem_size_bytes as u64 * num_lanes;
        let shuffle_indices = Vec::from_iter(0..type_size_bytes);
        let byte_vector_type = bx.context.new_vector_type(bx.type_u8(), type_size_bytes);
        let byte_vector = bx.context.new_bitcast(None, vector, byte_vector_type);

        // Without the vector permute API, rebuild the byte vector one extracted element at a
        // time, walking each lane-sized chunk of indices back to front.
        #[cfg(not(feature = "master"))]
        let shuffled = {
            let new_elements: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _)
                .flat_map(|x| x.iter().rev())
                .map(|&i| {
                    let index = bx.context.new_rvalue_from_long(bx.u64_type, i as _);
                    bx.extract_element(byte_vector, index)
                })
                .collect();

            bx.context.new_rvalue_from_vector(None, byte_vector_type, &new_elements)
        };
        // With the permute API, the same reversed index sequence becomes a shuffle mask.
        // NOTE(review): the mask indices are materialised as `u8` values, which presumably
        // assumes the vector is at most 256 bytes wide — confirm.
        #[cfg(feature = "master")]
        let shuffled = {
            let indices: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _)
                .flat_map(|x| x.iter().rev())
                .map(|&i| bx.context.new_rvalue_from_int(bx.u8_type, i as _))
                .collect();

            let mask = bx.context.new_rvalue_from_vector(None, byte_vector_type, &indices);
            bx.context.new_rvalue_vector_perm(None, byte_vector, byte_vector, mask)
        };
        bx.context.new_bitcast(None, shuffled, v_type)
    };
    if name == sym::simd_bswap || name == sym::simd_bitreverse {
        require!(
            bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
            InvalidMonomorphization::UnsupportedOperation {
                span,
                name,
                in_ty,
                in_elem,
            }
        );
    }
    if name == sym::simd_bswap {
        return Ok(simd_bswap(bx, args[0].immediate()));
    }
    // Vector bit-reversal for builds with the `master` feature (which provides
    // `new_rvalue_vector_perm`).
    //
    // We use a different algorithm from non-vector bitreverse to take advantage of most
    // processors' vector shuffle units.  It works like this:
    // 1. Generate pre-reversed low and high nibbles as a vector.
    // 2. Byte-swap the input.
    // 3. Mask off the low and high nibbles of each byte in the byte-swapped input.
    // 4. Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask.
    // 5. Combine the results of the shuffle back together and cast back to the original type.
    #[cfg(feature = "master")]
    if name == sym::simd_bitreverse {
        let vector = args[0].immediate();
        let v_type = vector.get_type();
        let vector_type = v_type.unqualified().dyncast_vector().expect("vector type");
        let elem_type = vector_type.get_element_type();
        let elem_size_bytes = elem_type.get_size();
        // Total width of the input vector in bytes.
        let type_size_bytes = elem_size_bytes as u64 * in_len;
        // We need to ensure at least 16 entries in our vector type, since the pre-reversed vectors
        // we generate below have 16 entries in them.  `new_rvalue_vector_perm` requires the mask
        // vector to be of the same length as the source vectors.
        let byte_vector_type_size = type_size_bytes.max(16);
        // `byte_vector_type` views the input exactly as bytes; `long_byte_vector_type` is the
        // (possibly wider) type every intermediate vector below is built in.
        let byte_vector_type = bx.context.new_vector_type(bx.u8_type, type_size_bytes);
        let long_byte_vector_type = bx.context.new_vector_type(bx.u8_type, byte_vector_type_size);
        // Step 1: Generate pre-reversed low and high nibbles as a vector.
        // Entry `x` (for `x` in 0..16) of `hi_nibble` is `x.reverse_bits()` on a `u8`, i.e. the
        // reversed nibble placed in the upper four bits; entries past 16 are zero padding.
        let zero_byte = bx.context.new_rvalue_zero(bx.u8_type);
        let hi_nibble_elements: Vec<_> = (0u8..16)
            .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, x.reverse_bits() as _))
            .chain((16..byte_vector_type_size).map(|_| zero_byte))
            .collect();
        let hi_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &hi_nibble_elements);
        // `lo_nibble` holds the same reversed nibbles shifted down into the lower four bits.
        let lo_nibble_elements: Vec<_> = (0u8..16)
            .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, (x.reverse_bits() >> 4) as _))
            .chain((16..byte_vector_type_size).map(|_| zero_byte))
            .collect();
        let lo_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &lo_nibble_elements);
        // Splat constants: `mask` is 0x0f in every byte, `four_vec` is the shift amount 4 in every
        // byte.
        let mask = bx.context.new_rvalue_from_vector(
            None,
            long_byte_vector_type,
            &vec![bx.context.new_rvalue_from_int(bx.u8_type, 0x0f); byte_vector_type_size as _]);
        let four_vec = bx.context.new_rvalue_from_vector(
            None,
            long_byte_vector_type,
            &vec![bx.context.new_rvalue_from_int(bx.u8_type, 4); byte_vector_type_size as _]);
        // Step 2: Byte-swap the input.
        let swapped = simd_bswap(bx, args[0].immediate());
        let byte_vector = bx.context.new_bitcast(None, swapped, byte_vector_type);
        // We're going to need to extend the vector with zeros to make sure that the types are the
        // same, since that's what new_rvalue_vector_perm expects.
        let byte_vector = if byte_vector_type_size > type_size_bytes {
            // Copy the real bytes element by element, then pad the tail with zero bytes.
            let mut byte_vector_elements = Vec::with_capacity(byte_vector_type_size as _);
            for i in 0..type_size_bytes {
                let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _);
                let val = bx.extract_element(byte_vector, idx);
                byte_vector_elements.push(val);
            }
            for _ in type_size_bytes..byte_vector_type_size {
                byte_vector_elements.push(zero_byte);
            }
            bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &byte_vector_elements)
        } else {
            // The input is already at least 16 bytes wide, so the two byte-vector types have the
            // same size and a bitcast suffices.
            bx.context.new_bitcast(None, byte_vector, long_byte_vector_type)
        };
        // Step 3: Mask off the low and high nibbles of each byte in the byte-swapped input.
        let masked_hi = (byte_vector >> four_vec) & mask;
        let masked_lo = byte_vector & mask;
        // Step 4: Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask.
        // The low nibble of each byte selects from the table that places its reversal in the
        // upper four bits, and the high nibble selects from the table that places its reversal in
        // the lower four bits.
        let hi = bx.context.new_rvalue_vector_perm(None, hi_nibble, hi_nibble, masked_lo);
        let lo = bx.context.new_rvalue_vector_perm(None, lo_nibble, lo_nibble, masked_hi);
        // Step 5: Combine the results of the shuffle back together and cast back to the original type.
        let result = hi | lo;
        // Vector of the original element type that spans the (possibly padded) byte width.
        let cast_ty = bx.context.new_vector_type(elem_type, byte_vector_type_size / (elem_size_bytes as u64));
        // we might need to truncate if sizeof(v_type) < sizeof(cast_type)
        if type_size_bytes < byte_vector_type_size {
            // Keep only the first `in_len` lanes; the remaining lanes come from the zero padding.
            let cast_result = bx.context.new_bitcast(None, result, cast_ty);
            let elems: Vec<_> = (0..in_len)
                .map(|i| {
                    let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _);
                    bx.extract_element(cast_result, idx)
                })
                .collect();
            return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems))
        } else {
            // avoid the unnecessary truncation as an optimization.
            return Ok(bx.context.new_bitcast(None, result, v_type));
        }
    }
    // Builds without the `master` feature do not use the vector shuffle API, so `simd_bitreverse`
    // falls back to a lane-by-lane strategy: every element is extracted, bit-reversed with the
    // scalar `bit_reverse` helper, and the results are reassembled into a vector of the input
    // type.
    #[cfg(not(feature = "master"))]
    if name == sym::simd_bitreverse {
        let input = args[0].immediate();
        let input_ty = input.get_type();
        let simd_ty = input_ty.unqualified().dyncast_vector().expect("vector type");
        let lane_ty = simd_ty.get_element_type();
        // Width of one lane in bits, as expected by `bit_reverse`.
        let lane_bits = lane_ty.get_size() as u64 * 8;
        // Lane indices are materialised in the unsigned counterpart of the lane type.
        let index_ty = lane_ty.to_unsigned(bx.cx);

        let mut reversed_lanes = Vec::new();
        for lane in 0..simd_ty.get_num_units() {
            let lane_index = bx.context.new_rvalue_from_long(index_ty, lane as _);
            let lane_value = bx.extract_element(input, lane_index).to_rvalue();
            reversed_lanes.push(bx.bit_reverse(lane_bits, lane_value));
        }

        return Ok(bx.context.new_rvalue_from_vector(None, input_ty, &reversed_lanes));
    }
    // `simd_ctlz` / `simd_cttz`: count leading or trailing zero bits lane by lane.  Each element
    // is extracted, handed to the matching scalar helper together with its width in bits, and the
    // per-lane counts are collected into a vector of the input type.
    if name == sym::simd_ctlz || name == sym::simd_cttz {
        let input = args[0].immediate();
        let count_leading = name == sym::simd_ctlz;

        let mut counts = Vec::with_capacity(in_len as usize);
        for lane in 0..in_len {
            let lane_index = bx.context.new_rvalue_from_long(bx.i32_type, lane as i64);
            let lane_value = bx.extract_element(input, lane_index).to_rvalue();
            let lane_bits = lane_value.get_type().get_size() as u64 * 8;
            let count = if count_leading {
                bx.count_leading_zeroes(lane_bits, lane_value)
            } else {
                bx.count_trailing_zeroes(lane_bits, lane_value)
            };
            counts.push(count);
        }

        return Ok(bx.context.new_rvalue_from_vector(None, input.get_type(), &counts));
    }
    if name == sym::simd_shuffle {
        // Make sure this is actually an array, since typeck only checks the length-suffixed
        // version of this intrinsic.