diff --git a/.gitignore b/.gitignore index efda74b2633..0b611d05b5c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ perf.data.old /rust /simple-raytracer /regex +/rand gimple* *asm res diff --git a/Cargo.lock b/Cargo.lock index a1d9f2f5e38..f66c9874269 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,7 +41,7 @@ dependencies = [ [[package]] name = "gccjit" version = "1.0.0" -source = "git+https://github.com/antoyo/gccjit.rs#bdecdecfb8a02ec861a39a350f990faa33bd31c3" +source = "git+https://github.com/antoyo/gccjit.rs#f24e1f49d99430941d8a747275b41c9a7930e049" dependencies = [ "gccjit_sys", ] @@ -49,7 +49,7 @@ dependencies = [ [[package]] name = "gccjit_sys" version = "0.0.1" -source = "git+https://github.com/antoyo/gccjit.rs#bdecdecfb8a02ec861a39a350f990faa33bd31c3" +source = "git+https://github.com/antoyo/gccjit.rs#f24e1f49d99430941d8a747275b41c9a7930e049" dependencies = [ "libc 0.1.12", ] diff --git a/config.sh b/config.sh index a932c1c8372..b25e215fb9e 100644 --- a/config.sh +++ b/config.sh @@ -2,7 +2,7 @@ set -e export CARGO_INCREMENTAL=0 -if [ -f ./gcc_path ]; then +if [ -f ./gcc_path ]; then export GCC_PATH=$(cat gcc_path) else echo 'Please put the path to your custom build of libgccjit in the file `gcc_path`, see Readme.md for details' @@ -38,7 +38,7 @@ if [[ "$HOST_TRIPLE" != "$TARGET_TRIPLE" ]]; then fi fi -export RUSTFLAGS="$linker -Cpanic=abort -Csymbol-mangling-version=v0 -Cdebuginfo=2 -Clto=off -Zpanic-abort-tests -Zcodegen-backend=$(pwd)/target/${CHANNEL:-debug}/librustc_codegen_gcc.$dylib_ext --sysroot $(pwd)/build_sysroot/sysroot" +export RUSTFLAGS="$CG_RUSTFLAGS $linker -Cpanic=abort -Csymbol-mangling-version=v0 -Cdebuginfo=2 -Clto=off -Zpanic-abort-tests -Zcodegen-backend=$(pwd)/target/${CHANNEL:-debug}/librustc_codegen_gcc.$dylib_ext --sysroot $(pwd)/build_sysroot/sysroot" # FIXME(antoyo): remove once the atomic shim is gone if [[ `uname` == 'Darwin' ]]; then diff --git a/crate_patches/0002-rand-Disable-failing-test.patch b/crate_patches/0002-rand-Disable-failing-test.patch new file mode 100644 index 00000000000..449ca5f6e29 --- /dev/null +++ b/crate_patches/0002-rand-Disable-failing-test.patch @@ -0,0 +1,32 @@ +From a8fb97120d71252538b6b026695df40d02696bdb Mon Sep 17 00:00:00 2001 +From: bjorn3 +Date: Sat, 15 Aug 2020 20:04:38 +0200 +Subject: [PATCH] [rand] Disable failing test + +--- + src/distributions/uniform.rs | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs +index 480b859..c80bb6f 100644 +--- a/src/distributions/uniform.rs ++++ b/src/distributions/uniform.rs +@@ -1085,7 +1085,7 @@ mod tests { + _ => panic!("`UniformDurationMode` was not serialized/deserialized correctly") + } + } +- ++ + #[test] + #[cfg(feature = "serde1")] + fn test_uniform_serialization() { +@@ -1314,6 +1314,7 @@ mod tests { + not(target_arch = "wasm32"), + not(target_arch = "asmjs") + ))] ++ #[ignore] // FIXME + fn test_float_assertions() { + use super::SampleUniform; + use std::panic::catch_unwind; +-- +2.20.1 diff --git a/example/std_example.rs b/example/std_example.rs index eba0eb82896..722666f7e16 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -93,9 +93,9 @@ fn main() { println!("{:?}", std::intrinsics::caller_location()); - /*unsafe { + unsafe { test_simd(); - }*/ + } Box::pin(move |mut _task_context| { yield (); @@ -104,7 +104,7 @@ fn main() { println!("End"); } -/*#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2")] unsafe fn test_simd() { let x = _mm_setzero_si128(); let y = _mm_set1_epi16(7); @@ -112,7 +112,7 @@ unsafe fn test_simd() { let cmp_eq = _mm_cmpeq_epi8(y, y); let cmp_lt = _mm_cmplt_epi8(y, y); - /*assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); @@ -124,14 +124,14 @@ unsafe fn test_simd() { test_mm_cvtepi8_epi16(); test_mm_cvtsi128_si64(); - // FIXME(#666) implement `#[rustc_arg_required_const(..)]` support - //test_mm_extract_epi8(); + test_mm_extract_epi8(); + test_mm_insert_epi16(); let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); - assert_eq!(mask1, 1);*/ -}*/ + assert_eq!(mask1, 1); +} -/*#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2")] unsafe fn test_mm_slli_si128() { #[rustfmt::skip] let a = _mm_setr_epi8( @@ -155,22 +155,9 @@ unsafe fn test_mm_slli_si128() { ); let r = _mm_slli_si128(a, 16); assert_eq_m128i(r, _mm_set1_epi8(0)); - - #[rustfmt::skip] - let a = _mm_setr_epi8( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - ); - let r = _mm_slli_si128(a, -1); - assert_eq_m128i(_mm_set1_epi8(0), r); - - #[rustfmt::skip] - let a = _mm_setr_epi8( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - ); - let r = _mm_slli_si128(a, -0x80000000); - assert_eq_m128i(r, _mm_set1_epi8(0)); } + #[target_feature(enable = "sse2")] unsafe fn test_mm_movemask_epi8() { #[rustfmt::skip] @@ -254,10 +241,19 @@ unsafe fn test_mm_extract_epi8() { 8, 9, 10, 11, 12, 13, 14, 15 ); let r1 = _mm_extract_epi8(a, 0); - let r2 = _mm_extract_epi8(a, 19); + let r2 = _mm_extract_epi8(a, 3); assert_eq!(r1, 0xFF); assert_eq!(r2, 3); -}*/ +} + +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "sse2")] +unsafe fn test_mm_insert_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm_insert_epi16(a, 9, 0); + let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); +} #[derive(PartialEq)] enum LoopState { diff --git a/patches/0024-core-Disable-portable-simd-test.patch b/patches/0024-core-Disable-portable-simd-test.patch index 03900ba101a..d5fa1cec061 100644 --- a/patches/0024-core-Disable-portable-simd-test.patch +++ b/patches/0024-core-Disable-portable-simd-test.patch @@ -7,167 +7,6 @@ Subject: [PATCH] [core] Disable portable-simd test library/core/tests/lib.rs | 1 - 1 file changed, 1 deletion(-) -diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs -index aa1ad93..95fbf55 100644 ---- a/library/core/src/lib.rs -+++ b/library/core/src/lib.rs -@@ -398,23 +398,4 @@ pub mod arch { - } - } - --// Pull in the `core_simd` crate directly into libcore. The contents of --// `core_simd` are in a different repository: rust-lang/portable-simd. --// --// `core_simd` depends on libcore, but the contents of this module are --// set up in such a way that directly pulling it here works such that the --// crate uses this crate as its libcore. --#[path = "../../portable-simd/crates/core_simd/src/mod.rs"] --#[allow(missing_debug_implementations, dead_code, unsafe_op_in_unsafe_fn, unused_unsafe)] --#[allow(rustdoc::bare_urls)] --#[unstable(feature = "portable_simd", issue = "86656")] --mod core_simd; -- --#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")] --#[unstable(feature = "portable_simd", issue = "86656")] --pub mod simd { -- #[unstable(feature = "portable_simd", issue = "86656")] -- pub use crate::core_simd::simd::*; --} -- - include!("primitive_docs.rs"); -diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs -index cd38c3a..ad632dc 100644 ---- a/library/core/src/slice/mod.rs -+++ b/library/core/src/slice/mod.rs -@@ -17,6 +17,5 @@ use crate::ptr; - use crate::result::Result; - use crate::result::Result::{Err, Ok}; --use crate::simd::{self, Simd}; - use crate::slice; - - #[unstable( -@@ -3475,121 +3474,6 @@ impl [T] { - } - } - -- /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. -- /// -- /// This is a safe wrapper around [`slice::align_to`], so has the same weak -- /// postconditions as that method. You're only assured that -- /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. -- /// -- /// Notably, all of the following are possible: -- /// - `prefix.len() >= LANES`. -- /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. -- /// - `suffix.len() >= LANES`. -- /// -- /// That said, this is a safe method, so if you're only writing safe code, -- /// then this can at most cause incorrect logic, not unsoundness. -- /// -- /// # Panics -- /// -- /// This will panic if the size of the SIMD type is different from -- /// `LANES` times that of the scalar. -- /// -- /// At the time of writing, the trait restrictions on `Simd` keeps -- /// that from ever happening, as only power-of-two numbers of lanes are -- /// supported. It's possible that, in the future, those restrictions might -- /// be lifted in a way that would make it possible to see panics from this -- /// method for something like `LANES == 3`. -- /// -- /// # Examples -- /// -- /// ``` -- /// #![feature(portable_simd)] -- /// -- /// let short = &[1, 2, 3]; -- /// let (prefix, middle, suffix) = short.as_simd::<4>(); -- /// assert_eq!(middle, []); // Not enough elements for anything in the middle -- /// -- /// // They might be split in any possible way between prefix and suffix -- /// let it = prefix.iter().chain(suffix).copied(); -- /// assert_eq!(it.collect::>(), vec![1, 2, 3]); -- /// -- /// fn basic_simd_sum(x: &[f32]) -> f32 { -- /// use std::ops::Add; -- /// use std::simd::f32x4; -- /// let (prefix, middle, suffix) = x.as_simd(); -- /// let sums = f32x4::from_array([ -- /// prefix.iter().copied().sum(), -- /// 0.0, -- /// 0.0, -- /// suffix.iter().copied().sum(), -- /// ]); -- /// let sums = middle.iter().copied().fold(sums, f32x4::add); -- /// sums.reduce_sum() -- /// } -- /// -- /// let numbers: Vec = (1..101).map(|x| x as _).collect(); -- /// assert_eq!(basic_simd_sum(&numbers[1..99]), 4949.0); -- /// ``` -- #[unstable(feature = "portable_simd", issue = "86656")] -- pub fn as_simd(&self) -> (&[T], &[Simd], &[T]) -- where -- Simd: AsRef<[T; LANES]>, -- T: simd::SimdElement, -- simd::LaneCount: simd::SupportedLaneCount, -- { -- // These are expected to always match, as vector types are laid out like -- // arrays per , but we -- // might as well double-check since it'll optimize away anyhow. -- assert_eq!(mem::size_of::>(), mem::size_of::<[T; LANES]>()); -- -- // SAFETY: The simd types have the same layout as arrays, just with -- // potentially-higher alignment, so the de-facto transmutes are sound. -- unsafe { self.align_to() } -- } -- -- /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. -- /// -- /// This is a safe wrapper around [`slice::align_to_mut`], so has the same weak -- /// postconditions as that method. You're only assured that -- /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. -- /// -- /// Notably, all of the following are possible: -- /// - `prefix.len() >= LANES`. -- /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. -- /// - `suffix.len() >= LANES`. -- /// -- /// That said, this is a safe method, so if you're only writing safe code, -- /// then this can at most cause incorrect logic, not unsoundness. -- /// -- /// This is the mutable version of [`slice::as_simd`]; see that for examples. -- /// -- /// # Panics -- /// -- /// This will panic if the size of the SIMD type is different from -- /// `LANES` times that of the scalar. -- /// -- /// At the time of writing, the trait restrictions on `Simd` keeps -- /// that from ever happening, as only power-of-two numbers of lanes are -- /// supported. It's possible that, in the future, those restrictions might -- /// be lifted in a way that would make it possible to see panics from this -- /// method for something like `LANES == 3`. -- #[unstable(feature = "portable_simd", issue = "86656")] -- pub fn as_simd_mut(&mut self) -> (&mut [T], &mut [Simd], &mut [T]) -- where -- Simd: AsMut<[T; LANES]>, -- T: simd::SimdElement, -- simd::LaneCount: simd::SupportedLaneCount, -- { -- // These are expected to always match, as vector types are laid out like -- // arrays per , but we -- // might as well double-check since it'll optimize away anyhow. -- assert_eq!(mem::size_of::>(), mem::size_of::<[T; LANES]>()); -- -- // SAFETY: The simd types have the same layout as arrays, just with -- // potentially-higher alignment, so the de-facto transmutes are sound. -- unsafe { self.align_to_mut() } -- } -- - /// Checks if the elements of this slice are sorted. - /// - /// That is, for each element `a` and its following element `b`, `a <= b` must hold. If the diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs index 06c7be0..359e2e7 100644 --- a/library/core/tests/lib.rs @@ -188,41 +27,3 @@ index 06c7be0..359e2e7 100644 mod slice; mod str; mod str_lossy; -diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs -index 5dc586d..b6fc48f 100644 ---- a/library/std/src/lib.rs -+++ b/library/std/src/lib.rs -@@ -312,6 +312,5 @@ - #![feature(panic_can_unwind)] - #![feature(panic_unwind)] - #![feature(platform_intrinsics)] --#![feature(portable_simd)] - #![feature(prelude_import)] - #![feature(ptr_as_uninit)] -@@ -508,23 +508,6 @@ pub mod time; - #[unstable(feature = "once_cell", issue = "74465")] - pub mod lazy; - --// Pull in `std_float` crate into libstd. The contents of --// `std_float` are in a different repository: rust-lang/portable-simd. --#[path = "../../portable-simd/crates/std_float/src/lib.rs"] --#[allow(missing_debug_implementations, dead_code, unsafe_op_in_unsafe_fn, unused_unsafe)] --#[allow(rustdoc::bare_urls)] --#[unstable(feature = "portable_simd", issue = "86656")] --mod std_float; -- --#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")] --#[unstable(feature = "portable_simd", issue = "86656")] --pub mod simd { -- #[doc(inline)] -- pub use crate::std_float::StdFloat; -- #[doc(inline)] -- pub use core::simd::*; --} -- - #[stable(feature = "futures_api", since = "1.36.0")] - pub mod task { - //! Types and Traits for working with asynchronous tasks. --- -2.26.2.7.g19db9cfb68 - diff --git a/prepare.sh b/prepare.sh index 503fa29b362..d39f43f5e1b 100755 --- a/prepare.sh +++ b/prepare.sh @@ -5,6 +5,13 @@ source prepare_build.sh cargo install hyperfine || echo "Skipping hyperfine install" +git clone https://github.com/rust-random/rand.git || echo "rust-random/rand has already been cloned" +pushd rand +git checkout -- . +git checkout 0f933f9c7176e53b2a3c7952ded484e1783f0bf1 +git am ../crate_patches/*-rand-*.patch +popd + git clone https://github.com/rust-lang/regex.git || echo "rust-lang/regex has already been cloned" pushd regex git checkout -- . diff --git a/rustc_patches/compile_test.patch b/rustc_patches/compile_test.patch new file mode 100644 index 00000000000..59143eac37b --- /dev/null +++ b/rustc_patches/compile_test.patch @@ -0,0 +1,14 @@ +diff --git a/src/tools/compiletest/src/header.rs b/src/tools/compiletest/src/header.rs +index 887d27fd6dca4..2c2239f2b83d1 100644 +--- a/src/tools/compiletest/src/header.rs ++++ b/src/tools/compiletest/src/header.rs +@@ -806,8 +806,8 @@ pub fn make_test_description( + cfg: Option<&str>, + ) -> test::TestDesc { + let mut ignore = false; + #[cfg(not(bootstrap))] +- let ignore_message: Option = None; ++ let ignore_message: Option<&str> = None; + let mut should_fail = false; + + let rustc_has_profiler_support = env::var_os("RUSTC_PROFILER_SUPPORT").is_some(); diff --git a/src/base.rs b/src/base.rs index f5aca35cdcb..4ce5cdaccd3 100644 --- a/src/base.rs +++ b/src/base.rs @@ -78,6 +78,11 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_ let context = Context::default(); // TODO(antoyo): only set on x86 platforms. context.add_command_line_option("-masm=intel"); + // TODO(antoyo): only add the following cli argument if the feature is supported. + context.add_command_line_option("-mavx2"); + // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU. + // Only add if the CPU supports it. + //context.add_command_line_option("-mavx512f"); for arg in &tcx.sess.opts.cg.llvm_args { context.add_command_line_option(arg); } diff --git a/src/builder.rs b/src/builder.rs index b2f46e92ecc..d53e1712dc8 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -3,7 +3,6 @@ use std::cell::Cell; use std::convert::TryFrom; use std::ops::Deref; -use gccjit::FunctionType; use gccjit::{ BinaryOp, Block, @@ -224,10 +223,14 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { .map(|(index, (expected_ty, &actual_val))| { let actual_ty = actual_val.get_type(); if expected_ty != actual_ty { - if on_stack_param_indices.contains(&index) { + if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() { + self.context.new_cast(None, actual_val, expected_ty) + } + else if on_stack_param_indices.contains(&index) { actual_val.dereference(None).to_rvalue() } else { + assert!(!((actual_ty.is_vector() && !expected_ty.is_vector()) || (!actual_ty.is_vector() && expected_ty.is_vector())), "{:?} ({}) -> {:?} ({}), index: {:?}[{}]", actual_ty, actual_ty.is_vector(), expected_ty, expected_ty.is_vector(), func_ptr, index); self.bitcast(actual_val, expected_ty) } } @@ -286,15 +289,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // gccjit requires to use the result of functions, even when it's not used. // That's why we assign the result to a local or call add_eval(). let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr"); - let mut return_type = gcc_func.get_return_type(); + let return_type = gcc_func.get_return_type(); let void_type = self.context.new_type::<()>(); let current_func = self.block.get_function(); - // FIXME(antoyo): As a temporary workaround for unsupported LLVM intrinsics. - if gcc_func.get_param_count() == 0 && format!("{:?}", func_ptr) == "__builtin_ia32_pmovmskb128" { - return_type = self.int_type; - } - if return_type != void_type { unsafe { RETURN_VALUE_COUNT += 1 }; let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT })); @@ -302,13 +300,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { result.to_rvalue() } else { - if gcc_func.get_param_count() == 0 { - // FIXME(antoyo): As a temporary workaround for unsupported LLVM intrinsics. - self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &[])); - } - else { - self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &args)); - } + self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &args)); // Return dummy value when not having return value. let result = current_func.new_local(None, self.isize_type, "dummyValueThatShouldNeverBeUsed"); self.block.add_assignment(None, result, self.context.new_rvalue_from_long(self.isize_type, 0)); @@ -529,12 +521,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { - if a.get_type() == self.cx.float_type { + if a.get_type().is_compatible_with(self.cx.float_type) { let fmodf = self.context.get_builtin_function("fmodf"); // FIXME(antoyo): this seems to produce the wrong result. return self.context.new_call(None, fmodf, &[a, b]); } - assert_eq!(a.get_type(), self.cx.double_type); + assert_eq!(a.get_type().unqualified(), self.cx.double_type); let fmod = self.context.get_builtin_function("fmod"); return self.context.new_call(None, fmod, &[a, b]); @@ -657,7 +649,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // NOTE: instead of returning the dereference here, we have to assign it to a variable in // the current basic block. Otherwise, it could be used in another basic block, causing a // dereference after a drop, for instance. - // TODO(antoyo): handle align. + // TODO(antoyo): handle align of the load instruction. let deref = ptr.dereference(None).to_rvalue(); let value_type = deref.get_type(); unsafe { RETURN_VALUE_COUNT += 1 }; @@ -797,9 +789,16 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { self.store_with_flags(val, ptr, align, MemFlags::empty()) } - fn store_with_flags(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>, _align: Align, _flags: MemFlags) -> RValue<'gcc> { + fn store_with_flags(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>, align: Align, _flags: MemFlags) -> RValue<'gcc> { let ptr = self.check_store(val, ptr); - self.llbb().add_assignment(None, ptr.dereference(None), val); + let destination = ptr.dereference(None); + // NOTE: libgccjit does not support specifying the alignment on the assignment, so we cast + // to type so it gets the proper alignment. + let destination_type = destination.to_rvalue().get_type().unqualified(); + let aligned_type = destination_type.get_aligned(align.bytes()).make_pointer(); + let aligned_destination = self.cx.context.new_bitcast(None, ptr, aligned_type); + let aligned_destination = aligned_destination.dereference(None); + self.llbb().add_assignment(None, aligned_destination, val); // TODO(antoyo): handle align and flags. // NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here? self.cx.context.new_rvalue_zero(self.type_i32()) @@ -1288,14 +1287,75 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { pub fn shuffle_vector(&mut self, v1: RValue<'gcc>, v2: RValue<'gcc>, mask: RValue<'gcc>) -> RValue<'gcc> { - let return_type = v1.get_type(); - let params = [ - self.context.new_parameter(None, return_type, "v1"), - self.context.new_parameter(None, return_type, "v2"), - self.context.new_parameter(None, mask.get_type(), "mask"), - ]; - let shuffle = self.context.new_function(None, FunctionType::Extern, return_type, ¶ms, "_mm_shuffle_epi8", false); - self.context.new_call(None, shuffle, &[v1, v2, mask]) + let struct_type = mask.get_type().is_struct().expect("mask of struct type"); + + // TODO(antoyo): use a recursive unqualified() here. + let vector_type = v1.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_type = vector_type.get_element_type(); + let vec_num_units = vector_type.get_num_units(); + + let mask_num_units = struct_type.get_field_count(); + let mut vector_elements = vec![]; + let mask_element_type = + if element_type.is_integral() { + element_type + } + else { + self.int_type + }; + for i in 0..mask_num_units { + let field = struct_type.get_field(i as i32); + vector_elements.push(self.context.new_cast(None, mask.access_field(None, field).to_rvalue(), mask_element_type)); + } + + // NOTE: the mask needs to be the same length as the input vectors, so add the missing + // elements in the mask if needed. + for _ in mask_num_units..vec_num_units { + vector_elements.push(self.context.new_rvalue_zero(mask_element_type)); + } + + let array_type = self.context.new_array_type(None, element_type, vec_num_units as i32); + let result_type = self.context.new_vector_type(element_type, mask_num_units as u64); + let (v1, v2) = + if vec_num_units < mask_num_units { + // NOTE: the mask needs to be the same length as the input vectors, so join the 2 + // vectors and create a dummy second vector. + let array = self.context.new_bitcast(None, v1, array_type); + let mut elements = vec![]; + for i in 0..vec_num_units { + elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue()); + } + let array = self.context.new_bitcast(None, v2, array_type); + for i in 0..vec_num_units { + elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue()); + } + let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements); + let zero = self.context.new_rvalue_zero(element_type); + let v2 = self.context.new_rvalue_from_vector(None, result_type, &vec![zero; mask_num_units]); + (v1, v2) + } + else { + (v1, v2) + }; + + let new_mask_num_units = std::cmp::max(mask_num_units, vec_num_units); + let mask_type = self.context.new_vector_type(mask_element_type, new_mask_num_units as u64); + let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements); + let result = self.context.new_rvalue_vector_perm(None, v1, v2, mask); + + if vec_num_units != mask_num_units { + // NOTE: if padding was added, only select the number of elements of the masks to + // remove that padding in the result. + let mut elements = vec![]; + let array = self.context.new_bitcast(None, result, array_type); + for i in 0..mask_num_units { + elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue()); + } + self.context.new_rvalue_from_vector(None, result_type, &elements) + } + else { + result + } } } diff --git a/src/common.rs b/src/common.rs index 61709dd92de..703e20947fe 100644 --- a/src/common.rs +++ b/src/common.rs @@ -322,6 +322,8 @@ pub trait TypeReflection<'gcc, 'tcx> { fn is_f32(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool; fn is_f64(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool; + + fn is_vector(&self) -> bool; } impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> { @@ -392,4 +394,21 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> { fn is_f64(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool { self.unqualified() == cx.context.new_type::() } + + fn is_vector(&self) -> bool { + let mut typ = self.clone(); + loop { + if typ.dyncast_vector().is_some() { + return true; + } + + let old_type = typ; + typ = typ.unqualified(); + if old_type == typ { + break; + } + } + + false + } } diff --git a/src/consts.rs b/src/consts.rs index de52f3ea225..4350c00e94a 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -25,7 +25,14 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } } } - self.context.new_bitcast(None, value, typ) + // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some + // SIMD builtins require a constant value. + if value.get_type() != typ { + self.context.new_bitcast(None, value, typ) + } + else { + value + } } } @@ -171,8 +178,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { Some(kind) if !self.tcx.sess.fewer_names() => { let name = self.generate_local_symbol_name(kind); // TODO(antoyo): check if it's okay that no link_section is set. - // TODO(antoyo): set alignment here as well. - let global = self.declare_private_global(&name[..], self.val_ty(cv)); + + let typ = self.val_ty(cv).get_aligned(align.bytes()); + let global = self.declare_private_global(&name[..], typ); global } _ => { diff --git a/src/context.rs b/src/context.rs index 5e5b9e7e9b1..83c4683a668 100644 --- a/src/context.rs +++ b/src/context.rs @@ -269,11 +269,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } pub fn is_native_int_type_or_bool(&self, typ: Type<'gcc>) -> bool { - self.is_native_int_type(typ) || typ == self.bool_type + self.is_native_int_type(typ) || typ.is_compatible_with(self.bool_type) } pub fn is_int_type_or_bool(&self, typ: Type<'gcc>) -> bool { - self.is_native_int_type(typ) || self.is_non_native_int_type(typ) || typ == self.bool_type + self.is_native_int_type(typ) || self.is_non_native_int_type(typ) || typ.is_compatible_with(self.bool_type) } pub fn sess(&self) -> &Session { diff --git a/src/int.rs b/src/int.rs index c3ed71ff730..ed779d5d888 100644 --- a/src/int.rs +++ b/src/int.rs @@ -153,8 +153,14 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let a_type = a.get_type(); let b_type = b.get_type(); if self.is_native_int_type_or_bool(a_type) && self.is_native_int_type_or_bool(b_type) { - if a.get_type() != b.get_type() { - b = self.context.new_cast(None, b, a.get_type()); + if a_type != b_type { + if a_type.is_vector() { + // Vector types need to be bitcast. + b = self.context.new_bitcast(None, b, a.get_type()); + } + else { + b = self.context.new_cast(None, b, a.get_type()); + } } self.context.new_binary_op(None, operation, a_type, a, b) } @@ -593,7 +599,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let b_type = b.get_type(); let a_native = self.is_native_int_type_or_bool(a_type); let b_native = self.is_native_int_type_or_bool(b_type); - if a_native && b_native { + if a_type.is_vector() && b_type.is_vector() { + self.context.new_binary_op(None, operation, a_type, a, b) + } + else if a_native && b_native { if a_type != b_type { b = self.context.new_cast(None, b, a_type); } @@ -639,6 +648,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { else { // Since u128 and i128 are the only types that can be unsupported, we know the type of // value and the destination type have the same size, so a bitcast is fine. + + // TODO(antoyo): perhaps use __builtin_convertvector for vector casting. (This is elsewhere, + // though.) self.context.new_bitcast(None, value, dest_typ) } } diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index b074febc521..1a2a352b5a3 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -3,20 +3,122 @@ use gccjit::Function; use crate::context::CodegenCx; pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { - let _gcc_name = + let gcc_name = match name { - "llvm.x86.xgetbv" => { - let gcc_name = "__builtin_trap"; - let func = cx.context.get_builtin_function(gcc_name); - cx.functions.borrow_mut().insert(gcc_name.to_string(), func); - return func; - }, + "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv", // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html + "llvm.x86.sse2.pmovmskb.128" => "__builtin_ia32_pmovmskb128", + "llvm.x86.avx2.pmovmskb" => "__builtin_ia32_pmovmskb256", "llvm.x86.sse2.cmp.pd" => "__builtin_ia32_cmppd", "llvm.x86.sse2.movmsk.pd" => "__builtin_ia32_movmskpd", - "llvm.x86.sse2.pmovmskb.128" => "__builtin_ia32_pmovmskb128", - _ => unimplemented!("unsupported LLVM intrinsic {}", name) + "llvm.x86.ssse3.pshuf.b.128" => "__builtin_ia32_pshufb128", + "llvm.x86.sse2.pause" => "__builtin_ia32_pause", + "llvm.x86.avx2.pshuf.b" => "__builtin_ia32_pshufb256", + "llvm.x86.avx2.pslli.d" => "__builtin_ia32_pslldi256", + "llvm.x86.avx2.psrli.d" => "__builtin_ia32_psrldi256", + "llvm.x86.avx.vzeroupper" => "__builtin_ia32_vzeroupper", + "llvm.x86.avx2.vperm2i128" => "__builtin_ia32_permti256", + "llvm.x86.avx2.psrli.w" => "__builtin_ia32_psrlwi256", + "llvm.x86.sse2.storeu.dq" => "__builtin_ia32_storedqu", + "llvm.x86.sse2.psrli.w" => "__builtin_ia32_psrlwi128", + "llvm.x86.avx2.pabs.d" => "__builtin_ia32_pabsd256", + "llvm.x86.sse2.psrli.q" => "__builtin_ia32_psrlqi128", + "llvm.x86.avx2.pabs.w" => "__builtin_ia32_pabsw256", + "llvm.x86.avx2.pblendvb" => "__builtin_ia32_pblendvb256", + "llvm.x86.avx2.pabs.b" => "__builtin_ia32_pabsb256", + "llvm.x86.avx2.psrli.q" => "__builtin_ia32_psrlqi256", + "llvm.x86.sse41.pblendvb" => "__builtin_ia32_pblendvb128", + "llvm.x86.avx2.pavg.w" => "__builtin_ia32_pavgw256", + "llvm.x86.avx2.pavg.b" => "__builtin_ia32_pavgb256", + "llvm.x86.avx2.phadd.w" => "__builtin_ia32_phaddw256", + "llvm.x86.avx2.phadd.d" => "__builtin_ia32_phaddd256", + "llvm.x86.avx2.phadd.sw" => "__builtin_ia32_phaddsw256", + "llvm.x86.avx2.phsub.w" => "__builtin_ia32_phsubw256", + "llvm.x86.avx2.phsub.d" => "__builtin_ia32_phsubd256", + "llvm.x86.avx2.phsub.sw" => "__builtin_ia32_phsubsw256", + "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gatherd_d", + "llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gatherd_d256", + "llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gatherd_ps", + "llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gatherd_ps256", + "llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gatherd_q", + "llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gatherd_q256", + "llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gatherd_pd", + "llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gatherd_pd256", + "llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherq_d", + "llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherq_d256", + "llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherq_ps", + "llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherq_ps256", + "llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherq_q", + "llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherq_q256", + "llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherq_pd", + "llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherq_pd256", + "llvm.x86.avx2.pmadd.wd" => "__builtin_ia32_pmaddwd256", + "llvm.x86.avx2.pmadd.ub.sw" => "__builtin_ia32_pmaddubsw256", + "llvm.x86.avx2.maskload.d" => "__builtin_ia32_maskloadd", + "llvm.x86.avx2.maskload.d.256" => "__builtin_ia32_maskloadd256", + "llvm.x86.avx2.maskload.q" => "__builtin_ia32_maskloadq", + "llvm.x86.avx2.maskload.q.256" => "__builtin_ia32_maskloadq256", + "llvm.x86.avx2.maskstore.d" => "__builtin_ia32_maskstored", + "llvm.x86.avx2.maskstore.d.256" => "__builtin_ia32_maskstored256", + "llvm.x86.avx2.maskstore.q" => "__builtin_ia32_maskstoreq", + "llvm.x86.avx2.maskstore.q.256" => "__builtin_ia32_maskstoreq256", + "llvm.x86.avx2.pmaxs.w" => "__builtin_ia32_pmaxsw256", + "llvm.x86.avx2.pmaxs.d" => "__builtin_ia32_pmaxsd256", + "llvm.x86.avx2.pmaxs.b" => "__builtin_ia32_pmaxsb256", + "llvm.x86.avx2.pmaxu.w" => "__builtin_ia32_pmaxuw256", + "llvm.x86.avx2.pmaxu.d" => "__builtin_ia32_pmaxud256", + "llvm.x86.avx2.pmaxu.b" => "__builtin_ia32_pmaxub256", + "llvm.x86.avx2.pmins.w" => "__builtin_ia32_pminsw256", + "llvm.x86.avx2.pmins.d" => "__builtin_ia32_pminsd256", + "llvm.x86.avx2.pmins.b" => "__builtin_ia32_pminsb256", + "llvm.x86.avx2.pminu.w" => "__builtin_ia32_pminuw256", + "llvm.x86.avx2.pminu.d" => "__builtin_ia32_pminud256", + "llvm.x86.avx2.pminu.b" => "__builtin_ia32_pminub256", + "llvm.x86.avx2.mpsadbw" => "__builtin_ia32_mpsadbw256", + "llvm.x86.avx2.pmul.dq" => "__builtin_ia32_pmuldq256", + "llvm.x86.avx2.pmulu.dq" => "__builtin_ia32_pmuludq256", + "llvm.x86.avx2.pmulh.w" => "__builtin_ia32_pmulhw256", + "llvm.x86.avx2.pmulhu.w" => "__builtin_ia32_pmulhuw256", + "llvm.x86.avx2.pmul.hr.sw" => "__builtin_ia32_pmulhrsw256", + "llvm.x86.avx2.packsswb" => "__builtin_ia32_packsswb256", + "llvm.x86.avx2.packssdw" => "__builtin_ia32_packssdw256", + "llvm.x86.avx2.packuswb" => "__builtin_ia32_packuswb256", + "llvm.x86.avx2.packusdw" => "__builtin_ia32_packusdw256", + "llvm.x86.avx2.permd" => "__builtin_ia32_permvarsi256", + "llvm.x86.avx2.permps" => "__builtin_ia32_permvarsf256", + "llvm.x86.avx2.psad.bw" => "__builtin_ia32_psadbw256", + "llvm.x86.avx2.psign.w" => "__builtin_ia32_psignw256", + "llvm.x86.avx2.psign.d" => "__builtin_ia32_psignd256", + "llvm.x86.avx2.psign.b" => "__builtin_ia32_psignb256", + "llvm.x86.avx2.psll.w" => "__builtin_ia32_psllw256", + "llvm.x86.avx2.psll.d" => "__builtin_ia32_pslld256", + "llvm.x86.avx2.psll.q" => "__builtin_ia32_psllq256", + "llvm.x86.avx2.pslli.w" => "__builtin_ia32_psllwi256", + "llvm.x86.avx2.pslli.q" => "__builtin_ia32_psllqi256", + "llvm.x86.avx2.psllv.d" => "__builtin_ia32_psllv4si", + "llvm.x86.avx2.psllv.d.256" => "__builtin_ia32_psllv8si", + "llvm.x86.avx2.psllv.q" => "__builtin_ia32_psllv2di", + "llvm.x86.avx2.psllv.q.256" => "__builtin_ia32_psllv4di", + "llvm.x86.avx2.psra.w" => "__builtin_ia32_psraw256", + "llvm.x86.avx2.psra.d" => "__builtin_ia32_psrad256", + "llvm.x86.avx2.psrai.w" => "__builtin_ia32_psrawi256", + "llvm.x86.avx2.psrai.d" => "__builtin_ia32_psradi256", + "llvm.x86.avx2.psrav.d" => "__builtin_ia32_psrav4si", + "llvm.x86.avx2.psrav.d.256" => "__builtin_ia32_psrav8si", + "llvm.x86.avx2.psrl.w" => "__builtin_ia32_psrlw256", + "llvm.x86.avx2.psrl.d" => "__builtin_ia32_psrld256", + "llvm.x86.avx2.psrl.q" => "__builtin_ia32_psrlq256", + "llvm.x86.avx2.psrlv.d" => "__builtin_ia32_psrlv4si", + "llvm.x86.avx2.psrlv.d.256" => "__builtin_ia32_psrlv8si", + "llvm.x86.avx2.psrlv.q" => "__builtin_ia32_psrlv2di", + "llvm.x86.avx2.psrlv.q.256" => "__builtin_ia32_psrlv4di", + "llvm.x86.sse.sqrt.ss" => "__builtin_ia32_sqrtss", + + "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd", + _ => unimplemented!("***** unsupported LLVM intrinsic {}", name), }; - unimplemented!(); + let func = cx.context.get_target_builtin_function(gcc_name); + cx.functions.borrow_mut().insert(gcc_name.to_string(), func); + func } diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index 7d7811c8782..11f1e7dd999 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -1,4 +1,6 @@ -use gccjit::{RValue, Type}; +use std::cmp::Ordering; + +use gccjit::{RValue, Type, ToRValue}; use rustc_codegen_ssa::base::compare_simd_types; use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error}; use rustc_codegen_ssa::mir::operand::OperandRef; @@ -10,6 +12,7 @@ use rustc_middle::ty::{self, Ty}; use rustc_span::{Span, Symbol, sym}; use crate::builder::Builder; +use crate::intrinsic; pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, name: Symbol, callee_ty: Ty<'tcx>, args: &[OperandRef<'tcx, RValue<'gcc>>], ret_ty: Ty<'tcx>, llret_ty: Type<'gcc>, span: Span) -> Result, ()> { // macros for error handling: @@ -100,9 +103,27 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, } if let Some(stripped) = name_str.strip_prefix("simd_shuffle") { - let n: u64 = stripped.parse().unwrap_or_else(|_| { - span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?") - }); + let n: u64 = + if stripped.is_empty() { + // Make sure this is actually an array, since typeck only checks the length-suffixed + // version of this intrinsic. + match args[2].layout.ty.kind() { + ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => { + len.try_eval_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(|| { + span_bug!(span, "could not evaluate shuffle index array length") + }) + } + _ => return_error!( + "simd_shuffle index must be an array of `u32`, got `{}`", + args[2].layout.ty + ), + } + } + else { + stripped.parse().unwrap_or_else(|_| { + span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?") + }) + }; require_simd!(ret_ty, "return"); @@ -133,6 +154,202 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, )); } + if name == sym::simd_insert { + require!( + in_elem == arg_tys[2], + "expected inserted type `{}` (element of input `{}`), found `{}`", + in_elem, + in_ty, + arg_tys[2] + ); + let vector = args[0].immediate(); + let index = args[1].immediate(); + let value = args[2].immediate(); + // TODO(antoyo): use a recursive unqualified() here. + let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type"); + let element_type = vector_type.get_element_type(); + // NOTE: we cannot cast to an array and assign to its element here because the value might + // not be an l-value. So, call a builtin to set the element. + // TODO(antoyo): perhaps we could create a new vector or maybe there's a GIMPLE instruction for that? + let func_name = + match in_len { + 2 => { + if element_type == bx.i64_type { + "__builtin_ia32_vec_set_v2di" + } + else { + unimplemented!(); + } + }, + 4 => { + if element_type == bx.i32_type { + "__builtin_ia32_vec_set_v4si" + } + else { + unimplemented!(); + } + }, + 8 => { + if element_type == bx.i16_type { + "__builtin_ia32_vec_set_v8hi" + } + else { + unimplemented!(); + } + }, + _ => unimplemented!("Len: {}", in_len), + }; + let builtin = bx.context.get_target_builtin_function(func_name); + let param1_type = builtin.get_param(0).to_rvalue().get_type(); + let vector = + if vector.get_type() != param1_type { + bx.context.new_bitcast(None, vector, param1_type) + } + else { + vector + }; + let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]); + return Ok(bx.context.new_bitcast(None, result, vector.get_type())); + } + if name == sym::simd_extract { + require!( + ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, + in_ty, + ret_ty + ); + let vector = args[0].immediate(); + return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue()); + } + + if name == sym::simd_cast { + require_simd!(ret_ty, "return"); + let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); + require!( + in_len == out_len, + "expected return type with length {} (same as input type `{}`), \ + found `{}` with length {}", + in_len, + in_ty, + ret_ty, + out_len + ); + // casting cares about nominal type, not just structural type + if in_elem == out_elem { + return Ok(args[0].immediate()); + } + + enum Style { + Float, + Int(/* is signed? */ bool), + Unsupported, + } + + let (in_style, in_width) = match in_elem.kind() { + // vectors of pointer-sized integers should've been + // disallowed before here, so this unwrap is safe. + ty::Int(i) => ( + Style::Int(true), + i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), + ), + ty::Uint(u) => ( + Style::Int(false), + u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), + ), + ty::Float(f) => (Style::Float, f.bit_width()), + _ => (Style::Unsupported, 0), + }; + let (out_style, out_width) = match out_elem.kind() { + ty::Int(i) => ( + Style::Int(true), + i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), + ), + ty::Uint(u) => ( + Style::Int(false), + u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), + ), + ty::Float(f) => (Style::Float, f.bit_width()), + _ => (Style::Unsupported, 0), + }; + + let extend = |in_type, out_type| { + let vector_type = bx.context.new_vector_type(out_type, 8); + let vector = args[0].immediate(); + let array_type = bx.context.new_array_type(None, in_type, 8); + let array = bx.context.new_bitcast(None, vector, array_type); + + let cast_vec_element = |index| { + let index = bx.context.new_rvalue_from_int(bx.int_type, index); + bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type) + }; + + bx.context.new_rvalue_from_vector(None, vector_type, &[ + cast_vec_element(0), + cast_vec_element(1), + cast_vec_element(2), + cast_vec_element(3), + cast_vec_element(4), + cast_vec_element(5), + cast_vec_element(6), + cast_vec_element(7), + ]) + }; + + match (in_style, out_style) { + (Style::Int(in_is_signed), Style::Int(_)) => { + return Ok(match in_width.cmp(&out_width) { + Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty), + Ordering::Equal => args[0].immediate(), + Ordering::Less => { + if in_is_signed { + match (in_width, out_width) { + // FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly + // call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that + // we can generate a call to it. + (8, 16) => extend(bx.i8_type, bx.i16_type), + (8, 32) => extend(bx.i8_type, bx.i32_type), + (8, 64) => extend(bx.i8_type, bx.i64_type), + (16, 32) => extend(bx.i16_type, bx.i32_type), + (32, 64) => extend(bx.i32_type, bx.i64_type), + (16, 64) => extend(bx.i16_type, bx.i64_type), + _ => unimplemented!("in: {}, out: {}", in_width, out_width), + } + } else { + match (in_width, out_width) { + (8, 16) => extend(bx.u8_type, bx.u16_type), + (8, 32) => extend(bx.u8_type, bx.u32_type), + (8, 64) => extend(bx.u8_type, bx.u64_type), + (16, 32) => extend(bx.u16_type, bx.u32_type), + (16, 64) => extend(bx.u16_type, bx.u64_type), + (32, 64) => extend(bx.u32_type, bx.u64_type), + _ => unimplemented!("in: {}, out: {}", in_width, out_width), + } + } + } + }); + } + (Style::Int(_), Style::Float) => { + unimplemented!(); + } + (Style::Float, Style::Int(_)) => { + unimplemented!(); + } + (Style::Float, Style::Float) => { + unimplemented!(); + } + _ => { /* Unsupported. Fallthrough. */ } + } + require!( + false, + "unsupported cast from `{}` with element `{}` to `{}` with element `{}`", + in_ty, + in_elem, + ret_ty, + out_elem + ); + } + macro_rules! arith_binary { ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => { $(if name == sym::$name { @@ -150,6 +367,105 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, } } + fn simd_simple_float_intrinsic<'gcc, 'tcx>( + name: Symbol, + in_elem: Ty<'_>, + in_ty: Ty<'_>, + in_len: u64, + bx: &mut Builder<'_, 'gcc, 'tcx>, + span: Span, + args: &[OperandRef<'tcx, RValue<'gcc>>], + ) -> Result, ()> { + macro_rules! emit_error { + ($msg: tt) => { + emit_error!($msg, ) + }; + ($msg: tt, $($fmt: tt)*) => { + span_invalid_monomorphization_error( + bx.sess(), span, + &format!(concat!("invalid monomorphization of `{}` intrinsic: ", $msg), + name, $($fmt)*)); + } + } + macro_rules! return_error { + ($($fmt: tt)*) => { + { + emit_error!($($fmt)*); + return Err(()); + } + } + } + + let (elem_ty_str, elem_ty) = + if let ty::Float(f) = in_elem.kind() { + let elem_ty = bx.cx.type_float_from_ty(*f); + match f.bit_width() { + 32 => ("f32", elem_ty), + 64 => ("f64", elem_ty), + _ => { + return_error!( + "unsupported element type `{}` of floating-point vector `{}`", + f.name_str(), + in_ty + ); + } + } + } + else { + return_error!("`{}` is not a floating-point type", in_ty); + }; + + let vec_ty = bx.cx.type_vector(elem_ty, in_len); + + let (intr_name, fn_ty) = + match name { + sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)), // TODO(antoyo): pand with 170141183420855150465331762880109871103 + sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)), + sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)), + sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)), + sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)), + sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)), + _ => return_error!("unrecognized intrinsic `{}`", name), + }; + let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str); + let function = intrinsic::llvm::intrinsic(llvm_name, &bx.cx); + let function: RValue<'gcc> = unsafe { std::mem::transmute(function) }; + let c = bx.call(fn_ty, function, &args.iter().map(|arg| arg.immediate()).collect::>(), None); + Ok(c) + } + + if std::matches!( + name, + sym::simd_ceil + | sym::simd_fabs + | sym::simd_fcos + | sym::simd_fexp2 + | sym::simd_fexp + | sym::simd_flog10 + | sym::simd_flog2 + | sym::simd_flog + | sym::simd_floor + | sym::simd_fma + | sym::simd_fpow + | sym::simd_fpowi + | sym::simd_fsin + | sym::simd_fsqrt + | sym::simd_round + | sym::simd_trunc + ) { + return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args); + } + arith_binary! { simd_add: Uint, Int => add, Float => fadd; simd_sub: Uint, Int => sub, Float => fsub; @@ -184,5 +500,41 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, simd_neg: Int => neg, Float => fneg; } + if name == sym::simd_saturating_add || name == sym::simd_saturating_sub { + let lhs = args[0].immediate(); + let rhs = args[1].immediate(); + let is_add = name == sym::simd_saturating_add; + let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _; + let (signed, elem_width, elem_ty) = match *in_elem.kind() { + ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)), + ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)), + _ => { + return_error!( + "expected element type `{}` of vector type `{}` \ + to be a signed or unsigned integer type", + arg_tys[0].simd_size_and_type(bx.tcx()).1, + arg_tys[0] + ); + } + }; + let builtin_name = + match (signed, is_add, in_len, elem_width) { + (true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned. + (false, true, 32, 8) => "__builtin_ia32_paddusb256", + (true, true, 16, 16) => "__builtin_ia32_paddsw256", + (false, true, 16, 16) => "__builtin_ia32_paddusw256", + (true, false, 16, 16) => "__builtin_ia32_psubsw256", + (false, false, 16, 16) => "__builtin_ia32_psubusw256", + (true, false, 32, 8) => "__builtin_ia32_psubsb256", + (false, false, 32, 8) => "__builtin_ia32_psubusb256", + _ => unimplemented!("signed: {}, is_add: {}, in_len: {}, elem_width: {}", signed, is_add, in_len, elem_width), + }; + let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64); + + let func = bx.context.get_target_builtin_function(builtin_name); + let result = bx.context.new_call(None, func, &[lhs, rhs]); + return Ok(bx.context.new_bitcast(None, result, vec_ty)); + } + unimplemented!("simd {}", name); } diff --git a/src/lib.rs b/src/lib.rs index eac4a06226c..a8029f0425a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -205,7 +205,7 @@ impl WriteBackendMethods for GccCodegenBackend { fn run_fat_lto(_cgcx: &CodegenContext, mut modules: Vec>, _cached_modules: Vec<(SerializedModule, WorkProduct)>) -> Result, FatalError> { // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins. // NOTE: implemented elsewhere. - // TODO: what is implemented elsewhere ^ ? + // TODO(antoyo): what is implemented elsewhere ^ ? let module = match modules.remove(0) { FatLTOInput::InMemory(module) => module, @@ -299,9 +299,17 @@ pub fn target_features(sess: &Session) -> Vec { if sess.is_nightly_build() || gate.is_none() { Some(feature) } else { None } }, ) - .filter(|_feature| { + .filter(|feature| { // TODO(antoyo): implement a way to get enabled feature in libgccjit. - false + // Probably using the equivalent of __builtin_cpu_supports. + feature.contains("sse") || feature.contains("avx") + /* + adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512gfni, + avx512ifma, avx512pf, avx512vaes, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpclmulqdq, + avx512vpopcntdq, bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, + sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, xsave, xsavec, xsaveopt, xsaves + */ + //false }) .map(|feature| Symbol::intern(feature)) .collect() diff --git a/src/type_.rs b/src/type_.rs index e9505808521..8a17d94da41 100644 --- a/src/type_.rs +++ b/src/type_.rs @@ -3,10 +3,11 @@ use std::convert::TryInto; use gccjit::{RValue, Struct, Type}; use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods}; use rustc_codegen_ssa::common::TypeKind; -use rustc_middle::bug; +use rustc_middle::{bug, ty}; use rustc_middle::ty::layout::TyAndLayout; use rustc_target::abi::{AddressSpace, Align, Integer, Size}; +use crate::common::TypeReflection; use crate::context::CodegenCx; use crate::type_of::LayoutGccExt; @@ -60,6 +61,17 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let ity = Integer::approximate_align(self, align); self.type_from_integer(ity) } + + pub fn type_vector(&self, ty: Type<'gcc>, len: u64) -> Type<'gcc> { + self.context.new_vector_type(ty, len) + } + + pub fn type_float_from_ty(&self, t: ty::FloatTy) -> Type<'gcc> { + match t { + ty::FloatTy::F32 => self.type_f32(), + ty::FloatTy::F64 => self.type_f64(), + } + } } impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { @@ -127,7 +139,7 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { else if typ.is_compatible_with(self.double_type) { TypeKind::Double } - else if typ.dyncast_vector().is_some() { + else if typ.is_vector() { TypeKind::Vector } else { @@ -141,7 +153,7 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { } fn type_ptr_to_ext(&self, ty: Type<'gcc>, _address_space: AddressSpace) -> Type<'gcc> { - // TODO(antoyo): use address_space + // TODO(antoyo): use address_space, perhaps with TYPE_ADDR_SPACE? ty.make_pointer() } @@ -167,10 +179,10 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { fn float_width(&self, typ: Type<'gcc>) -> usize { let f32 = self.context.new_type::(); let f64 = self.context.new_type::(); - if typ == f32 { + if typ.is_compatible_with(f32) { 32 } - else if typ == f64 { + else if typ.is_compatible_with(f64) { 64 } else { diff --git a/src/type_of.rs b/src/type_of.rs index ed8f0445ca3..c6d6f91a742 100644 --- a/src/type_of.rs +++ b/src/type_of.rs @@ -24,6 +24,28 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { I128 => self.type_u128(), } } + + pub fn type_int_from_ty(&self, t: ty::IntTy) -> Type<'gcc> { + match t { + ty::IntTy::Isize => self.type_isize(), + ty::IntTy::I8 => self.type_i8(), + ty::IntTy::I16 => self.type_i16(), + ty::IntTy::I32 => self.type_i32(), + ty::IntTy::I64 => self.type_i64(), + ty::IntTy::I128 => self.type_i128(), + } + } + + pub fn type_uint_from_ty(&self, t: ty::UintTy) -> Type<'gcc> { + match t { + ty::UintTy::Usize => self.type_isize(), + ty::UintTy::U8 => self.type_i8(), + ty::UintTy::U16 => self.type_i16(), + ty::UintTy::U32 => self.type_i32(), + ty::UintTy::U64 => self.type_i64(), + ty::UintTy::U128 => self.type_i128(), + } + } } pub fn uncached_gcc_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>, defer: &mut Option<(Struct<'gcc>, TyAndLayout<'tcx>)>) -> Type<'gcc> { diff --git a/test.sh b/test.sh index 1beeee136df..1d2fbd0a24c 100755 --- a/test.sh +++ b/test.sh @@ -97,25 +97,6 @@ function std_tests() { #echo "[BUILD] sysroot in release mode" #./build_sysroot/build_sysroot.sh --release -# TODO(antoyo): uncomment when it works. -#pushd simple-raytracer -#if [[ "$HOST_TRIPLE" = "$TARGET_TRIPLE" ]]; then - #echo "[BENCH COMPILE] ebobby/simple-raytracer" - #hyperfine --runs ${RUN_RUNS:-10} --warmup 1 --prepare "rm -r target/*/debug || true" \ - #"RUSTFLAGS='' cargo build --target $TARGET_TRIPLE" \ - #"../cargo.sh build" - - #echo "[BENCH RUN] ebobby/simple-raytracer" - #cp ./target/*/debug/main ./raytracer_cg_gccjit - #hyperfine --runs ${RUN_RUNS:-10} ./raytracer_cg_llvm ./raytracer_cg_gccjit -#else - #echo "[BENCH COMPILE] ebobby/simple-raytracer (skipped)" - #echo "[COMPILE] ebobby/simple-raytracer" - #../cargo.sh build - #echo "[BENCH RUN] ebobby/simple-raytracer (skipped)" -#fi -#popd - function test_libcore() { pushd build_sysroot/sysroot_src/library/core/tests echo "[TEST] libcore" @@ -124,19 +105,6 @@ function test_libcore() { popd } -# TODO(antoyo): uncomment when it works. -#pushd regex -#echo "[TEST] rust-lang/regex example shootout-regex-dna" -#../cargo.sh clean -## Make sure `[codegen mono items] start` doesn't poison the diff -#../cargo.sh build --example shootout-regex-dna -#cat examples/regexdna-input.txt | ../cargo.sh run --example shootout-regex-dna | grep -v "Spawned thread" > res.txt -#diff -u res.txt examples/regexdna-output.txt - -#echo "[TEST] rust-lang/regex tests" -#../cargo.sh test --tests -- --exclude-should-panic --test-threads 1 -Zunstable-options -#popd - #echo #echo "[BENCH COMPILE] mod_bench" @@ -153,6 +121,40 @@ function test_libcore() { #echo "[BENCH RUN] mod_bench" #hyperfine --runs ${RUN_RUNS:-10} ./target/out/mod_bench{,_inline} ./target/out/mod_bench_llvm_* +function extended_sysroot_tests() { + pushd rand + cargo clean + echo "[TEST] rust-random/rand" + ../cargo.sh test --workspace + popd + + #pushd simple-raytracer + #echo "[BENCH COMPILE] ebobby/simple-raytracer" + #hyperfine --runs "${RUN_RUNS:-10}" --warmup 1 --prepare "cargo clean" \ + #"RUSTC=rustc RUSTFLAGS='' cargo build" \ + #"../cargo.sh build" + + #echo "[BENCH RUN] ebobby/simple-raytracer" + #cp ./target/debug/main ./raytracer_cg_gcc + #hyperfine --runs "${RUN_RUNS:-10}" ./raytracer_cg_llvm ./raytracer_cg_gcc + #popd + + pushd regex + echo "[TEST] rust-lang/regex example shootout-regex-dna" + cargo clean + export CG_RUSTFLAGS="--cap-lints warn" # newer aho_corasick versions throw a deprecation warning + # Make sure `[codegen mono items] start` doesn't poison the diff + ../cargo.sh build --example shootout-regex-dna + cat examples/regexdna-input.txt \ + | ../cargo.sh run --example shootout-regex-dna \ + | grep -v "Spawned thread" > res.txt + diff -u res.txt examples/regexdna-output.txt + + echo "[TEST] rust-lang/regex tests" + ../cargo.sh test --tests -- --exclude-should-panic --test-threads 1 -Zunstable-options -q + popd +} + function test_rustc() { echo echo "[TEST] rust-lang/rust" @@ -165,23 +167,7 @@ function test_rustc() { git checkout $(rustc -V | cut -d' ' -f3 | tr -d '(') export RUSTFLAGS= - git apply - <( - cfg: Option<&str>, - ) -> test::TestDesc { - let mut ignore = false; - #[cfg(not(bootstrap))] -- let ignore_message: Option = None; -+ let ignore_message: Option<&str> = None; - let mut should_fail = false; - - let rustc_has_profiler_support = env::var_os("RUSTC_PROFILER_SUPPORT").is_some(); - -EOF + git apply ../rustc_patches/compile_test.patch || true rm config.toml || true @@ -205,7 +191,7 @@ EOF git checkout -- src/test/ui/issues/auxiliary/issue-3136-a.rs # contains //~ERROR, but shouldn't be removed - rm -r src/test/ui/{abi*,extern/,panic-runtime/,panics/,unsized-locals/,proc-macro/,threads-sendsync/,thinlto/,simd*,borrowck/,test*,*lto*.rs} || true + rm -r src/test/ui/{abi*,extern/,panic-runtime/,panics/,unsized-locals/,proc-macro/,threads-sendsync/,thinlto/,borrowck/,test*,*lto*.rs} || true for test in $(rg --files-with-matches "catch_unwind|should_panic|thread|lto" src/test/ui); do rm $test done @@ -239,6 +225,10 @@ case $1 in std_tests ;; + "--extended-tests") + extended_sysroot_tests + ;; + "--build-sysroot") build_sysroot ;; @@ -249,6 +239,7 @@ case $1 in build_sysroot std_tests test_libcore + extended_sysroot_tests test_rustc ;; esac