diff --git a/.github/workflows/stdarch.yml b/.github/workflows/stdarch.yml index 42fb35e738f..556c6444833 100644 --- a/.github/workflows/stdarch.yml +++ b/.github/workflows/stdarch.yml @@ -20,9 +20,9 @@ jobs: matrix: libgccjit_version: - { gcc: "libgccjit.so", artifacts_branch: "master" } - commands: [ - "--test-successful-rustc --nb-parts 2 --current-part 0", - "--test-successful-rustc --nb-parts 2 --current-part 1", + cargo_runner: [ + "sde -future -rtm_mode full --", + "", ] steps: @@ -36,6 +36,20 @@ jobs: - name: Install packages run: sudo apt-get install ninja-build ripgrep + - name: Install Intel Software Development Emulator + if: ${{ matrix.cargo_runner }} + run: | + mkdir intel-sde + cd intel-sde + dir=sde-external-9.14.0-2022-10-25-lin + file=$dir.tar.xz + wget https://downloadmirror.intel.com/751535/$file + tar xvf $file + sudo mkdir /usr/share/intel-sde + sudo cp -r $dir/* /usr/share/intel-sde + sudo ln -s /usr/share/intel-sde/sde /usr/bin/sde + sudo ln -s /usr/share/intel-sde/sde64 /usr/bin/sde64 + - name: Download artifact uses: dawidd6/action-download-artifact@v2 with: @@ -91,6 +105,10 @@ jobs: ./prepare_build.sh ./build.sh --release --release-sysroot cargo test + + - name: Clean + if: ${{ !matrix.cargo_runner }} + run: | ./clean_all.sh - name: Prepare dependencies @@ -107,10 +125,18 @@ jobs: args: --release - name: Run tests + if: ${{ !matrix.cargo_runner }} run: | ./test.sh --release --clean --release-sysroot --build-sysroot --mini-tests --std-tests --test-libcore - name: Run stdarch tests + if: ${{ !matrix.cargo_runner }} run: | cd build_sysroot/sysroot_src/library/stdarch/ CHANNEL=release TARGET=x86_64-unknown-linux-gnu ../../../../cargo.sh test + + - name: Run stdarch tests + if: ${{ matrix.cargo_runner }} + run: | + cd build_sysroot/sysroot_src/library/stdarch/ + STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu ../../../../cargo.sh test -- --skip rtm --skip tbm --skip sse4a diff --git a/.gitignore b/.gitignore index 12ed5667563..c5ed7de200c 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ benchmarks tools/llvm-project tools/llvmint tools/llvmint-2 +# The `llvm` folder is generated by the `tools/generate_intrinsics.py` script to update intrinsics. +llvm diff --git a/Cargo.lock b/Cargo.lock index 1c8754bf675..3c5357eec10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,9 +217,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "tempfile" diff --git a/Cargo.toml b/Cargo.toml index 81066d9ce1f..7285e3eaf51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ master = ["gccjit/master"] gccjit = { git = "https://github.com/antoyo/gccjit.rs" } # Local copy. -#gccjit = { path = "../gccjit.rs" } +# gccjit = { path = "../gccjit.rs" } smallvec = { version = "1.6.1", features = ["union", "may_dangle"] } diff --git a/example/mini_core.rs b/example/mini_core.rs index f0347db00cc..835419efc3a 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -489,7 +489,7 @@ impl DispatchFromDyn> for Unique where T: Uns #[lang = "owned_box"] pub struct Box(Unique, A); -impl, U: ?Sized> CoerceUnsized> for Box {} +impl, U: ?Sized, A: Allocator> CoerceUnsized> for Box {} impl Box { pub fn new(val: T) -> Box { diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index cff26077740..b93d6859706 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -168,6 +168,9 @@ fn main() { world as Box; assert_eq!(intrinsics::bitreverse(0b10101000u8), 0b00010101u8); + assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16); + assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32); + assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64); assert_eq!(intrinsics::bswap(0xabu8), 0xabu8); assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16); diff --git a/example/std_example.rs b/example/std_example.rs index 5c171c49fd1..18f2ddcde12 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -58,6 +58,7 @@ fn main() { assert_eq!(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128.leading_zeros(), 26); assert_eq!(0b0000000000000000000000000010000000000000000000000000000000000000_0000000000000000000000000000000000001000000000000000000010000000u128.trailing_zeros(), 7); + assert_eq!(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128.reverse_bits(), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); let _d = 0i128.checked_div(2i128); let _d = 0u128.checked_div(2u128); diff --git a/locales/en-US.ftl b/locales/en-US.ftl new file mode 100644 index 00000000000..2181d49eeef --- /dev/null +++ b/locales/en-US.ftl @@ -0,0 +1,65 @@ +codegen_gcc_unwinding_inline_asm = + GCC backend does not support unwinding from inline asm + +codegen_gcc_lto_not_supported = + LTO is not supported. You may get a linker error. + +codegen_gcc_invalid_monomorphization_basic_integer = + invalid monomorphization of `{$name}` intrinsic: expected basic integer type, found `{$ty}` + +codegen_gcc_invalid_monomorphization_invalid_float_vector = + invalid monomorphization of `{$name}` intrinsic: unsupported element type `{$elem_ty}` of floating-point vector `{$vec_ty}` + +codegen_gcc_invalid_monomorphization_not_float = + invalid monomorphization of `{$name}` intrinsic: `{$ty}` is not a floating-point type + +codegen_gcc_invalid_monomorphization_unrecognized = + invalid monomorphization of `{$name}` intrinsic: unrecognized intrinsic `{$name}` + +codegen_gcc_invalid_monomorphization_expected_signed_unsigned = + invalid monomorphization of `{$name}` intrinsic: expected element type `{$elem_ty}` of vector type `{$vec_ty}` to be a signed or unsigned integer type + +codegen_gcc_invalid_monomorphization_unsupported_element = + invalid monomorphization of `{$name}` intrinsic: unsupported {$name} from `{$in_ty}` with element `{$elem_ty}` to `{$ret_ty}` + +codegen_gcc_invalid_monomorphization_invalid_bitmask = + invalid monomorphization of `{$name}` intrinsic: invalid bitmask `{$ty}`, expected `u{$expected_int_bits}` or `[u8; {$expected_bytes}]` + +codegen_gcc_invalid_monomorphization_simd_shuffle = + invalid monomorphization of `{$name}` intrinsic: simd_shuffle index must be an array of `u32`, got `{$ty}` + +codegen_gcc_invalid_monomorphization_expected_simd = + invalid monomorphization of `{$name}` intrinsic: expected SIMD {$expected_ty} type, found non-SIMD `{$found_ty}` + +codegen_gcc_invalid_monomorphization_mask_type = + invalid monomorphization of `{$name}` intrinsic: mask element type is `{$ty}`, expected `i_` + +codegen_gcc_invalid_monomorphization_return_length = + invalid monomorphization of `{$name}` intrinsic: expected return type of length {$in_len}, found `{$ret_ty}` with length {$out_len} + +codegen_gcc_invalid_monomorphization_return_length_input_type = + invalid monomorphization of `{$name}` intrinsic: expected return type with length {$in_len} (same as input type `{$in_ty}`), found `{$ret_ty}` with length {$out_len} + +codegen_gcc_invalid_monomorphization_return_element = + invalid monomorphization of `{$name}` intrinsic: expected return element type `{$in_elem}` (element of input `{$in_ty}`), found `{$ret_ty}` with element type `{$out_ty}` + +codegen_gcc_invalid_monomorphization_return_type = + invalid monomorphization of `{$name}` intrinsic: expected return type `{$in_elem}` (element of input `{$in_ty}`), found `{$ret_ty}` + +codegen_gcc_invalid_monomorphization_inserted_type = + invalid monomorphization of `{$name}` intrinsic: expected inserted type `{$in_elem}` (element of input `{$in_ty}`), found `{$out_ty}` + +codegen_gcc_invalid_monomorphization_return_integer_type = + invalid monomorphization of `{$name}` intrinsic: expected return type with integer elements, found `{$ret_ty}` with non-integer `{$out_ty}` + +codegen_gcc_invalid_monomorphization_mismatched_lengths = + invalid monomorphization of `{$name}` intrinsic: mismatched lengths: mask length `{$m_len}` != other vector length `{$v_len}` + +codegen_gcc_invalid_monomorphization_unsupported_cast = + invalid monomorphization of `{$name}` intrinsic: unsupported cast from `{$in_ty}` with element `{$in_elem}` to `{$ret_ty}` with element `{$out_elem}` + +codegen_gcc_invalid_monomorphization_unsupported_operation = + invalid monomorphization of `{$name}` intrinsic: unsupported operation on `{$in_ty}` with element `{$in_elem}` + +codegen_gcc_invalid_minimum_alignment = + invalid minimum global alignment: {$err} diff --git a/src/asm.rs b/src/asm.rs index 250aa79f8d6..be7ae603ca6 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -502,49 +502,49 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable"); let builtin_unreachable: RValue<'gcc> = unsafe { std::mem::transmute(builtin_unreachable) }; self.call(self.type_void(), None, None, builtin_unreachable, &[], None); - } - - // Write results to outputs. - // - // We need to do this because: - // 1. Turning `PlaceRef` into `RValue` is error-prone and has nasty edge cases - // (especially with current `rustc_backend_ssa` API). - // 2. Not every output operand has an `out_place`, and it's required by `add_output_operand`. - // - // Instead, we generate a temporary output variable for each output operand, and then this loop, - // generates `out_place = tmp_var;` assignments if out_place exists. - for op in &outputs { - if let Some(place) = op.out_place { - OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place); - } - } - } + + // Write results to outputs. + // + // We need to do this because: + // 1. Turning `PlaceRef` into `RValue` is error-prone and has nasty edge cases + // (especially with current `rustc_backend_ssa` API). + // 2. Not every output operand has an `out_place`, and it's required by `add_output_operand`. + // + // Instead, we generate a temporary output variable for each output operand, and then this loop, + // generates `out_place = tmp_var;` assignments if out_place exists. + for op in &outputs { + if let Some(place) = op.out_place { + OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place); + } + } + +} } fn estimate_template_length(template: &[InlineAsmTemplatePiece], constants_len: usize, att_dialect: bool) -> usize { - let len: usize = template.iter().map(|piece| { - match *piece { - InlineAsmTemplatePiece::String(ref string) => { - string.len() - } - InlineAsmTemplatePiece::Placeholder { .. } => { - // '%' + 1 char modifier + 1 char index - 3 - } +let len: usize = template.iter().map(|piece| { + match *piece { + InlineAsmTemplatePiece::String(ref string) => { + string.len() + } + InlineAsmTemplatePiece::Placeholder { .. } => { + // '%' + 1 char modifier + 1 char index + 3 } - }) - .sum(); - - // increase it by 5% to account for possible '%' signs that'll be duplicated - // I pulled the number out of blue, but should be fair enough - // as the upper bound - let mut res = (len as f32 * 1.05) as usize + constants_len; - - if att_dialect { - res += INTEL_SYNTAX_INS.len() + ATT_SYNTAX_INS.len(); } - res +}) +.sum(); + +// increase it by 5% to account for possible '%' signs that'll be duplicated +// I pulled the number out of blue, but should be fair enough +// as the upper bound +let mut res = (len as f32 * 1.05) as usize + constants_len; + +if att_dialect { + res += INTEL_SYNTAX_INS.len() + ATT_SYNTAX_INS.len(); +} +res } /// Converts a register class to a GCC constraint code. diff --git a/src/attributes.rs b/src/attributes.rs index db841b1b524..eb0cce19b85 100644 --- a/src/attributes.rs +++ b/src/attributes.rs @@ -2,9 +2,13 @@ use gccjit::FnAttribute; use gccjit::Function; use rustc_attr::InstructionSetAttr; +#[cfg(feature="master")] +use rustc_attr::InlineAttr; use rustc_codegen_ssa::target_features::tied_target_features; use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty; +#[cfg(feature="master")] +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_session::Session; use rustc_span::symbol::sym; use smallvec::{smallvec, SmallVec}; @@ -67,6 +71,24 @@ fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> { } } +/// Get GCC attribute for the provided inline heuristic. +#[cfg(feature="master")] +#[inline] +fn inline_attr<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, inline: InlineAttr) -> Option> { + match inline { + InlineAttr::Hint => Some(FnAttribute::Inline), + InlineAttr::Always => Some(FnAttribute::AlwaysInline), + InlineAttr::Never => { + if cx.sess().target.arch != "amdgpu" { + Some(FnAttribute::NoInline) + } else { + None + } + } + InlineAttr::None => None, + } +} + /// Composite function which sets GCC attributes for function depending on its AST (`#[attribute]`) /// attributes. pub fn from_fn_attrs<'gcc, 'tcx>( @@ -77,6 +99,23 @@ pub fn from_fn_attrs<'gcc, 'tcx>( ) { let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id()); + #[cfg(feature="master")] + { + let inline = + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) { + InlineAttr::Never + } + else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) { + InlineAttr::Hint + } + else { + codegen_fn_attrs.inline + }; + if let Some(attr) = inline_attr(cx, inline) { + func.add_attribute(attr); + } + } + let function_features = codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::>(); diff --git a/src/common.rs b/src/common.rs index bad87db4732..7fa986e2737 100644 --- a/src/common.rs +++ b/src/common.rs @@ -254,7 +254,7 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { // SIMD builtins require a constant value. self.bitcast_if_needed(value, typ) } - + fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value { self.context.new_array_access(None, base_addr, self.const_usize(offset.bytes())).get_address(None) } diff --git a/src/consts.rs b/src/consts.rs index 33e3b0baa92..d8a1fd315c0 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,5 +1,5 @@ #[cfg(feature = "master")] -use gccjit::FnAttribute; +use gccjit::{FnAttribute, VarAttribute, Visibility}; use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue}; use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods}; use rustc_middle::span_bug; @@ -234,7 +234,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { ); if !self.tcx.is_reachable_non_generic(def_id) { - // TODO(antoyo): set visibility. + #[cfg(feature = "master")] + global.add_attribute(VarAttribute::Visibility(Visibility::Hidden)); } global diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index 8a4559355ea..438eab78943 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -2967,10 +2967,6 @@ match name { "llvm.nvvm.clz.ll" => "__nvvm_clz_ll", "llvm.nvvm.cos.approx.f" => "__nvvm_cos_approx_f", "llvm.nvvm.cos.approx.ftz.f" => "__nvvm_cos_approx_ftz_f", - "llvm.nvvm.cp.async.ca.shared.global.16" => "__nvvm_cp_async_ca_shared_global_16", - "llvm.nvvm.cp.async.ca.shared.global.4" => "__nvvm_cp_async_ca_shared_global_4", - "llvm.nvvm.cp.async.ca.shared.global.8" => "__nvvm_cp_async_ca_shared_global_8", - "llvm.nvvm.cp.async.cg.shared.global.16" => "__nvvm_cp_async_cg_shared_global_16", "llvm.nvvm.cp.async.commit.group" => "__nvvm_cp_async_commit_group", "llvm.nvvm.cp.async.mbarrier.arrive" => "__nvvm_cp_async_mbarrier_arrive", "llvm.nvvm.cp.async.mbarrier.arrive.noinc" => "__nvvm_cp_async_mbarrier_arrive_noinc", @@ -3086,18 +3082,8 @@ match name { "llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16", "llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2", "llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f", - "llvm.nvvm.fma.rn.ftz.f16" => "__nvvm_fma_rn_ftz_f16", - "llvm.nvvm.fma.rn.ftz.f16x2" => "__nvvm_fma_rn_ftz_f16x2", - "llvm.nvvm.fma.rn.ftz.relu.f16" => "__nvvm_fma_rn_ftz_relu_f16", - "llvm.nvvm.fma.rn.ftz.relu.f16x2" => "__nvvm_fma_rn_ftz_relu_f16x2", - "llvm.nvvm.fma.rn.ftz.sat.f16" => "__nvvm_fma_rn_ftz_sat_f16", - "llvm.nvvm.fma.rn.ftz.sat.f16x2" => "__nvvm_fma_rn_ftz_sat_f16x2", "llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16", "llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2", - "llvm.nvvm.fma.rn.relu.f16" => "__nvvm_fma_rn_relu_f16", - "llvm.nvvm.fma.rn.relu.f16x2" => "__nvvm_fma_rn_relu_f16x2", - "llvm.nvvm.fma.rn.sat.f16" => "__nvvm_fma_rn_sat_f16", - "llvm.nvvm.fma.rn.sat.f16x2" => "__nvvm_fma_rn_sat_f16x2", "llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d", "llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f", "llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f", @@ -3111,32 +3097,18 @@ match name { "llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16", "llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2", "llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f", - "llvm.nvvm.fmax.ftz.f16" => "__nvvm_fmax_ftz_f16", - "llvm.nvvm.fmax.ftz.f16x2" => "__nvvm_fmax_ftz_f16x2", "llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f", - "llvm.nvvm.fmax.ftz.nan.f16" => "__nvvm_fmax_ftz_nan_f16", - "llvm.nvvm.fmax.ftz.nan.f16x2" => "__nvvm_fmax_ftz_nan_f16x2", "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f", - "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16", - "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16x2", "llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f", - "llvm.nvvm.fmax.ftz.xorsign.abs.f16" => "__nvvm_fmax_ftz_xorsign_abs_f16", - "llvm.nvvm.fmax.ftz.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_xorsign_abs_f16x2", "llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16", "llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2", "llvm.nvvm.fmax.nan.f" => "__nvvm_fmax_nan_f", - "llvm.nvvm.fmax.nan.f16" => "__nvvm_fmax_nan_f16", - "llvm.nvvm.fmax.nan.f16x2" => "__nvvm_fmax_nan_f16x2", "llvm.nvvm.fmax.nan.xorsign.abs.bf16" => "__nvvm_fmax_nan_xorsign_abs_bf16", "llvm.nvvm.fmax.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_nan_xorsign_abs_bf16x2", "llvm.nvvm.fmax.nan.xorsign.abs.f" => "__nvvm_fmax_nan_xorsign_abs_f", - "llvm.nvvm.fmax.nan.xorsign.abs.f16" => "__nvvm_fmax_nan_xorsign_abs_f16", - "llvm.nvvm.fmax.nan.xorsign.abs.f16x2" => "__nvvm_fmax_nan_xorsign_abs_f16x2", "llvm.nvvm.fmax.xorsign.abs.bf16" => "__nvvm_fmax_xorsign_abs_bf16", "llvm.nvvm.fmax.xorsign.abs.bf16x2" => "__nvvm_fmax_xorsign_abs_bf16x2", "llvm.nvvm.fmax.xorsign.abs.f" => "__nvvm_fmax_xorsign_abs_f", - "llvm.nvvm.fmax.xorsign.abs.f16" => "__nvvm_fmax_xorsign_abs_f16", - "llvm.nvvm.fmax.xorsign.abs.f16x2" => "__nvvm_fmax_xorsign_abs_f16x2", "llvm.nvvm.fmin.bf16" => "__nvvm_fmin_bf16", "llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2", "llvm.nvvm.fmin.d" => "__nvvm_fmin_d", @@ -3144,32 +3116,18 @@ match name { "llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16", "llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2", "llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f", - "llvm.nvvm.fmin.ftz.f16" => "__nvvm_fmin_ftz_f16", - "llvm.nvvm.fmin.ftz.f16x2" => "__nvvm_fmin_ftz_f16x2", "llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f", - "llvm.nvvm.fmin.ftz.nan.f16" => "__nvvm_fmin_ftz_nan_f16", - "llvm.nvvm.fmin.ftz.nan.f16x2" => "__nvvm_fmin_ftz_nan_f16x2", "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f", - "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16", - "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16x2", "llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f", - "llvm.nvvm.fmin.ftz.xorsign.abs.f16" => "__nvvm_fmin_ftz_xorsign_abs_f16", - "llvm.nvvm.fmin.ftz.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_xorsign_abs_f16x2", "llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16", "llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2", "llvm.nvvm.fmin.nan.f" => "__nvvm_fmin_nan_f", - "llvm.nvvm.fmin.nan.f16" => "__nvvm_fmin_nan_f16", - "llvm.nvvm.fmin.nan.f16x2" => "__nvvm_fmin_nan_f16x2", "llvm.nvvm.fmin.nan.xorsign.abs.bf16" => "__nvvm_fmin_nan_xorsign_abs_bf16", "llvm.nvvm.fmin.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_nan_xorsign_abs_bf16x2", "llvm.nvvm.fmin.nan.xorsign.abs.f" => "__nvvm_fmin_nan_xorsign_abs_f", - "llvm.nvvm.fmin.nan.xorsign.abs.f16" => "__nvvm_fmin_nan_xorsign_abs_f16", - "llvm.nvvm.fmin.nan.xorsign.abs.f16x2" => "__nvvm_fmin_nan_xorsign_abs_f16x2", "llvm.nvvm.fmin.xorsign.abs.bf16" => "__nvvm_fmin_xorsign_abs_bf16", "llvm.nvvm.fmin.xorsign.abs.bf16x2" => "__nvvm_fmin_xorsign_abs_bf16x2", "llvm.nvvm.fmin.xorsign.abs.f" => "__nvvm_fmin_xorsign_abs_f", - "llvm.nvvm.fmin.xorsign.abs.f16" => "__nvvm_fmin_xorsign_abs_f16", - "llvm.nvvm.fmin.xorsign.abs.f16x2" => "__nvvm_fmin_xorsign_abs_f16x2", "llvm.nvvm.fns" => "__nvvm_fns", "llvm.nvvm.h2f" => "__nvvm_h2f", "llvm.nvvm.i2d.rm" => "__nvvm_i2d_rm", @@ -7895,6 +7853,10 @@ match name { "llvm.x86.subborrow.u64" => "__builtin_ia32_subborrow_u64", "llvm.x86.tbm.bextri.u32" => "__builtin_ia32_bextri_u32", "llvm.x86.tbm.bextri.u64" => "__builtin_ia32_bextri_u64", + "llvm.x86.tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps", + "llvm.x86.tcmmimfp16ps.internal" => "__builtin_ia32_tcmmimfp16ps_internal", + "llvm.x86.tcmmrlfp16ps" => "__builtin_ia32_tcmmrlfp16ps", + "llvm.x86.tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal", "llvm.x86.tdpbf16ps" => "__builtin_ia32_tdpbf16ps", "llvm.x86.tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal", "llvm.x86.tdpbssd" => "__builtin_ia32_tdpbssd", diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 0edec566be3..f28348380d7 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -313,6 +313,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into(); }, + "__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" | + "__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" | + "__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => { + // The first two arguments are reversed, compared to LLVM. + let new_args = args.to_vec(); + args = vec![new_args[1], new_args[0], new_args[2]].into(); + }, _ => (), } } diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 60176874747..a31fee39918 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -551,141 +551,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let context = &self.cx.context; let result = match width { - 8 => { + 8 | 16 | 32 | 64 => { + let mask = ((1u128 << width) - 1) as u64; + let (m0, m1, m2) = if width > 16 { + ( + context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), + ) + } else { + ( + context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), + ) + }; + let one = context.new_rvalue_from_int(typ, 1); + let two = context.new_rvalue_from_int(typ, 2); + let four = context.new_rvalue_from_int(typ, 4); + // First step. - let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 4)); - let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 4)); + let left = self.lshr(value, one); + let left = self.and(left, m0); + let right = self.and(value, m0); + let right = self.shl(right, one); let step1 = self.or(left, right); // Second step. - let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 2)); + let left = self.lshr(step1, two); + let left = self.and(left, m1); + let right = self.and(step1, m1); + let right = self.shl(right, two); let step2 = self.or(left, right); // Third step. - let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 1)); - let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 1)); - let step3 = self.or(left, right); - - step3 - }, - 16 => { - // First step. - let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 1)); - let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 1)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 2)); - let step2 = self.or(left, right); - - // Third step. - let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 4)); - let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 4)); + let left = self.lshr(step2, four); + let left = self.and(left, m2); + let right = self.and(step2, m2); + let right = self.shl(right, four); let step3 = self.or(left, right); // Fourth step. - let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 8)); - let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 8)); - let step4 = self.or(left, right); - - step4 - }, - 32 => { - // TODO(antoyo): Refactor with other implementations. - // First step. - let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 1)); - let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 1)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 2)); - let step2 = self.or(left, right); - - // Third step. - let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 4)); - let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 4)); - let step3 = self.or(left, right); - - // Fourth step. - let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 8)); - let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 8)); - let step4 = self.or(left, right); - - // Fifth step. - let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 16)); - let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 16)); - let step5 = self.or(left, right); - - step5 - }, - 64 => { - // First step. - let left = self.shl(value, context.new_rvalue_from_long(typ, 32)); - let right = self.lshr(value, context.new_rvalue_from_long(typ, 32)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 15)); - let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead? - let right = self.lshr(right, context.new_rvalue_from_long(typ, 17)); - let step2 = self.or(left, right); - - // Third step. - let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10)); - let left = self.xor(step2, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 10)); - let left = self.or(temp, left); - let step3 = self.xor(left, step2); - - // Fourth step. - let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4)); - let left = self.xor(step3, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 4)); - let left = self.or(temp, left); - let step4 = self.xor(left, step3); - - // Fifth step. - let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2)); - let left = self.xor(step4, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 2)); - let left = self.or(temp, left); - let step5 = self.xor(left, step4); - - step5 + if width == 8 { + step3 + } else { + self.gcc_bswap(step3, width) + } }, 128 => { // TODO(antoyo): find a more efficient implementation? diff --git a/src/lib.rs b/src/lib.rs index ea013c4428c..204741ca3c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,7 +77,7 @@ use rustc_codegen_ssa::target_features::supported_target_features; use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods}; use rustc_data_structures::fx::FxIndexMap; use rustc_errors::{DiagnosticMessage, ErrorGuaranteed, Handler, SubdiagnosticMessage}; -use rustc_fluent_macro::fluent_messages; +use rustc_macros::fluent_messages; use rustc_metadata::EncodedMetadata; use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; use rustc_middle::query::Providers; @@ -111,6 +111,8 @@ impl CodegenBackend for GccCodegenBackend { } fn init(&self, sess: &Session) { + #[cfg(feature="master")] + gccjit::set_global_personality_function_name(b"rust_eh_personality\0"); if sess.lto() != Lto::No { sess.emit_warning(LTONotSupported {}); }