Merge branch 'master' into sync_from_rust_2023_06_11
This commit is contained in:
commit
3d4c59ed75
32
.github/workflows/stdarch.yml
vendored
32
.github/workflows/stdarch.yml
vendored
@ -20,9 +20,9 @@ jobs:
|
||||
matrix:
|
||||
libgccjit_version:
|
||||
- { gcc: "libgccjit.so", artifacts_branch: "master" }
|
||||
commands: [
|
||||
"--test-successful-rustc --nb-parts 2 --current-part 0",
|
||||
"--test-successful-rustc --nb-parts 2 --current-part 1",
|
||||
cargo_runner: [
|
||||
"sde -future -rtm_mode full --",
|
||||
"",
|
||||
]
|
||||
|
||||
steps:
|
||||
@ -36,6 +36,20 @@ jobs:
|
||||
- name: Install packages
|
||||
run: sudo apt-get install ninja-build ripgrep
|
||||
|
||||
- name: Install Intel Software Development Emulator
|
||||
if: ${{ matrix.cargo_runner }}
|
||||
run: |
|
||||
mkdir intel-sde
|
||||
cd intel-sde
|
||||
dir=sde-external-9.14.0-2022-10-25-lin
|
||||
file=$dir.tar.xz
|
||||
wget https://downloadmirror.intel.com/751535/$file
|
||||
tar xvf $file
|
||||
sudo mkdir /usr/share/intel-sde
|
||||
sudo cp -r $dir/* /usr/share/intel-sde
|
||||
sudo ln -s /usr/share/intel-sde/sde /usr/bin/sde
|
||||
sudo ln -s /usr/share/intel-sde/sde64 /usr/bin/sde64
|
||||
|
||||
- name: Download artifact
|
||||
uses: dawidd6/action-download-artifact@v2
|
||||
with:
|
||||
@ -91,6 +105,10 @@ jobs:
|
||||
./prepare_build.sh
|
||||
./build.sh --release --release-sysroot
|
||||
cargo test
|
||||
|
||||
- name: Clean
|
||||
if: ${{ !matrix.cargo_runner }}
|
||||
run: |
|
||||
./clean_all.sh
|
||||
|
||||
- name: Prepare dependencies
|
||||
@ -107,10 +125,18 @@ jobs:
|
||||
args: --release
|
||||
|
||||
- name: Run tests
|
||||
if: ${{ !matrix.cargo_runner }}
|
||||
run: |
|
||||
./test.sh --release --clean --release-sysroot --build-sysroot --mini-tests --std-tests --test-libcore
|
||||
|
||||
- name: Run stdarch tests
|
||||
if: ${{ !matrix.cargo_runner }}
|
||||
run: |
|
||||
cd build_sysroot/sysroot_src/library/stdarch/
|
||||
CHANNEL=release TARGET=x86_64-unknown-linux-gnu ../../../../cargo.sh test
|
||||
|
||||
- name: Run stdarch tests
|
||||
if: ${{ matrix.cargo_runner }}
|
||||
run: |
|
||||
cd build_sysroot/sysroot_src/library/stdarch/
|
||||
STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu ../../../../cargo.sh test -- --skip rtm --skip tbm --skip sse4a
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -23,3 +23,5 @@ benchmarks
|
||||
tools/llvm-project
|
||||
tools/llvmint
|
||||
tools/llvmint-2
|
||||
# The `llvm` folder is generated by the `tools/generate_intrinsics.py` script to update intrinsics.
|
||||
llvm
|
||||
|
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -217,9 +217,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.10.0"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
|
@ -25,7 +25,7 @@ master = ["gccjit/master"]
|
||||
gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
|
||||
|
||||
# Local copy.
|
||||
#gccjit = { path = "../gccjit.rs" }
|
||||
# gccjit = { path = "../gccjit.rs" }
|
||||
|
||||
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
|
||||
|
||||
|
@ -489,7 +489,7 @@ impl<T: ?Sized, U: ?Sized> DispatchFromDyn<Unique<U>> for Unique<T> where T: Uns
|
||||
#[lang = "owned_box"]
|
||||
pub struct Box<T: ?Sized, A: Allocator = Global>(Unique<T>, A);
|
||||
|
||||
impl<T: ?Sized + Unsize<U>, U: ?Sized> CoerceUnsized<Box<U>> for Box<T> {}
|
||||
impl<T: ?Sized + Unsize<U>, U: ?Sized, A: Allocator> CoerceUnsized<Box<U, A>> for Box<T, A> {}
|
||||
|
||||
impl<T> Box<T> {
|
||||
pub fn new(val: T) -> Box<T> {
|
||||
|
@ -168,6 +168,9 @@ fn main() {
|
||||
world as Box<dyn SomeTrait>;
|
||||
|
||||
assert_eq!(intrinsics::bitreverse(0b10101000u8), 0b00010101u8);
|
||||
assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16);
|
||||
assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32);
|
||||
assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64);
|
||||
|
||||
assert_eq!(intrinsics::bswap(0xabu8), 0xabu8);
|
||||
assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16);
|
||||
|
@ -58,6 +58,7 @@ fn main() {
|
||||
|
||||
assert_eq!(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128.leading_zeros(), 26);
|
||||
assert_eq!(0b0000000000000000000000000010000000000000000000000000000000000000_0000000000000000000000000000000000001000000000000000000010000000u128.trailing_zeros(), 7);
|
||||
assert_eq!(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128.reverse_bits(), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128);
|
||||
|
||||
let _d = 0i128.checked_div(2i128);
|
||||
let _d = 0u128.checked_div(2u128);
|
||||
|
65
locales/en-US.ftl
Normal file
65
locales/en-US.ftl
Normal file
@ -0,0 +1,65 @@
|
||||
codegen_gcc_unwinding_inline_asm =
|
||||
GCC backend does not support unwinding from inline asm
|
||||
|
||||
codegen_gcc_lto_not_supported =
|
||||
LTO is not supported. You may get a linker error.
|
||||
|
||||
codegen_gcc_invalid_monomorphization_basic_integer =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected basic integer type, found `{$ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_invalid_float_vector =
|
||||
invalid monomorphization of `{$name}` intrinsic: unsupported element type `{$elem_ty}` of floating-point vector `{$vec_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_not_float =
|
||||
invalid monomorphization of `{$name}` intrinsic: `{$ty}` is not a floating-point type
|
||||
|
||||
codegen_gcc_invalid_monomorphization_unrecognized =
|
||||
invalid monomorphization of `{$name}` intrinsic: unrecognized intrinsic `{$name}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_expected_signed_unsigned =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected element type `{$elem_ty}` of vector type `{$vec_ty}` to be a signed or unsigned integer type
|
||||
|
||||
codegen_gcc_invalid_monomorphization_unsupported_element =
|
||||
invalid monomorphization of `{$name}` intrinsic: unsupported {$name} from `{$in_ty}` with element `{$elem_ty}` to `{$ret_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_invalid_bitmask =
|
||||
invalid monomorphization of `{$name}` intrinsic: invalid bitmask `{$ty}`, expected `u{$expected_int_bits}` or `[u8; {$expected_bytes}]`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_simd_shuffle =
|
||||
invalid monomorphization of `{$name}` intrinsic: simd_shuffle index must be an array of `u32`, got `{$ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_expected_simd =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected SIMD {$expected_ty} type, found non-SIMD `{$found_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_mask_type =
|
||||
invalid monomorphization of `{$name}` intrinsic: mask element type is `{$ty}`, expected `i_`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_return_length =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected return type of length {$in_len}, found `{$ret_ty}` with length {$out_len}
|
||||
|
||||
codegen_gcc_invalid_monomorphization_return_length_input_type =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected return type with length {$in_len} (same as input type `{$in_ty}`), found `{$ret_ty}` with length {$out_len}
|
||||
|
||||
codegen_gcc_invalid_monomorphization_return_element =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected return element type `{$in_elem}` (element of input `{$in_ty}`), found `{$ret_ty}` with element type `{$out_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_return_type =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected return type `{$in_elem}` (element of input `{$in_ty}`), found `{$ret_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_inserted_type =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected inserted type `{$in_elem}` (element of input `{$in_ty}`), found `{$out_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_return_integer_type =
|
||||
invalid monomorphization of `{$name}` intrinsic: expected return type with integer elements, found `{$ret_ty}` with non-integer `{$out_ty}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_mismatched_lengths =
|
||||
invalid monomorphization of `{$name}` intrinsic: mismatched lengths: mask length `{$m_len}` != other vector length `{$v_len}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_unsupported_cast =
|
||||
invalid monomorphization of `{$name}` intrinsic: unsupported cast from `{$in_ty}` with element `{$in_elem}` to `{$ret_ty}` with element `{$out_elem}`
|
||||
|
||||
codegen_gcc_invalid_monomorphization_unsupported_operation =
|
||||
invalid monomorphization of `{$name}` intrinsic: unsupported operation on `{$in_ty}` with element `{$in_elem}`
|
||||
|
||||
codegen_gcc_invalid_minimum_alignment =
|
||||
invalid minimum global alignment: {$err}
|
74
src/asm.rs
74
src/asm.rs
@ -502,49 +502,49 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
|
||||
let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable");
|
||||
let builtin_unreachable: RValue<'gcc> = unsafe { std::mem::transmute(builtin_unreachable) };
|
||||
self.call(self.type_void(), None, None, builtin_unreachable, &[], None);
|
||||
}
|
||||
|
||||
// Write results to outputs.
|
||||
//
|
||||
// We need to do this because:
|
||||
// 1. Turning `PlaceRef` into `RValue` is error-prone and has nasty edge cases
|
||||
// (especially with current `rustc_backend_ssa` API).
|
||||
// 2. Not every output operand has an `out_place`, and it's required by `add_output_operand`.
|
||||
//
|
||||
// Instead, we generate a temporary output variable for each output operand, and then this loop,
|
||||
// generates `out_place = tmp_var;` assignments if out_place exists.
|
||||
for op in &outputs {
|
||||
if let Some(place) = op.out_place {
|
||||
OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Write results to outputs.
|
||||
//
|
||||
// We need to do this because:
|
||||
// 1. Turning `PlaceRef` into `RValue` is error-prone and has nasty edge cases
|
||||
// (especially with current `rustc_backend_ssa` API).
|
||||
// 2. Not every output operand has an `out_place`, and it's required by `add_output_operand`.
|
||||
//
|
||||
// Instead, we generate a temporary output variable for each output operand, and then this loop,
|
||||
// generates `out_place = tmp_var;` assignments if out_place exists.
|
||||
for op in &outputs {
|
||||
if let Some(place) = op.out_place {
|
||||
OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_template_length(template: &[InlineAsmTemplatePiece], constants_len: usize, att_dialect: bool) -> usize {
|
||||
let len: usize = template.iter().map(|piece| {
|
||||
match *piece {
|
||||
InlineAsmTemplatePiece::String(ref string) => {
|
||||
string.len()
|
||||
}
|
||||
InlineAsmTemplatePiece::Placeholder { .. } => {
|
||||
// '%' + 1 char modifier + 1 char index
|
||||
3
|
||||
}
|
||||
let len: usize = template.iter().map(|piece| {
|
||||
match *piece {
|
||||
InlineAsmTemplatePiece::String(ref string) => {
|
||||
string.len()
|
||||
}
|
||||
InlineAsmTemplatePiece::Placeholder { .. } => {
|
||||
// '%' + 1 char modifier + 1 char index
|
||||
3
|
||||
}
|
||||
})
|
||||
.sum();
|
||||
|
||||
// increase it by 5% to account for possible '%' signs that'll be duplicated
|
||||
// I pulled the number out of blue, but should be fair enough
|
||||
// as the upper bound
|
||||
let mut res = (len as f32 * 1.05) as usize + constants_len;
|
||||
|
||||
if att_dialect {
|
||||
res += INTEL_SYNTAX_INS.len() + ATT_SYNTAX_INS.len();
|
||||
}
|
||||
res
|
||||
})
|
||||
.sum();
|
||||
|
||||
// increase it by 5% to account for possible '%' signs that'll be duplicated
|
||||
// I pulled the number out of blue, but should be fair enough
|
||||
// as the upper bound
|
||||
let mut res = (len as f32 * 1.05) as usize + constants_len;
|
||||
|
||||
if att_dialect {
|
||||
res += INTEL_SYNTAX_INS.len() + ATT_SYNTAX_INS.len();
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
/// Converts a register class to a GCC constraint code.
|
||||
|
@ -2,9 +2,13 @@
|
||||
use gccjit::FnAttribute;
|
||||
use gccjit::Function;
|
||||
use rustc_attr::InstructionSetAttr;
|
||||
#[cfg(feature="master")]
|
||||
use rustc_attr::InlineAttr;
|
||||
use rustc_codegen_ssa::target_features::tied_target_features;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_middle::ty;
|
||||
#[cfg(feature="master")]
|
||||
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
|
||||
use rustc_session::Session;
|
||||
use rustc_span::symbol::sym;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
@ -67,6 +71,24 @@ fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get GCC attribute for the provided inline heuristic.
|
||||
#[cfg(feature="master")]
|
||||
#[inline]
|
||||
fn inline_attr<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, inline: InlineAttr) -> Option<FnAttribute<'gcc>> {
|
||||
match inline {
|
||||
InlineAttr::Hint => Some(FnAttribute::Inline),
|
||||
InlineAttr::Always => Some(FnAttribute::AlwaysInline),
|
||||
InlineAttr::Never => {
|
||||
if cx.sess().target.arch != "amdgpu" {
|
||||
Some(FnAttribute::NoInline)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
InlineAttr::None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Composite function which sets GCC attributes for function depending on its AST (`#[attribute]`)
|
||||
/// attributes.
|
||||
pub fn from_fn_attrs<'gcc, 'tcx>(
|
||||
@ -77,6 +99,23 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
|
||||
) {
|
||||
let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
|
||||
|
||||
#[cfg(feature="master")]
|
||||
{
|
||||
let inline =
|
||||
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
|
||||
InlineAttr::Never
|
||||
}
|
||||
else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
|
||||
InlineAttr::Hint
|
||||
}
|
||||
else {
|
||||
codegen_fn_attrs.inline
|
||||
};
|
||||
if let Some(attr) = inline_attr(cx, inline) {
|
||||
func.add_attribute(attr);
|
||||
}
|
||||
}
|
||||
|
||||
let function_features =
|
||||
codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::<Vec<&str>>();
|
||||
|
||||
|
@ -254,7 +254,7 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
|
||||
// SIMD builtins require a constant value.
|
||||
self.bitcast_if_needed(value, typ)
|
||||
}
|
||||
|
||||
|
||||
fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value {
|
||||
self.context.new_array_access(None, base_addr, self.const_usize(offset.bytes())).get_address(None)
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#[cfg(feature = "master")]
|
||||
use gccjit::FnAttribute;
|
||||
use gccjit::{FnAttribute, VarAttribute, Visibility};
|
||||
use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue};
|
||||
use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods};
|
||||
use rustc_middle::span_bug;
|
||||
@ -234,7 +234,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
||||
);
|
||||
|
||||
if !self.tcx.is_reachable_non_generic(def_id) {
|
||||
// TODO(antoyo): set visibility.
|
||||
#[cfg(feature = "master")]
|
||||
global.add_attribute(VarAttribute::Visibility(Visibility::Hidden));
|
||||
}
|
||||
|
||||
global
|
||||
|
@ -2967,10 +2967,6 @@ match name {
|
||||
"llvm.nvvm.clz.ll" => "__nvvm_clz_ll",
|
||||
"llvm.nvvm.cos.approx.f" => "__nvvm_cos_approx_f",
|
||||
"llvm.nvvm.cos.approx.ftz.f" => "__nvvm_cos_approx_ftz_f",
|
||||
"llvm.nvvm.cp.async.ca.shared.global.16" => "__nvvm_cp_async_ca_shared_global_16",
|
||||
"llvm.nvvm.cp.async.ca.shared.global.4" => "__nvvm_cp_async_ca_shared_global_4",
|
||||
"llvm.nvvm.cp.async.ca.shared.global.8" => "__nvvm_cp_async_ca_shared_global_8",
|
||||
"llvm.nvvm.cp.async.cg.shared.global.16" => "__nvvm_cp_async_cg_shared_global_16",
|
||||
"llvm.nvvm.cp.async.commit.group" => "__nvvm_cp_async_commit_group",
|
||||
"llvm.nvvm.cp.async.mbarrier.arrive" => "__nvvm_cp_async_mbarrier_arrive",
|
||||
"llvm.nvvm.cp.async.mbarrier.arrive.noinc" => "__nvvm_cp_async_mbarrier_arrive_noinc",
|
||||
@ -3086,18 +3082,8 @@ match name {
|
||||
"llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16",
|
||||
"llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2",
|
||||
"llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f",
|
||||
"llvm.nvvm.fma.rn.ftz.f16" => "__nvvm_fma_rn_ftz_f16",
|
||||
"llvm.nvvm.fma.rn.ftz.f16x2" => "__nvvm_fma_rn_ftz_f16x2",
|
||||
"llvm.nvvm.fma.rn.ftz.relu.f16" => "__nvvm_fma_rn_ftz_relu_f16",
|
||||
"llvm.nvvm.fma.rn.ftz.relu.f16x2" => "__nvvm_fma_rn_ftz_relu_f16x2",
|
||||
"llvm.nvvm.fma.rn.ftz.sat.f16" => "__nvvm_fma_rn_ftz_sat_f16",
|
||||
"llvm.nvvm.fma.rn.ftz.sat.f16x2" => "__nvvm_fma_rn_ftz_sat_f16x2",
|
||||
"llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16",
|
||||
"llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2",
|
||||
"llvm.nvvm.fma.rn.relu.f16" => "__nvvm_fma_rn_relu_f16",
|
||||
"llvm.nvvm.fma.rn.relu.f16x2" => "__nvvm_fma_rn_relu_f16x2",
|
||||
"llvm.nvvm.fma.rn.sat.f16" => "__nvvm_fma_rn_sat_f16",
|
||||
"llvm.nvvm.fma.rn.sat.f16x2" => "__nvvm_fma_rn_sat_f16x2",
|
||||
"llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d",
|
||||
"llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f",
|
||||
"llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f",
|
||||
@ -3111,32 +3097,18 @@ match name {
|
||||
"llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16",
|
||||
"llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2",
|
||||
"llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f",
|
||||
"llvm.nvvm.fmax.ftz.f16" => "__nvvm_fmax_ftz_f16",
|
||||
"llvm.nvvm.fmax.ftz.f16x2" => "__nvvm_fmax_ftz_f16x2",
|
||||
"llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f",
|
||||
"llvm.nvvm.fmax.ftz.nan.f16" => "__nvvm_fmax_ftz_nan_f16",
|
||||
"llvm.nvvm.fmax.ftz.nan.f16x2" => "__nvvm_fmax_ftz_nan_f16x2",
|
||||
"llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f",
|
||||
"llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f",
|
||||
"llvm.nvvm.fmax.ftz.xorsign.abs.f16" => "__nvvm_fmax_ftz_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmax.ftz.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16",
|
||||
"llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2",
|
||||
"llvm.nvvm.fmax.nan.f" => "__nvvm_fmax_nan_f",
|
||||
"llvm.nvvm.fmax.nan.f16" => "__nvvm_fmax_nan_f16",
|
||||
"llvm.nvvm.fmax.nan.f16x2" => "__nvvm_fmax_nan_f16x2",
|
||||
"llvm.nvvm.fmax.nan.xorsign.abs.bf16" => "__nvvm_fmax_nan_xorsign_abs_bf16",
|
||||
"llvm.nvvm.fmax.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_nan_xorsign_abs_bf16x2",
|
||||
"llvm.nvvm.fmax.nan.xorsign.abs.f" => "__nvvm_fmax_nan_xorsign_abs_f",
|
||||
"llvm.nvvm.fmax.nan.xorsign.abs.f16" => "__nvvm_fmax_nan_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmax.nan.xorsign.abs.f16x2" => "__nvvm_fmax_nan_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmax.xorsign.abs.bf16" => "__nvvm_fmax_xorsign_abs_bf16",
|
||||
"llvm.nvvm.fmax.xorsign.abs.bf16x2" => "__nvvm_fmax_xorsign_abs_bf16x2",
|
||||
"llvm.nvvm.fmax.xorsign.abs.f" => "__nvvm_fmax_xorsign_abs_f",
|
||||
"llvm.nvvm.fmax.xorsign.abs.f16" => "__nvvm_fmax_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmax.xorsign.abs.f16x2" => "__nvvm_fmax_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmin.bf16" => "__nvvm_fmin_bf16",
|
||||
"llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2",
|
||||
"llvm.nvvm.fmin.d" => "__nvvm_fmin_d",
|
||||
@ -3144,32 +3116,18 @@ match name {
|
||||
"llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16",
|
||||
"llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2",
|
||||
"llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f",
|
||||
"llvm.nvvm.fmin.ftz.f16" => "__nvvm_fmin_ftz_f16",
|
||||
"llvm.nvvm.fmin.ftz.f16x2" => "__nvvm_fmin_ftz_f16x2",
|
||||
"llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f",
|
||||
"llvm.nvvm.fmin.ftz.nan.f16" => "__nvvm_fmin_ftz_nan_f16",
|
||||
"llvm.nvvm.fmin.ftz.nan.f16x2" => "__nvvm_fmin_ftz_nan_f16x2",
|
||||
"llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f",
|
||||
"llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f",
|
||||
"llvm.nvvm.fmin.ftz.xorsign.abs.f16" => "__nvvm_fmin_ftz_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmin.ftz.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16",
|
||||
"llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2",
|
||||
"llvm.nvvm.fmin.nan.f" => "__nvvm_fmin_nan_f",
|
||||
"llvm.nvvm.fmin.nan.f16" => "__nvvm_fmin_nan_f16",
|
||||
"llvm.nvvm.fmin.nan.f16x2" => "__nvvm_fmin_nan_f16x2",
|
||||
"llvm.nvvm.fmin.nan.xorsign.abs.bf16" => "__nvvm_fmin_nan_xorsign_abs_bf16",
|
||||
"llvm.nvvm.fmin.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_nan_xorsign_abs_bf16x2",
|
||||
"llvm.nvvm.fmin.nan.xorsign.abs.f" => "__nvvm_fmin_nan_xorsign_abs_f",
|
||||
"llvm.nvvm.fmin.nan.xorsign.abs.f16" => "__nvvm_fmin_nan_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmin.nan.xorsign.abs.f16x2" => "__nvvm_fmin_nan_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fmin.xorsign.abs.bf16" => "__nvvm_fmin_xorsign_abs_bf16",
|
||||
"llvm.nvvm.fmin.xorsign.abs.bf16x2" => "__nvvm_fmin_xorsign_abs_bf16x2",
|
||||
"llvm.nvvm.fmin.xorsign.abs.f" => "__nvvm_fmin_xorsign_abs_f",
|
||||
"llvm.nvvm.fmin.xorsign.abs.f16" => "__nvvm_fmin_xorsign_abs_f16",
|
||||
"llvm.nvvm.fmin.xorsign.abs.f16x2" => "__nvvm_fmin_xorsign_abs_f16x2",
|
||||
"llvm.nvvm.fns" => "__nvvm_fns",
|
||||
"llvm.nvvm.h2f" => "__nvvm_h2f",
|
||||
"llvm.nvvm.i2d.rm" => "__nvvm_i2d_rm",
|
||||
@ -7895,6 +7853,10 @@ match name {
|
||||
"llvm.x86.subborrow.u64" => "__builtin_ia32_subborrow_u64",
|
||||
"llvm.x86.tbm.bextri.u32" => "__builtin_ia32_bextri_u32",
|
||||
"llvm.x86.tbm.bextri.u64" => "__builtin_ia32_bextri_u64",
|
||||
"llvm.x86.tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps",
|
||||
"llvm.x86.tcmmimfp16ps.internal" => "__builtin_ia32_tcmmimfp16ps_internal",
|
||||
"llvm.x86.tcmmrlfp16ps" => "__builtin_ia32_tcmmrlfp16ps",
|
||||
"llvm.x86.tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal",
|
||||
"llvm.x86.tdpbf16ps" => "__builtin_ia32_tdpbf16ps",
|
||||
"llvm.x86.tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal",
|
||||
"llvm.x86.tdpbssd" => "__builtin_ia32_tdpbssd",
|
||||
|
@ -313,6 +313,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
|
||||
let new_args = args.to_vec();
|
||||
args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into();
|
||||
},
|
||||
"__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" |
|
||||
"__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" |
|
||||
"__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => {
|
||||
// The first two arguments are reversed, compared to LLVM.
|
||||
let new_args = args.to_vec();
|
||||
args = vec![new_args[1], new_args[0], new_args[2]].into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
@ -551,141 +551,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
||||
let context = &self.cx.context;
|
||||
let result =
|
||||
match width {
|
||||
8 => {
|
||||
8 | 16 | 32 | 64 => {
|
||||
let mask = ((1u128 << width) - 1) as u64;
|
||||
let (m0, m1, m2) = if width > 16 {
|
||||
(
|
||||
context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64),
|
||||
context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64),
|
||||
context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32),
|
||||
context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32),
|
||||
context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32),
|
||||
)
|
||||
};
|
||||
let one = context.new_rvalue_from_int(typ, 1);
|
||||
let two = context.new_rvalue_from_int(typ, 2);
|
||||
let four = context.new_rvalue_from_int(typ, 4);
|
||||
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
|
||||
let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
|
||||
let left = self.lshr(value, one);
|
||||
let left = self.and(left, m0);
|
||||
let right = self.and(value, m0);
|
||||
let right = self.shl(right, one);
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
|
||||
let left = self.lshr(step1, two);
|
||||
let left = self.and(left, m1);
|
||||
let right = self.and(step1, m1);
|
||||
let right = self.shl(right, two);
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
|
||||
let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
step3
|
||||
},
|
||||
16 => {
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
|
||||
let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
|
||||
let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
|
||||
let left = self.lshr(step2, four);
|
||||
let left = self.and(left, m2);
|
||||
let right = self.and(step2, m2);
|
||||
let right = self.shl(right, four);
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
|
||||
let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
|
||||
let step4 = self.or(left, right);
|
||||
|
||||
step4
|
||||
},
|
||||
32 => {
|
||||
// TODO(antoyo): Refactor with other implementations.
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
|
||||
let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
|
||||
let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
|
||||
let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
|
||||
let step4 = self.or(left, right);
|
||||
|
||||
// Fifth step.
|
||||
let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
|
||||
let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
|
||||
let step5 = self.or(left, right);
|
||||
|
||||
step5
|
||||
},
|
||||
64 => {
|
||||
// First step.
|
||||
let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
|
||||
let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
|
||||
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
|
||||
let left = self.xor(step2, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
|
||||
let left = self.or(temp, left);
|
||||
let step3 = self.xor(left, step2);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
|
||||
let left = self.xor(step3, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
|
||||
let left = self.or(temp, left);
|
||||
let step4 = self.xor(left, step3);
|
||||
|
||||
// Fifth step.
|
||||
let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
|
||||
let left = self.xor(step4, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
|
||||
let left = self.or(temp, left);
|
||||
let step5 = self.xor(left, step4);
|
||||
|
||||
step5
|
||||
if width == 8 {
|
||||
step3
|
||||
} else {
|
||||
self.gcc_bswap(step3, width)
|
||||
}
|
||||
},
|
||||
128 => {
|
||||
// TODO(antoyo): find a more efficient implementation?
|
||||
|
@ -77,7 +77,7 @@ use rustc_codegen_ssa::target_features::supported_target_features;
|
||||
use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
|
||||
use rustc_data_structures::fx::FxIndexMap;
|
||||
use rustc_errors::{DiagnosticMessage, ErrorGuaranteed, Handler, SubdiagnosticMessage};
|
||||
use rustc_fluent_macro::fluent_messages;
|
||||
use rustc_macros::fluent_messages;
|
||||
use rustc_metadata::EncodedMetadata;
|
||||
use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
|
||||
use rustc_middle::query::Providers;
|
||||
@ -111,6 +111,8 @@ impl CodegenBackend for GccCodegenBackend {
|
||||
}
|
||||
|
||||
fn init(&self, sess: &Session) {
|
||||
#[cfg(feature="master")]
|
||||
gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
|
||||
if sess.lto() != Lto::No {
|
||||
sess.emit_warning(LTONotSupported {});
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user