Auto merge of #117987 - bjorn3:sync_cg_clif-2023-11-16, r=bjorn3

Subtree sync for rustc_codegen_cranelift

The main highlight this time is support for the AES and SHA256 crypto intrinsics on x86_64, implemented by lowering them to inline asm.
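
As a concrete illustration (hedged, not code from this PR): with this sync, `core::arch` snippets like the following build and run under cg_clif on x86_64. The helper name is made up for the example.

```rust
// Illustrative only: _mm_aesenc_si128/_mm_aesenclast_si128 are among the
// intrinsics cg_clif now lowers to inline asm on x86_64.
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__m128i, _mm_aesenc_si128, _mm_aesenclast_si128};

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "aes")]
unsafe fn aes_round_pair(block: __m128i, rk: __m128i, last_rk: __m128i) -> __m128i {
    let state = _mm_aesenc_si128(block, rk); // one full AES encryption round
    _mm_aesenclast_si128(state, last_rk)     // final round (no MixColumns)
}
```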

r? `@ghost`

`@rustbot` label +A-codegen +A-cranelift +T-compiler
Committed by bors, 2023-11-17 01:59:59 +00:00 · commit 15a791fa35
15 changed files with 472 additions and 191 deletions

View File

@@ -35,6 +35,10 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      if: matrix.os == 'ubuntu-latest'
+      run: cat /proc/cpuinfo
+
     - name: Cache cargo target dir
       uses: actions/cache@v3
       with:

View File

@@ -66,6 +66,10 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      if: matrix.os == 'ubuntu-latest'
+      run: cat /proc/cpuinfo
+
     - name: Cache cargo target dir
       uses: actions/cache@v3
       with:
@@ -136,6 +140,9 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      run: cat /proc/cpuinfo
+
     - name: Prepare dependencies
       run: ./y.sh prepare
@@ -159,6 +166,9 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      run: cat /proc/cpuinfo
+
     - name: Cache cargo target dir
       uses: actions/cache@v3
       with:

View File

@@ -11,6 +11,9 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      run: cat /proc/cpuinfo
+
     - name: Cache cargo target dir
       uses: actions/cache@v3
       with:
@@ -31,6 +34,9 @@ jobs:
     steps:
     - uses: actions/checkout@v3

+    - name: CPU features
+      run: cat /proc/cpuinfo
+
     - name: Cache cargo target dir
       uses: actions/cache@v3
       with:

View File

@@ -5,8 +5,48 @@ This has the potential to improve compilation times in debug mode.

 If your project doesn't use any of the things listed under "Not yet supported", it should work fine.
 If not please open an issue.

+## Download using Rustup
+
+The Cranelift codegen backend is distributed in nightly builds on Linux and x86_64 macOS. If you want to
+install it using Rustup, you can do that by running:
+
+```bash
+$ rustup component add rustc-codegen-cranelift-preview --toolchain nightly
+```
+
+Once it is installed, you can enable it with one of the following approaches:
+- `CARGO_PROFILE_DEV_CODEGEN_BACKEND=cranelift cargo +nightly build -Zcodegen-backend`
+- `RUSTFLAGS="-Zcodegen-backend=cranelift" cargo +nightly build`
+- Add the following to `.cargo/config.toml`:
+
+  ```toml
+  [unstable]
+  codegen-backend = true
+
+  [profile.dev]
+  codegen-backend = "cranelift"
+  ```
+- Add the following to `Cargo.toml`:
+
+  ```toml
+  # This line needs to come before anything else in Cargo.toml
+  cargo-features = ["codegen-backend"]
+
+  [profile.dev]
+  codegen-backend = "cranelift"
+  ```
+
+## Precompiled builds
+
+You can also download a pre-built version from the [releases] page.
+Extract the `dist` directory in the archive anywhere you want.
+If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`.
+(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)).
+
+[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev
+
 ## Building and testing

+If you want to build the backend manually, you can download it from GitHub and build it yourself:
+
 ```bash
 $ git clone https://github.com/rust-lang/rustc_codegen_cranelift
 $ cd rustc_codegen_cranelift
@@ -22,15 +62,6 @@ $ ./test.sh

 For more docs on how to build and test see [build_system/usage.txt](build_system/usage.txt) or the help message of `./y.sh`.

-## Precompiled builds
-
-Alternatively you can download a pre built version from the [releases] page.
-Extract the `dist` directory in the archive anywhere you want.
-If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`.
-(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)).
-
-[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev
-
 ## Usage

 rustc_codegen_cranelift can be used as a near-drop-in replacement for `cargo build` or `cargo run` for existing projects.

View File

@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2023-11-10"
+channel = "nightly-2023-11-16"
 components = ["rust-src", "rustc-dev", "llvm-tools"]

View File

@@ -46,7 +46,7 @@ case $1 in
         git pull origin master
         branch=sync_cg_clif-$(date +%Y-%m-%d)
         git checkout -b "$branch"
-        "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/bjorn3/rustc_codegen_cranelift.git master
+        "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/rust-lang/rustc_codegen_cranelift.git master
         git push -u my "$branch"

         # immediately merge the merge commit into cg_clif to prevent merge conflicts when syncing

View File

@@ -1,15 +1,17 @@
 #!/usr/bin/env bash
 set -e

+# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we
+# the LLVM backend isn't compiled in here.
+export CG_CLIF_FORCE_GNU_AS=1
+
 # Compiletest expects all standard library paths to start with /rustc/FAKE_PREFIX.
 # CG_CLIF_STDLIB_REMAP_PATH_PREFIX will cause cg_clif's build system to pass
 # --remap-path-prefix to handle this.
-# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we
-# the LLVM backend isn't compiled in here.
-CG_CLIF_FORCE_GNU_AS=1 CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build
+CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build

 echo "[SETUP] Rust fork"
-git clone https://github.com/rust-lang/rust.git || true
+git clone https://github.com/rust-lang/rust.git --filter=tree:0 || true
 pushd rust
 git fetch
 git checkout -- .

View File

@@ -11,7 +11,5 @@ rm -r compiler/rustc_codegen_cranelift/{Cargo.*,src}
 cp ../Cargo.* compiler/rustc_codegen_cranelift/
 cp -r ../src compiler/rustc_codegen_cranelift/src

-# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we
-# the LLVM backend isn't compiled in here.
-CG_CLIF_FORCE_GNU_AS=1 ./x.py build --stage 1 library/std
+./x.py build --stage 1 library/std
 popd

View File

@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
                 args,
                 ret_place,
                 target,
+                source_info.span,
             );
             return;
         }

View File

@@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                     );
                 }

-                crate::inline_asm::codegen_inline_asm(
+                crate::inline_asm::codegen_inline_asm_terminator(
                     fx,
                     source_info.span,
                     template,

View File

@@ -1,10 +1,13 @@
 //! Handling of `static`s, `const`s and promoted allocations

+use std::cmp::Ordering;
+
 use cranelift_module::*;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
 use rustc_middle::mir::ConstValue;
+use rustc_middle::ty::ScalarInt;

 use crate::prelude::*;
@@ -430,9 +433,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
 pub(crate) fn mir_operand_get_const_val<'tcx>(
     fx: &FunctionCx<'_, '_, 'tcx>,
     operand: &Operand<'tcx>,
-) -> Option<ConstValue<'tcx>> {
+) -> Option<ScalarInt> {
     match operand {
-        Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0),
+        Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
         // FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
         // inside a temporary before being passed to the intrinsic requiring the const argument.
         // This code tries to find a single constant defining definition of the referenced local.
@@ -440,7 +443,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
             if !place.projection.is_empty() {
                 return None;
             }
-            let mut computed_const_val = None;
+            let mut computed_scalar_int = None;
             for bb_data in fx.mir.basic_blocks.iter() {
                 for stmt in &bb_data.statements {
                     match &stmt.kind {
@@ -456,22 +459,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
                             operand,
                             ty,
                         ) => {
-                            if computed_const_val.is_some() {
+                            if computed_scalar_int.is_some() {
                                 return None; // local assigned twice
                             }
                             if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
                                 return None;
                             }
-                            let const_val = mir_operand_get_const_val(fx, operand)?;
-                            if fx.layout_of(*ty).size
-                                != const_val.try_to_scalar_int()?.size()
+                            let scalar_int = mir_operand_get_const_val(fx, operand)?;
+                            let scalar_int = match fx
+                                .layout_of(*ty)
+                                .size
+                                .cmp(&scalar_int.size())
                             {
-                                return None;
-                            }
-                            computed_const_val = Some(const_val);
+                                Ordering::Equal => scalar_int,
+                                Ordering::Less => match ty.kind() {
+                                    ty::Uint(_) => ScalarInt::try_from_uint(
+                                        scalar_int.try_to_uint(scalar_int.size()).unwrap(),
+                                        fx.layout_of(*ty).size,
+                                    )
+                                    .unwrap(),
+                                    ty::Int(_) => ScalarInt::try_from_int(
+                                        scalar_int.try_to_int(scalar_int.size()).unwrap(),
+                                        fx.layout_of(*ty).size,
+                                    )
+                                    .unwrap(),
+                                    _ => unreachable!(),
+                                },
+                                Ordering::Greater => return None,
+                            };
+                            computed_scalar_int = Some(scalar_int);
                         }
                         Rvalue::Use(operand) => {
-                            computed_const_val = mir_operand_get_const_val(fx, operand)
+                            computed_scalar_int = mir_operand_get_const_val(fx, operand)
                         }
                         _ => return None,
                     }
@@ -522,7 +541,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
                     TerminatorKind::Call { .. } => {}
                 }
             }
-            computed_const_val
+            computed_scalar_int
         }
    }
}
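
For readers skimming the `mir_operand_get_const_val` hunks above: the function now returns a `ScalarInt` and re-encodes it at the cast target's width before accepting it. A minimal standalone sketch of that rule, with a plain `u128` standing in for `ScalarInt` and only the unsigned case handled (the real code also handles signed ints and simply unwraps on a failed conversion):

```rust
use std::cmp::Ordering;

// Sketch of the Equal/Less/Greater rule from the diff above: equal widths
// pass through, a cast to a narrower type keeps the constant only if it
// still fits (mirroring ScalarInt::try_from_uint), and a widening cast
// gives up and returns None, as the backend does.
fn adjust_to_cast_width(value: u128, value_bytes: u32, cast_ty_bytes: u32) -> Option<u128> {
    match cast_ty_bytes.cmp(&value_bytes) {
        Ordering::Equal => Some(value),
        Ordering::Less => {
            // Less implies cast_ty_bytes < 16, so the shift is in range.
            let mask = (1u128 << (cast_ty_bytes * 8)) - 1;
            (value & mask == value).then_some(value)
        }
        Ordering::Greater => None,
    }
}
```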

View File

@@ -10,7 +10,7 @@ use target_lexicon::BinaryFormat;

 use crate::prelude::*;

-enum CInlineAsmOperand<'tcx> {
+pub(crate) enum CInlineAsmOperand<'tcx> {
     In {
         reg: InlineAsmRegOrRegClass,
         value: Value,
@@ -34,7 +34,7 @@ enum CInlineAsmOperand<'tcx> {
     },
 }

-pub(crate) fn codegen_inline_asm<'tcx>(
+pub(crate) fn codegen_inline_asm_terminator<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     span: Span,
     template: &[InlineAsmTemplatePiece],
@@ -42,8 +42,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     options: InlineAsmOptions,
     destination: Option<mir::BasicBlock>,
 ) {
-    // FIXME add .eh_frame unwind info directives
-
     // Used by panic_abort on Windows, but uses a syntax which only happens to work with
     // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
     // the LLVM backend.
@@ -135,15 +133,33 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         })
         .collect::<Vec<_>>();

-    let mut inputs = Vec::new();
-    let mut outputs = Vec::new();
+    codegen_inline_asm_inner(fx, template, &operands, options);
+
+    match destination {
+        Some(destination) => {
+            let destination_block = fx.get_block(destination);
+            fx.bcx.ins().jump(destination_block, &[]);
+        }
+        None => {
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+        }
+    }
+}
+
+pub(crate) fn codegen_inline_asm_inner<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[CInlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+) {
+    // FIXME add .eh_frame unwind info directives
+
     let mut asm_gen = InlineAssemblyGenerator {
         tcx: fx.tcx,
         arch: fx.tcx.sess.asm_arch.unwrap(),
         enclosing_def_id: fx.instance.def_id(),
         template,
-        operands: &operands,
+        operands,
         options,
         registers: Vec::new(),
         stack_slots_clobber: Vec::new(),
@@ -165,6 +181,8 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
     fx.cx.global_asm.push_str(&generated_asm);

+    let mut inputs = Vec::new();
+    let mut outputs = Vec::new();
     for (i, operand) in operands.iter().enumerate() {
         match operand {
             CInlineAsmOperand::In { reg: _, value } => {
@@ -186,16 +204,6 @@
         }
     }

     call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
-
-    match destination {
-        Some(destination) => {
-            let destination_block = fx.get_block(destination);
-            fx.bcx.ins().jump(destination_block, &[]);
-        }
-        None => {
-            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
-        }
-    }
 }

 struct InlineAssemblyGenerator<'a, 'tcx> {
@@ -637,8 +645,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     ) {
         match arch {
             InlineAsmArch::X86_64 => {
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
+                        write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
+                            .unwrap();
+                    }
+                    _ => {
                 write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
                 reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                    }
+                }
                 generated_asm.push('\n');
             }
             InlineAsmArch::AArch64 => {
@@ -663,8 +684,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     ) {
         match arch {
             InlineAsmArch::X86_64 => {
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(
+                            generated_asm,
+                            " movups xmm{}",
+                            reg as u32 - X86InlineAsmReg::xmm0 as u32
+                        )
+                        .unwrap();
+                    }
+                    _ => {
                 generated_asm.push_str(" mov ");
-                reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                        reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
+                    }
+                }
                 writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
             }
             InlineAsmArch::AArch64 => {
@@ -720,7 +757,12 @@ fn call_inline_asm<'tcx>(
     fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);

     for (offset, place) in outputs {
-        let ty = fx.clif_type(place.layout().ty).unwrap();
+        let ty = if place.layout().ty.is_simd() {
+            let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
+            fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
+        } else {
+            fx.clif_type(place.layout().ty).unwrap()
+        };
         let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
             fx,
             ty,
@@ -729,83 +771,3 @@ fn call_inline_asm<'tcx>(
         place.write_cvalue(fx, CValue::by_val(value, place.layout()));
     }
 }
-
-pub(crate) fn codegen_xgetbv<'tcx>(
-    fx: &mut FunctionCx<'_, '_, 'tcx>,
-    xcr_no: Value,
-    ret: CPlace<'tcx>,
-) {
-    // FIXME add .eh_frame unwind info directives
-
-    let operands = vec![
-        CInlineAsmOperand::In {
-            reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
-            value: xcr_no,
-        },
-        CInlineAsmOperand::Out {
-            reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
-            late: true,
-            place: Some(ret),
-        },
-        CInlineAsmOperand::Out {
-            reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
-            late: true,
-            place: None,
-        },
-    ];
-    let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM;
-
-    let mut inputs = Vec::new();
-    let mut outputs = Vec::new();
-
-    let mut asm_gen = InlineAssemblyGenerator {
-        tcx: fx.tcx,
-        arch: fx.tcx.sess.asm_arch.unwrap(),
-        enclosing_def_id: fx.instance.def_id(),
-        template: &[InlineAsmTemplatePiece::String(
-            "
-            xgetbv
-            // out = rdx << 32 | rax
-            shl rdx, 32
-            or rax, rdx
-            "
-            .to_string(),
-        )],
-        operands: &operands,
-        options,
-        registers: Vec::new(),
-        stack_slots_clobber: Vec::new(),
-        stack_slots_input: Vec::new(),
-        stack_slots_output: Vec::new(),
-        stack_slot_size: Size::from_bytes(0),
-    };
-    asm_gen.allocate_registers();
-    asm_gen.allocate_stack_slots();
-
-    let inline_asm_index = fx.cx.inline_asm_index.get();
-    fx.cx.inline_asm_index.set(inline_asm_index + 1);
-    let asm_name = format!(
-        "__inline_asm_{}_n{}",
-        fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
-        inline_asm_index
-    );
-
-    let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
-    fx.cx.global_asm.push_str(&generated_asm);
-
-    for (i, operand) in operands.iter().enumerate() {
-        match operand {
-            CInlineAsmOperand::In { reg: _, value } => {
-                inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value));
-            }
-            CInlineAsmOperand::Out { reg: _, late: _, place } => {
-                if let Some(place) = place {
-                    outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place));
-                }
-            }
-            _ => unreachable!(),
-        }
-    }
-
-    call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
-}
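
One detail in the save/restore hunks above deserves a gloss: for xmm0 through xmm15 the wrapper now formats the register name itself and uses `movups`, since (per the comment in the diff) the default `reg.emit` path prints `x0` rather than `xmm0`, and a vector register needs a 16-byte store. A self-contained sketch of just that branch; `is_xmm`, `reg_idx`, and `gp_name` are stand-ins for the real `InlineAsmReg` logic:

```rust
use std::fmt::Write;

// Stand-in sketch: `is_xmm` models the xmm0..=xmm15 range check and
// `reg_idx` models `reg as u32 - X86InlineAsmReg::xmm0 as u32`.
fn emit_register_save(
    asm: &mut String,
    offset_bytes: u64,
    is_xmm: bool,
    reg_idx: u32,
    gp_name: &str,
) {
    if is_xmm {
        // 128-bit vector register: unaligned 16-byte store with an explicit name
        writeln!(asm, "    movups [rbx+0x{:x}], xmm{}", offset_bytes, reg_idx).unwrap();
    } else {
        // general-purpose register: plain mov using the rustc-provided name
        writeln!(asm, "    mov [rbx+0x{:x}], {}", offset_bytes, gp_name).unwrap();
    }
}
```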

View File

@@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     target: Option<BasicBlock>,
+    span: Span,
 ) {
     if intrinsic.starts_with("llvm.aarch64") {
         return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
             args,
             ret,
             target,
+            span,
         );
     }

View File

@@ -1,7 +1,10 @@
 //! Emulate x86 LLVM intrinsics

+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
 use rustc_middle::ty::GenericArgsRef;
+use rustc_target::asm::*;

+use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand};
 use crate::intrinsics::*;
 use crate::prelude::*;
@@ -12,6 +15,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     target: Option<BasicBlock>,
+    span: Span,
 ) {
     match intrinsic {
         "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
@@ -24,7 +28,35 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let xcr_no = xcr_no.load_scalar(fx);

-            crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret);
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(
+                    "
+                    xgetbv
+                    // out = rdx << 32 | rax
+                    shl rdx, 32
+                    or rax, rdx
+                    "
+                    .to_string(),
+                )],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
+                        value: xcr_no,
+                    },
+                    CInlineAsmOperand::Out {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        late: true,
+                        place: Some(ret),
+                    },
+                    CInlineAsmOperand::Out {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        late: true,
+                        place: None,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
         }
         "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => {
@@ -688,64 +720,278 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
         "llvm.x86.pclmulqdq" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
-            intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
+            intrinsic_args!(fx, args => (a, b, _imm8); intrinsic);

-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);

-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i64);
-            assert_eq!(ret_lane_ty, fx.tcx.types.i64);
-            assert_eq!(lane_count, 2);
-            assert_eq!(ret_lane_count, 2);
+            let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2])
+            {
+                imm8
+            } else {
+                fx.tcx.sess.span_fatal(
+                    span,
+                    "Index argument for `_mm_clmulepi64_si128` is not a constant",
+                );
+            };

-            let imm8 = imm8.load_scalar(fx);
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));

-            let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001);
-            let a_lane0 = a.value_lane(fx, 0).load_scalar(fx);
-            let a_lane1 = a.value_lane(fx, 1).load_scalar(fx);
-            let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0);
-
-            let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000);
-            let b_lane0 = b.value_lane(fx, 0).load_scalar(fx);
-            let b_lane1 = b.value_lane(fx, 1).load_scalar(fx);
-            let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0);
-
-            fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value {
-                let tmp = fx.bcx.ins().ushr_imm(val, bit);
-                fx.bcx.ins().band_imm(tmp, 1)
-            }
-
-            let mut res1 = fx.bcx.ins().iconst(types::I64, 0);
-            for i in 0..=63 {
-                let x = extract_bit(fx, temp1, 0);
-                let y = extract_bit(fx, temp2, i);
-                let mut temp = fx.bcx.ins().band(x, y);
-                for j in 1..=i {
-                    let x = extract_bit(fx, temp1, j);
-                    let y = extract_bit(fx, temp2, i - j);
-                    let z = fx.bcx.ins().band(x, y);
-                    temp = fx.bcx.ins().bxor(temp, z);
-                }
-                let temp = fx.bcx.ins().ishl_imm(temp, i);
-                res1 = fx.bcx.ins().bor(res1, temp);
-            }
-            ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted());
-
-            let mut res2 = fx.bcx.ins().iconst(types::I64, 0);
-            for i in 64..=127 {
-                let mut temp = fx.bcx.ins().iconst(types::I64, 0);
-                for j in i - 63..=63 {
-                    let x = extract_bit(fx, temp1, j);
-                    let y = extract_bit(fx, temp2, i - j);
-                    let z = fx.bcx.ins().band(x, y);
-                    temp = fx.bcx.ins().bxor(temp, z);
-                }
-                let temp = fx.bcx.ins().ishl_imm(temp, i);
-                res2 = fx.bcx.ins().bor(res2, temp);
-            }
-            ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
         }
+        "llvm.x86.aesni.aeskeygenassist" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
+            intrinsic_args!(fx, args => (a, _imm8); intrinsic);
+
+            let a = a.load_scalar(fx);
+
+            let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1])
+            {
+                imm8
+            } else {
+                fx.tcx.sess.span_fatal(
+                    span,
+                    "Index argument for `_mm_aeskeygenassist_si128` is not a constant",
+                );
+            };
+
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
+                &[CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                    _late: true,
+                    in_value: a,
+                    out_place: Some(ret),
+                }],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.aesni.aesimc" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            let a = a.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
+                &[CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                    _late: true,
+                    in_value: a,
+                    out_place: Some(ret),
+                }],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.aesni.aesenc" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.aesni.aesenclast" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.aesni.aesdec" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.aesni.aesdeclast" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha256rnds2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
+            intrinsic_args!(fx, args => (a, b, k); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            let k = k.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                    // Implicit argument to the sha256rnds2 instruction
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        value: k,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha256msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha256msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
         "llvm.x86.avx.ptestz.256" => {

View File

@@ -282,11 +282,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant");
             };

-            let idx = idx_const
-                .try_to_bits(Size::from_bytes(4 /* u32*/))
-                .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+            let idx: u32 = idx_const
+                .try_to_u32()
+                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
             let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
-            if idx >= lane_count.into() {
+            if u64::from(idx) >= lane_count {
                 fx.tcx.sess.span_fatal(
                     fx.mir.span,
                     format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
@@ -331,10 +331,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             };

             let idx = idx_const
-                .try_to_bits(Size::from_bytes(4 /* u32*/))
-                .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+                .try_to_u32()
+                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
             let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
-            if idx >= lane_count.into() {
+            if u64::from(idx) >= lane_count {
                 fx.tcx.sess.span_fatal(
                     fx.mir.span,
                     format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),