Rollup merge of #72439 - westernmagic:master, r=Amanieu
NVPTX support for new asm! This PR implements the new `asm!` syntax for the `nvptx64-nvidia-cuda` target. r? @Amanieu
This commit is contained in:
commit
37894559ab
@ -468,12 +468,17 @@ Here is the list of currently supported register classes:
|
||||
| ARM | `qreg` | `q[0-15]` | `w` |
|
||||
| ARM | `qreg_low8` | `q[0-7]` | `t` |
|
||||
| ARM | `qreg_low4` | `q[0-3]` | `x` |
|
||||
| NVPTX | `reg16` | None\* | `h` |
|
||||
| NVPTX | `reg32` | None\* | `r` |
|
||||
| NVPTX | `reg64` | None\* | `l` |
|
||||
| RISC-V | `reg` | `x1`, `x[5-7]`, `x[9-15]`, `x[16-31]` (non-RV32E) | `r` |
|
||||
| RISC-V | `freg` | `f[0-31]` | `f` |
|
||||
|
||||
> **Note**: On x86 we treat `reg_byte` differently from `reg` because the compiler can allocate `al` and `ah` separately whereas `reg` reserves the whole register.
|
||||
>
|
||||
> Note #2: On x86-64 the high byte registers (e.g. `ah`) are only available when used as an explicit register. Specifying the `reg_byte` register class for an operand will always allocate a low byte register.
|
||||
>
|
||||
> Note #3: NVPTX doesn't have a fixed register set, so named registers are not supported.
|
||||
|
||||
Additional register classes may be added in the future based on demand (e.g. MMX, x87, etc).
|
||||
|
||||
@ -495,6 +500,9 @@ Each register class has constraints on which value types they can be used with.
|
||||
| ARM | `sreg` | `vfp2` | `i32`, `f32` |
|
||||
| ARM | `dreg` | `vfp2` | `i64`, `f64`, `i8x8`, `i16x4`, `i32x2`, `i64x1`, `f32x2` |
|
||||
| ARM | `qreg` | `neon` | `i8x16`, `i16x8`, `i32x4`, `i64x2`, `f32x4` |
|
||||
| NVPTX | `reg16` | None | `i8`, `i16` |
|
||||
| NVPTX | `reg32` | None | `i8`, `i16`, `i32`, `f32` |
|
||||
| NVPTX | `reg64` | None | `i8`, `i16`, `i32`, `f32`, `i64`, `f64` |
|
||||
| RISC-V32 | `reg` | None | `i8`, `i16`, `i32`, `f32` |
|
||||
| RISC-V64 | `reg` | None | `i8`, `i16`, `i32`, `f32`, `i64`, `f64` |
|
||||
| RISC-V | `freg` | `f` | `f32` |
|
||||
@ -610,6 +618,9 @@ The supported modifiers are a subset of LLVM's (and GCC's) [asm template argumen
|
||||
| ARM | `dreg` | None | `d0` | `P` |
|
||||
| ARM | `qreg` | None | `q0` | `q` |
|
||||
| ARM | `qreg` | `e` / `f` | `d0` / `d1` | `e` / `f` |
|
||||
| NVPTX | `reg16` | None | `rs0` | None |
|
||||
| NVPTX | `reg32` | None | `r0` | None |
|
||||
| NVPTX | `reg64` | None | `rd0` | None |
|
||||
| RISC-V | `reg` | None | `x1` | None |
|
||||
| RISC-V | `freg` | None | `f0` | None |
|
||||
|
||||
|
@ -254,6 +254,7 @@ impl AsmBuilderMethods<'tcx> for Builder<'a, 'll, 'tcx> {
|
||||
]);
|
||||
}
|
||||
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {}
|
||||
InlineAsmArch::Nvptx64 => {}
|
||||
}
|
||||
}
|
||||
if !options.contains(InlineAsmOptions::NOMEM) {
|
||||
@ -410,6 +411,9 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass) -> String {
|
||||
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => "x",
|
||||
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg)
|
||||
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg) => "w",
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => "h",
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => "r",
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => "l",
|
||||
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => "r",
|
||||
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => "f",
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) => "r",
|
||||
@ -452,6 +456,7 @@ fn modifier_to_llvm(
|
||||
modifier
|
||||
}
|
||||
}
|
||||
InlineAsmRegClass::Nvptx(_) => None,
|
||||
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg)
|
||||
| InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => None,
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)
|
||||
@ -502,6 +507,9 @@ fn dummy_output_type(cx: &CodegenCx<'ll, 'tcx>, reg: InlineAsmRegClass) -> &'ll
|
||||
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => {
|
||||
cx.type_vector(cx.type_i64(), 2)
|
||||
}
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(),
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(),
|
||||
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(),
|
||||
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
|
||||
InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
|
||||
InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)
|
||||
|
@ -60,6 +60,7 @@ macro_rules! def_regs {
|
||||
#error = [$($bad_reg:literal),+] => $error:literal,
|
||||
)*
|
||||
}) => {
|
||||
#[allow(unreachable_code)]
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Debug, Eq, PartialEq, Hash, HashStable_Generic)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum $arch_reg {
|
||||
@ -102,19 +103,20 @@ macro_rules! def_regs {
|
||||
pub(super) fn fill_reg_map(
|
||||
_arch: super::InlineAsmArch,
|
||||
mut _has_feature: impl FnMut(&str) -> bool,
|
||||
map: &mut rustc_data_structures::fx::FxHashMap<
|
||||
_map: &mut rustc_data_structures::fx::FxHashMap<
|
||||
super::InlineAsmRegClass,
|
||||
rustc_data_structures::fx::FxHashSet<super::InlineAsmReg>,
|
||||
>,
|
||||
) {
|
||||
#[allow(unused_imports)]
|
||||
use super::{InlineAsmReg, InlineAsmRegClass};
|
||||
$(
|
||||
if $($filter(_arch, &mut _has_feature, true).is_ok() &&)? true {
|
||||
if let Some(set) = map.get_mut(&InlineAsmRegClass::$arch($arch_regclass::$class)) {
|
||||
if let Some(set) = _map.get_mut(&InlineAsmRegClass::$arch($arch_regclass::$class)) {
|
||||
set.insert(InlineAsmReg::$arch($arch_reg::$reg));
|
||||
}
|
||||
$(
|
||||
if let Some(set) = map.get_mut(&InlineAsmRegClass::$arch($arch_regclass::$extra_class)) {
|
||||
if let Some(set) = _map.get_mut(&InlineAsmRegClass::$arch($arch_regclass::$extra_class)) {
|
||||
set.insert(InlineAsmReg::$arch($arch_reg::$reg));
|
||||
}
|
||||
)*
|
||||
@ -146,11 +148,13 @@ macro_rules! types {
|
||||
|
||||
mod aarch64;
|
||||
mod arm;
|
||||
mod nvptx;
|
||||
mod riscv;
|
||||
mod x86;
|
||||
|
||||
pub use aarch64::{AArch64InlineAsmReg, AArch64InlineAsmRegClass};
|
||||
pub use arm::{ArmInlineAsmReg, ArmInlineAsmRegClass};
|
||||
pub use nvptx::{NvptxInlineAsmReg, NvptxInlineAsmRegClass};
|
||||
pub use riscv::{RiscVInlineAsmReg, RiscVInlineAsmRegClass};
|
||||
pub use x86::{X86InlineAsmReg, X86InlineAsmRegClass};
|
||||
|
||||
@ -162,6 +166,7 @@ pub enum InlineAsmArch {
|
||||
AArch64,
|
||||
RiscV32,
|
||||
RiscV64,
|
||||
Nvptx64,
|
||||
}
|
||||
|
||||
impl FromStr for InlineAsmArch {
|
||||
@ -175,6 +180,7 @@ impl FromStr for InlineAsmArch {
|
||||
"aarch64" => Ok(Self::AArch64),
|
||||
"riscv32" => Ok(Self::RiscV32),
|
||||
"riscv64" => Ok(Self::RiscV64),
|
||||
"nvptx64" => Ok(Self::Nvptx64),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
@ -196,6 +202,7 @@ pub enum InlineAsmReg {
|
||||
Arm(ArmInlineAsmReg),
|
||||
AArch64(AArch64InlineAsmReg),
|
||||
RiscV(RiscVInlineAsmReg),
|
||||
Nvptx(NvptxInlineAsmReg),
|
||||
}
|
||||
|
||||
impl InlineAsmReg {
|
||||
@ -236,6 +243,9 @@ impl InlineAsmReg {
|
||||
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
|
||||
Self::RiscV(RiscVInlineAsmReg::parse(arch, has_feature, &name)?)
|
||||
}
|
||||
InlineAsmArch::Nvptx64 => {
|
||||
Self::Nvptx(NvptxInlineAsmReg::parse(arch, has_feature, &name)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@ -281,6 +291,7 @@ pub enum InlineAsmRegClass {
|
||||
Arm(ArmInlineAsmRegClass),
|
||||
AArch64(AArch64InlineAsmRegClass),
|
||||
RiscV(RiscVInlineAsmRegClass),
|
||||
Nvptx(NvptxInlineAsmRegClass),
|
||||
}
|
||||
|
||||
impl InlineAsmRegClass {
|
||||
@ -290,6 +301,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.name(),
|
||||
Self::AArch64(r) => r.name(),
|
||||
Self::RiscV(r) => r.name(),
|
||||
Self::Nvptx(r) => r.name(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -302,6 +314,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Arm),
|
||||
Self::AArch64(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::AArch64),
|
||||
Self::RiscV(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::RiscV),
|
||||
Self::Nvptx(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Nvptx),
|
||||
}
|
||||
}
|
||||
|
||||
@ -321,6 +334,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.suggest_modifier(arch, ty),
|
||||
Self::AArch64(r) => r.suggest_modifier(arch, ty),
|
||||
Self::RiscV(r) => r.suggest_modifier(arch, ty),
|
||||
Self::Nvptx(r) => r.suggest_modifier(arch, ty),
|
||||
}
|
||||
}
|
||||
|
||||
@ -336,6 +350,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.default_modifier(arch),
|
||||
Self::AArch64(r) => r.default_modifier(arch),
|
||||
Self::RiscV(r) => r.default_modifier(arch),
|
||||
Self::Nvptx(r) => r.default_modifier(arch),
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,6 +365,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.supported_types(arch),
|
||||
Self::AArch64(r) => r.supported_types(arch),
|
||||
Self::RiscV(r) => r.supported_types(arch),
|
||||
Self::Nvptx(r) => r.supported_types(arch),
|
||||
}
|
||||
}
|
||||
|
||||
@ -367,6 +383,7 @@ impl InlineAsmRegClass {
|
||||
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
|
||||
Self::RiscV(RiscVInlineAsmRegClass::parse(arch, name)?)
|
||||
}
|
||||
InlineAsmArch::Nvptx64 => Self::Nvptx(NvptxInlineAsmRegClass::parse(arch, name)?),
|
||||
})
|
||||
})
|
||||
}
|
||||
@ -379,6 +396,7 @@ impl InlineAsmRegClass {
|
||||
Self::Arm(r) => r.valid_modifiers(arch),
|
||||
Self::AArch64(r) => r.valid_modifiers(arch),
|
||||
Self::RiscV(r) => r.valid_modifiers(arch),
|
||||
Self::Nvptx(r) => r.valid_modifiers(arch),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -518,5 +536,10 @@ pub fn allocatable_registers(
|
||||
riscv::fill_reg_map(arch, has_feature, &mut map);
|
||||
map
|
||||
}
|
||||
InlineAsmArch::Nvptx64 => {
|
||||
let mut map = nvptx::regclass_map();
|
||||
nvptx::fill_reg_map(arch, has_feature, &mut map);
|
||||
map
|
||||
}
|
||||
}
|
||||
}
|
||||
|
49
src/librustc_target/asm/nvptx.rs
Normal file
49
src/librustc_target/asm/nvptx.rs
Normal file
@ -0,0 +1,49 @@
|
||||
use super::{InlineAsmArch, InlineAsmType};
|
||||
use rustc_macros::HashStable_Generic;
|
||||
|
||||
// Declares the NVPTX register classes: `reg16`, `reg32` and `reg64`.
// The value types each class accepts are listed in `supported_types` below.
def_reg_class! {
|
||||
Nvptx NvptxInlineAsmRegClass {
|
||||
reg16,
|
||||
reg32,
|
||||
reg64,
|
||||
}
|
||||
}
|
||||
|
||||
// Per-class queries for the NVPTX register classes.
// None of these methods consults the architecture argument (`_arch`).
impl NvptxInlineAsmRegClass {
|
||||
/// Returns the template modifiers accepted for this class.
/// Always the empty slice: no modifier is valid for any NVPTX class.
pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] {
|
||||
&[]
|
||||
}
|
||||
|
||||
/// Returns an alternative register class to suggest for a value of type `_ty`.
/// Always `None` for NVPTX.
pub fn suggest_class(self, _arch: InlineAsmArch, _ty: InlineAsmType) -> Option<Self> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns a modifier to suggest for a value of type `_ty`.
/// Always `None` for NVPTX.
pub fn suggest_modifier(
|
||||
self,
|
||||
_arch: InlineAsmArch,
|
||||
_ty: InlineAsmType,
|
||||
) -> Option<(char, &'static str)> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the modifier applied when none is written explicitly.
/// Always `None` for NVPTX.
pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<(char, &'static str)> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the value types that may be used with this register class.
///
/// Each wider class accepts everything the narrower ones do, plus the
/// types of its own width: `reg16` takes `I8`/`I16`, `reg32` adds
/// `I32`/`F32`, and `reg64` adds `I64`/`F64`.
pub fn supported_types(
|
||||
self,
|
||||
_arch: InlineAsmArch,
|
||||
) -> &'static [(InlineAsmType, Option<&'static str>)] {
|
||||
// NOTE(review): the `_:` key passed to `types!` presumably means "no
// target feature required" (the `Option<&'static str>` half of each
// pair) -- confirm against the `types!` macro definition.
match self {
|
||||
Self::reg16 => types! { _: I8, I16; },
|
||||
Self::reg32 => types! { _: I8, I16, I32, F32; },
|
||||
Self::reg64 => types! { _: I8, I16, I32, F32, I64, F64; },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Defines `NvptxInlineAsmReg`, the set of explicitly nameable NVPTX registers.
def_regs! {
|
||||
// Registers in PTX are declared in the assembly.
|
||||
// There are no predefined registers that one can use.
|
||||
// The register list is therefore left empty.
Nvptx NvptxInlineAsmReg NvptxInlineAsmRegClass {}
|
||||
}
|
133
src/test/assembly/asm/nvptx-types.rs
Normal file
133
src/test/assembly/asm/nvptx-types.rs
Normal file
@ -0,0 +1,133 @@
|
||||
// no-system-llvm
|
||||
// assembly-output: emit-asm
|
||||
// compile-flags: --target nvptx64-nvidia-cuda
|
||||
// compile-flags: --crate-type cdylib
|
||||
|
||||
#![feature(no_core, lang_items, rustc_attrs)]
|
||||
#![no_core]
|
||||
|
||||
// This crate is `#![no_core]`, so the usual macros are not in scope.
// The two stubs below declare `asm!` and `concat!` locally and mark them
// `#[rustc_builtin_macro]`; presumably the compiler's built-in expansion is
// used in place of the empty `() => {}` arm.
#[rustc_builtin_macro]
|
||||
macro_rules! asm {
|
||||
() => {};
|
||||
}
|
||||
#[rustc_builtin_macro]
|
||||
macro_rules! concat {
|
||||
() => {};
|
||||
}
|
||||
|
||||
// Minimal `sized` and `copy` lang items, declared locally because this crate
// is `#![no_core]` and so does not get them from `core`.
#[lang = "sized"]
|
||||
trait Sized {}
|
||||
#[lang = "copy"]
|
||||
trait Copy {}
|
||||
|
||||
// Alias so a raw pointer can be passed to `check!`, whose `$ty` parameter is
// an `ident` and therefore cannot be spelled `*mut u8` directly.
type ptr = *mut u8;
|
||||
|
||||
// `Copy` for every type that the `check!` invocations below pass through `asm!`.
impl Copy for i8 {}
|
||||
impl Copy for i16 {}
|
||||
impl Copy for i32 {}
|
||||
impl Copy for f32 {}
|
||||
impl Copy for i64 {}
|
||||
impl Copy for f64 {}
|
||||
impl Copy for ptr {}
|
||||
|
||||
// NVPTX does not support static variables, so only the function form of `sym` is exercised below.
|
||||
#[no_mangle]
|
||||
fn extern_func() {}
|
||||
|
||||
// CHECK-LABEL: .visible .func sym_fn()
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: call extern_func;
|
||||
// CHECK: // end inline asm
|
||||
// Passes `extern_func` to `asm!` as a `sym` operand. The FileCheck directives
// above expect the emitted inline assembly to read `call extern_func;`.
#[no_mangle]
|
||||
pub unsafe fn sym_fn() {
|
||||
asm!("call {};", sym extern_func);
|
||||
}
|
||||
|
||||
// Generates one test function per invocation.
//
// `$func`  - name of the generated `#[no_mangle]` function
// `$ty`    - type of its single argument and of its return value
// `$class` - register class used for both the `in` and the `out` operand
// `$mov`   - instruction mnemonic placed in front of the two operands
//
// The function body is a single `asm!` whose template is
// `"<$mov> {}, {};"`: the first placeholder is the output `y`, the second
// the input `x`, and `y` is then returned.
macro_rules! check {
|
||||
($func:ident $ty:ident $class:ident $mov:literal) => {
|
||||
#[no_mangle]
|
||||
pub unsafe fn $func(x: $ty) -> $ty {
|
||||
let y;
|
||||
asm!(concat!($mov, " {}, {};"), out($class) y, in($class) x);
|
||||
y
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg16_i8
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i16 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg16_i8 i8 reg16 "mov.i16");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg16_i16
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i16 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg16_i16 i16 reg16 "mov.i16");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg32_i8
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i32 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg32_i8 i8 reg32 "mov.i32");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg32_i16
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i32 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg32_i16 i16 reg32 "mov.i32");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg32_i32
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i32 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg32_i32 i32 reg32 "mov.i32");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg32_f32
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i32 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg32_f32 f32 reg32 "mov.i32");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg64_i8
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_i8 i8 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg64_i16
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_i16 i16 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg64_i32
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_i32 i32 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) reg64_f32
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_f32 f32 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b64 func_retval0) reg64_i64
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_i64 i64 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b64 func_retval0) reg64_f64
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_f64 f64 reg64 "mov.i64");
|
||||
|
||||
// CHECK-LABEL: .visible .func (.param .b64 func_retval0) reg64_ptr
|
||||
// CHECK: // begin inline asm
|
||||
// CHECK: mov.i64 %{{[a-z0-9]+}}, %{{[a-z0-9]+}};
|
||||
// CHECK: // end inline asm
|
||||
check!(reg64_ptr ptr reg64 "mov.i64");
|
Loading…
x
Reference in New Issue
Block a user