Implement AES-NI intrinsics using inline asm

This commit is contained in:
bjorn3 2023-11-11 20:59:15 +00:00
parent 6ef877c8c2
commit dc60334777
4 changed files with 198 additions and 5 deletions

View File

@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
args,
ret_place,
target,
source_info.span,
);
return;
}

View File

@ -645,8 +645,21 @@ fn save_register(
) {
match arch {
InlineAsmArch::X86_64 => {
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
match reg {
InlineAsmReg::X86(reg)
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
{
// rustc emits x0 rather than xmm0
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
.unwrap();
}
_ => {
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
}
}
generated_asm.push('\n');
}
InlineAsmArch::AArch64 => {
@ -671,8 +684,24 @@ fn restore_register(
) {
match arch {
InlineAsmArch::X86_64 => {
generated_asm.push_str(" mov ");
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
match reg {
InlineAsmReg::X86(reg)
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
{
// rustc emits x0 rather than xmm0
write!(
generated_asm,
" movups xmm{}",
reg as u32 - X86InlineAsmReg::xmm0 as u32
)
.unwrap();
}
_ => {
generated_asm.push_str(" mov ");
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
}
}
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
}
InlineAsmArch::AArch64 => {
@ -728,7 +757,12 @@ fn call_inline_asm<'tcx>(
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
for (offset, place) in outputs {
let ty = fx.clif_type(place.layout().ty).unwrap();
let ty = if place.layout().ty.is_simd() {
let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
} else {
fx.clif_type(place.layout().ty).unwrap()
};
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
fx,
ty,

View File

@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
args: &[mir::Operand<'tcx>],
ret: CPlace<'tcx>,
target: Option<BasicBlock>,
span: Span,
) {
if intrinsic.starts_with("llvm.aarch64") {
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
args,
ret,
target,
span,
);
}

View File

@ -15,6 +15,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
args: &[mir::Operand<'tcx>],
ret: CPlace<'tcx>,
target: Option<BasicBlock>,
span: Span,
) {
match intrinsic {
"llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
@ -718,6 +719,7 @@ fn select4(
}
"llvm.x86.pclmulqdq" => {
// FIXME use inline asm
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
@ -779,6 +781,160 @@ fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value {
ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
}
"llvm.x86.aesni.aeskeygenassist" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
intrinsic_args!(fx, args => (a, _imm8); intrinsic);
let a = a.load_scalar(fx);
let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1])
{
imm8
} else {
fx.tcx.sess.span_fatal(
span,
"Index argument for `_mm_aeskeygenassist_si128` is not a constant",
);
};
let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
&[CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
}],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.aesni.aesimc" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
intrinsic_args!(fx, args => (a); intrinsic);
let a = a.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
&[CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
}],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.aesni.aesenc" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
intrinsic_args!(fx, args => (a, round_key); intrinsic);
let a = a.load_scalar(fx);
let round_key = round_key.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
value: round_key,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.aesni.aesenclast" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
intrinsic_args!(fx, args => (a, round_key); intrinsic);
let a = a.load_scalar(fx);
let round_key = round_key.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
value: round_key,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.aesni.aesdec" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
intrinsic_args!(fx, args => (a, round_key); intrinsic);
let a = a.load_scalar(fx);
let round_key = round_key.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
value: round_key,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.aesni.aesdeclast" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
intrinsic_args!(fx, args => (a, round_key); intrinsic);
let a = a.load_scalar(fx);
let round_key = round_key.load_scalar(fx);
codegen_inline_asm_inner(
fx,
&[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
&[
CInlineAsmOperand::InOut {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
_late: true,
in_value: a,
out_place: Some(ret),
},
CInlineAsmOperand::In {
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
value: round_key,
},
],
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
);
}
"llvm.x86.avx.ptestz.256" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256&ig_expand=6945
intrinsic_args!(fx, args => (a, b); intrinsic);