Implement AES-NI intrinsics using inline asm
This commit is contained in:
parent
6ef877c8c2
commit
dc60334777
@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
|
||||
args,
|
||||
ret_place,
|
||||
target,
|
||||
source_info.span,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
@ -645,8 +645,21 @@ fn save_register(
|
||||
) {
|
||||
match arch {
|
||||
InlineAsmArch::X86_64 => {
|
||||
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
|
||||
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
|
||||
match reg {
|
||||
InlineAsmReg::X86(reg)
|
||||
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
|
||||
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
|
||||
{
|
||||
// rustc emits x0 rather than xmm0
|
||||
write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
|
||||
write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
|
||||
.unwrap();
|
||||
}
|
||||
_ => {
|
||||
write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
|
||||
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
|
||||
}
|
||||
}
|
||||
generated_asm.push('\n');
|
||||
}
|
||||
InlineAsmArch::AArch64 => {
|
||||
@ -671,8 +684,24 @@ fn restore_register(
|
||||
) {
|
||||
match arch {
|
||||
InlineAsmArch::X86_64 => {
|
||||
generated_asm.push_str(" mov ");
|
||||
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
|
||||
match reg {
|
||||
InlineAsmReg::X86(reg)
|
||||
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
|
||||
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
|
||||
{
|
||||
// rustc emits x0 rather than xmm0
|
||||
write!(
|
||||
generated_asm,
|
||||
" movups xmm{}",
|
||||
reg as u32 - X86InlineAsmReg::xmm0 as u32
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
_ => {
|
||||
generated_asm.push_str(" mov ");
|
||||
reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
|
||||
}
|
||||
}
|
||||
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
|
||||
}
|
||||
InlineAsmArch::AArch64 => {
|
||||
@ -728,7 +757,12 @@ fn call_inline_asm<'tcx>(
|
||||
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
|
||||
|
||||
for (offset, place) in outputs {
|
||||
let ty = fx.clif_type(place.layout().ty).unwrap();
|
||||
let ty = if place.layout().ty.is_simd() {
|
||||
let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
|
||||
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
|
||||
} else {
|
||||
fx.clif_type(place.layout().ty).unwrap()
|
||||
};
|
||||
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
|
||||
fx,
|
||||
ty,
|
||||
|
@ -12,6 +12,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
|
||||
args: &[mir::Operand<'tcx>],
|
||||
ret: CPlace<'tcx>,
|
||||
target: Option<BasicBlock>,
|
||||
span: Span,
|
||||
) {
|
||||
if intrinsic.starts_with("llvm.aarch64") {
|
||||
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
|
||||
@ -31,6 +32,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
|
||||
args,
|
||||
ret,
|
||||
target,
|
||||
span,
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
args: &[mir::Operand<'tcx>],
|
||||
ret: CPlace<'tcx>,
|
||||
target: Option<BasicBlock>,
|
||||
span: Span,
|
||||
) {
|
||||
match intrinsic {
|
||||
"llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
|
||||
@ -718,6 +719,7 @@ fn select4(
|
||||
}
|
||||
|
||||
"llvm.x86.pclmulqdq" => {
|
||||
// FIXME use inline asm
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
|
||||
intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
|
||||
|
||||
@ -779,6 +781,160 @@ fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value {
|
||||
ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aeskeygenassist" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
|
||||
intrinsic_args!(fx, args => (a, _imm8); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
|
||||
let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1])
|
||||
{
|
||||
imm8
|
||||
} else {
|
||||
fx.tcx.sess.span_fatal(
|
||||
span,
|
||||
"Index argument for `_mm_aeskeygenassist_si128` is not a constant",
|
||||
);
|
||||
};
|
||||
|
||||
let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
|
||||
&[CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
}],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aesimc" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
|
||||
intrinsic_args!(fx, args => (a); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
|
||||
&[CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
}],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aesenc" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
|
||||
intrinsic_args!(fx, args => (a, round_key); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
let round_key = round_key.load_scalar(fx);
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
|
||||
&[
|
||||
CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
},
|
||||
CInlineAsmOperand::In {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
|
||||
value: round_key,
|
||||
},
|
||||
],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aesenclast" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
|
||||
intrinsic_args!(fx, args => (a, round_key); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
let round_key = round_key.load_scalar(fx);
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
|
||||
&[
|
||||
CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
},
|
||||
CInlineAsmOperand::In {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
|
||||
value: round_key,
|
||||
},
|
||||
],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aesdec" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
|
||||
intrinsic_args!(fx, args => (a, round_key); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
let round_key = round_key.load_scalar(fx);
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
|
||||
&[
|
||||
CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
},
|
||||
CInlineAsmOperand::In {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
|
||||
value: round_key,
|
||||
},
|
||||
],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.aesni.aesdeclast" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
|
||||
intrinsic_args!(fx, args => (a, round_key); intrinsic);
|
||||
|
||||
let a = a.load_scalar(fx);
|
||||
let round_key = round_key.load_scalar(fx);
|
||||
|
||||
codegen_inline_asm_inner(
|
||||
fx,
|
||||
&[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
|
||||
&[
|
||||
CInlineAsmOperand::InOut {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
|
||||
_late: true,
|
||||
in_value: a,
|
||||
out_place: Some(ret),
|
||||
},
|
||||
CInlineAsmOperand::In {
|
||||
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
|
||||
value: round_key,
|
||||
},
|
||||
],
|
||||
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
|
||||
);
|
||||
}
|
||||
|
||||
"llvm.x86.avx.ptestz.256" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256&ig_expand=6945
|
||||
intrinsic_args!(fx, args => (a, b); intrinsic);
|
||||
|
Loading…
Reference in New Issue
Block a user