aarch64 neon intrinsics: vmaxq_f32, vminq_f32, vaddvq_f32, vrndnq_f32 (#1533)

This commit is contained in:
Turki Al-Marri 2024-09-19 19:11:24 +03:00 committed by GitHub
parent 5349365566
commit 753271ce7e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 81 additions and 0 deletions

View File

@ -202,6 +202,44 @@ unsafe fn test_vqadd_u8() {
assert_eq!(r, e);
}
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmaxq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
    // Each result lane is expected to hold the larger of the two corresponding input lanes.
    let lhs = f32x4::from([0.0, -1.0, 2.0, -3.0]);
    let rhs = f32x4::from([-4.0, 5.0, -6.0, 7.0]);
    let expected = f32x4::from([0.0, 5.0, 2.0, 7.0]);
    let actual = transmute::<_, f32x4>(vmaxq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[cfg(target_arch = "aarch64")]
unsafe fn test_vminq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
    // Each result lane is expected to hold the smaller of the two corresponding input lanes.
    let lhs = f32x4::from([0.0, -1.0, 2.0, -3.0]);
    let rhs = f32x4::from([-4.0, 5.0, -6.0, 7.0]);
    let expected = f32x4::from([-4.0, -1.0, -6.0, -3.0]);
    let actual = transmute::<_, f32x4>(vminq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
#[cfg(target_arch = "aarch64")]
unsafe fn test_vaddvq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
    // The four lanes are summed into a single scalar: 0 + 1 + 2 + 3 = 6.
    let input = f32x4::from([0.0, 1.0, 2.0, 3.0]);
    let expected: f32 = 6.0;
    let sum = vaddvq_f32(transmute(input));
    assert_eq!(sum, expected);
}
#[cfg(target_arch = "aarch64")]
unsafe fn test_vrndnq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
    // The inputs include two exact halves (4.5 and 5.5), which are expected to round to the
    // even neighbours 4 and 6 respectively.
    let input = f32x4::from([0.1, -1.9, 4.5, 5.5]);
    let expected = f32x4::from([0.0, -2.0, 4.0, 6.0]);
    let rounded = transmute::<_, f32x4>(vrndnq_f32(transmute(input)));
    assert_eq!(rounded, expected);
}
#[cfg(target_arch = "aarch64")]
fn main() {
unsafe {
@ -229,6 +267,11 @@ fn main() {
test_vqsub_u8();
test_vqadd_u8();
test_vmaxq_f32();
test_vminq_f32();
test_vaddvq_f32();
test_vrndnq_f32();
}
}

View File

@ -91,6 +91,44 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
);
}
_ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
    // Floating-point maximum: the two vector operands are handed to
    // `simd_pair_for_each_lane`, whose callback emits a Cranelift `fmax` per lane pair.
    intrinsic_args!(fx, args => (lhs, rhs); intrinsic);
    simd_pair_for_each_lane(fx, lhs, rhs, ret, &|fx, _lane_ty, _res_lane_ty, a, b| {
        fx.bcx.ins().fmax(a, b)
    });
}
_ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
    // Floating-point minimum: the two vector operands are handed to
    // `simd_pair_for_each_lane`, whose callback emits a Cranelift `fmin` per lane pair.
    intrinsic_args!(fx, args => (lhs, rhs); intrinsic);
    simd_pair_for_each_lane(fx, lhs, rhs, ret, &|fx, _lane_ty, _res_lane_ty, a, b| {
        fx.bcx.ins().fmin(a, b)
    });
}
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
    // Across-vector floating-point add: the single vector operand is passed to
    // `simd_reduce` with no initial accumulator (`None`) and an `fadd` combiner.
    intrinsic_args!(fx, args => (vector); intrinsic);
    simd_reduce(fx, vector, None, ret, &|fx, _ty, lhs, rhs| {
        fx.bcx.ins().fadd(lhs, rhs)
    });
}
_ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
    // Per-lane rounding: each lane of the single vector operand is passed through
    // Cranelift's `nearest` instruction.
    intrinsic_args!(fx, args => (input); intrinsic);
    simd_for_each_lane(
        fx,
        input,
        ret,
        &|fx, _lane_ty, _res_lane_ty, value| fx.bcx.ins().nearest(value),
    );
}
_ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
intrinsic_args!(fx, args => (v); intrinsic);