aarch64 neon intrinsics: vmaxq_f32, vminq_f32, vaddvq_f32, vrndnq_f32 (#1533)
This commit is contained in:
parent
5349365566
commit
753271ce7e
@ -202,6 +202,44 @@ unsafe fn test_vqadd_u8() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vmaxq_f32` against a hand-computed lane-wise maximum of two
// four-lane f32 vectors with mixed signs.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmaxq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);

    // Each expected lane is the larger of the corresponding input lanes.
    let expected = f32x4::from([0., 5., 2., 7.]);

    let actual: f32x4 = transmute(vmaxq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
|
||||
|
||||
// Checks `vminq_f32` against a hand-computed lane-wise minimum of two
// four-lane f32 vectors with mixed signs.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vminq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);

    // Each expected lane is the smaller of the corresponding input lanes.
    let expected = f32x4::from([-4., -1., -6., -3.]);

    let actual: f32x4 = transmute(vminq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
|
||||
|
||||
// Checks that `vaddvq_f32` collapses a four-lane f32 vector into the scalar
// sum of its lanes.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vaddvq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
    let lanes = f32x4::from([0., 1., 2., 3.]);

    // 0 + 1 + 2 + 3; small integers, so the comparison can be exact.
    let expected = 6f32;

    let sum = vaddvq_f32(transmute(lanes));
    assert_eq!(sum, expected);
}
|
||||
|
||||
// Checks `vrndnq_f32` lane-wise rounding on a four-lane f32 vector,
// including two inputs that sit exactly halfway between integers.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vrndnq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
    let input = f32x4::from([0.1, -1.9, 4.5, 5.5]);

    // The halfway inputs 4.5 and 5.5 are expected to land on the even
    // neighbours 4 and 6 respectively.
    let expected = f32x4::from([0., -2., 4., 6.]);

    let rounded: f32x4 = transmute(vrndnq_f32(transmute(input)));
    assert_eq!(rounded, expected);
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
fn main() {
|
||||
unsafe {
|
||||
@ -229,6 +267,11 @@ fn main() {
|
||||
|
||||
test_vqsub_u8();
|
||||
test_vqadd_u8();
|
||||
|
||||
test_vmaxq_f32();
|
||||
test_vminq_f32();
|
||||
test_vaddvq_f32();
|
||||
test_vrndnq_f32();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,44 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
|
||||
);
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
|
||||
intrinsic_args!(fx, args => (x, y); intrinsic);
|
||||
|
||||
simd_pair_for_each_lane(
|
||||
fx,
|
||||
x,
|
||||
y,
|
||||
ret,
|
||||
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane),
|
||||
);
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
|
||||
intrinsic_args!(fx, args => (x, y); intrinsic);
|
||||
|
||||
simd_pair_for_each_lane(
|
||||
fx,
|
||||
x,
|
||||
y,
|
||||
ret,
|
||||
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane),
|
||||
);
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
|
||||
intrinsic_args!(fx, args => (v); intrinsic);
|
||||
|
||||
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().fadd(a, b));
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
|
||||
intrinsic_args!(fx, args => (v); intrinsic);
|
||||
|
||||
simd_for_each_lane(fx, v, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
|
||||
fx.bcx.ins().nearest(lane)
|
||||
});
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
|
||||
intrinsic_args!(fx, args => (v); intrinsic);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user