Merge pull request #1491 from folkertdev/add-llvm-avx2-permd

add `llvm.x86.avx2.permd` intrinsic
This commit is contained in:
bjorn3 2024-05-11 22:11:53 +02:00 committed by GitHub
commit 893ba536bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 0 deletions

View File

@@ -244,6 +244,7 @@ unsafe fn test_simd() {
test_mm256_shuffle_epi8();
test_mm256_permute2x128_si256();
test_mm256_permutevar8x32_epi32();
#[rustfmt::skip]
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
@@ -447,6 +448,16 @@ unsafe fn test_mm256_permute2x128_si256() {
assert_eq_m256i(r, e);
}
/// Checks `_mm256_permutevar8x32_epi32` by using it to reverse the eight 32-bit lanes of a vector.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn test_mm256_permutevar8x32_epi32() {
    // Index lane `j` holds `7 - j`, so output lane `j` should receive input lane `7 - j`.
    let reversing_idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let input = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);

    let expected = _mm256_setr_epi32(800, 700, 600, 500, 400, 300, 200, 100);
    let actual = _mm256_permutevar8x32_epi32(input, reversing_idx);
    assert_eq_m256i(expected, actual);
}
fn test_checked_mul() {
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
assert_eq!(u, None);

View File

@@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
}
}
"llvm.x86.avx2.permd" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
//
// Lane-wise permute of eight 32-bit lanes: `ret[j] = a[idx[j] & 7]` for `j` in `0..=7`.
intrinsic_args!(fx, args => (a, idx); intrinsic);

for j in 0..=7 {
    let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
    // The instruction only consults the low three bits of each index lane
    // (`id := idx[i+2:i]` in the Intel pseudocode). Mask the remaining bits off so
    // that an index above 7 wraps around rather than being handed unchanged to the
    // dynamic lane access below.
    let index = fx.bcx.ins().band_imm(index, 7);
    // Widen to pointer width, which is what the dynamic lane lookup is given here.
    let index = fx.bcx.ins().uextend(fx.pointer_type, index);
    let value = a.value_lane_dyn(fx, index).load_scalar(fx);
    ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store(
        fx,
        value,
        MemFlags::trusted(),
    );
}
}
"llvm.x86.avx2.vperm2i128"
| "llvm.x86.avx.vperm2f128.ps.256"
| "llvm.x86.avx.vperm2f128.pd.256" => {