diff --git a/example/std_example.rs b/example/std_example.rs index 90d4ab721da..0e1004420dd 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -244,6 +244,7 @@ unsafe fn test_simd() { test_mm256_shuffle_epi8(); test_mm256_permute2x128_si256(); + test_mm256_permutevar8x32_epi32(); #[rustfmt::skip] let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); @@ -447,6 +448,16 @@ unsafe fn test_mm256_permute2x128_si256() { assert_eq_m256i(r, e); } +#[cfg(target_arch = "x86_64")] +#[target_feature(enable = "avx2")] +unsafe fn test_mm256_permutevar8x32_epi32() { + let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800); + let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm256_setr_epi32(800, 700, 600, 500, 400, 300, 200, 100); + let e = _mm256_permutevar8x32_epi32(a, idx); + assert_eq_m256i(r, e); +} + fn test_checked_mul() { let u: Option = u8::from_str_radix("1000", 10).ok(); assert_eq!(u, None); diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 71e06c73a37..75e9e16db55 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } } + "llvm.x86.avx2.permd" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32 + intrinsic_args!(fx, args => (a, idx); intrinsic); + + for j in 0..=7 { + let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx); + let index = fx.bcx.ins().uextend(fx.pointer_type, index); + let value = a.value_lane_dyn(fx, index).load_scalar(fx); + ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store( + fx, + value, + MemFlags::trusted(), + ); + } + } "llvm.x86.avx2.vperm2i128" | "llvm.x86.avx.vperm2f128.ps.256" | "llvm.x86.avx.vperm2f128.pd.256" => {