Implement _mm_shuffle_epi8
This commit is contained in:
parent
e4d0811360
commit
c09ef96878
@ -197,6 +197,7 @@ unsafe fn test_simd() {
|
||||
|
||||
test_mm_extract_epi8();
|
||||
test_mm_insert_epi16();
|
||||
test_mm_shuffle_epi8();
|
||||
|
||||
test_mm256_shuffle_epi8();
|
||||
test_mm256_permute2x128_si256();
|
||||
@ -345,6 +346,26 @@ unsafe fn test_mm_insert_epi16() {
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
unsafe fn test_mm_shuffle_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm_setr_epi8(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16,
|
||||
);
|
||||
#[rustfmt::skip]
|
||||
let b = _mm_setr_epi8(
|
||||
4, 128_u8 as i8, 4, 3,
|
||||
24, 12, 6, 19,
|
||||
12, 5, 5, 10,
|
||||
4, 1, 8, 0,
|
||||
);
|
||||
let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
|
||||
let r = _mm_shuffle_epi8(a, b);
|
||||
assert_eq_m128i(r, expected);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn test_mm256_shuffle_epi8() {
|
||||
|
@ -222,7 +222,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
_ => fx.bcx.ins().iconst(types::I32, 0),
|
||||
});
|
||||
}
|
||||
"llvm.x86.avx2.pshuf.b" => {
|
||||
"llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => {
|
||||
let (a, b) = match args {
|
||||
[a, b] => (a, b),
|
||||
_ => bug!("wrong number of args for intrinsic {intrinsic}"),
|
||||
@ -241,15 +241,18 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
let res = fx.bcx.ins().select(is_zero, zero, a_lane);
|
||||
ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
|
||||
}
|
||||
for i in 16..32 {
|
||||
let b_lane = b.value_lane(fx, i).load_scalar(fx);
|
||||
let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
|
||||
let b_lane_masked = fx.bcx.ins().band_imm(b_lane, 0xf);
|
||||
let a_idx = fx.bcx.ins().iadd_imm(b_lane_masked, 16);
|
||||
let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
|
||||
let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
|
||||
let res = fx.bcx.ins().select(is_zero, zero, a_lane);
|
||||
ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
|
||||
|
||||
if intrinsic == "llvm.x86.avx2.pshuf.b" {
|
||||
for i in 16..32 {
|
||||
let b_lane = b.value_lane(fx, i).load_scalar(fx);
|
||||
let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
|
||||
let b_lane_masked = fx.bcx.ins().band_imm(b_lane, 0xf);
|
||||
let a_idx = fx.bcx.ins().iadd_imm(b_lane_masked, 16);
|
||||
let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
|
||||
let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
|
||||
let res = fx.bcx.ins().select(is_zero, zero, a_lane);
|
||||
ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
|
||||
}
|
||||
}
|
||||
}
|
||||
"llvm.x86.avx2.vperm2i128" => {
|
||||
|
Loading…
x
Reference in New Issue
Block a user