fix simd_bitmask return type for non-power-of-two inputs, and add tests
This commit is contained in:
parent
6896fa6619
commit
e9dd39cda4
@ -1121,8 +1121,8 @@ macro_rules! require_simd {
|
|||||||
if name == sym::simd_select_bitmask {
|
if name == sym::simd_select_bitmask {
|
||||||
let (len, _) = require_simd!(arg_tys[1], SimdArgument);
|
let (len, _) = require_simd!(arg_tys[1], SimdArgument);
|
||||||
|
|
||||||
let expected_int_bits = (len.max(8) - 1).next_power_of_two();
|
let expected_int_bits = len.max(8).next_power_of_two();
|
||||||
let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
|
let expected_bytes = len.div_ceil(8);
|
||||||
|
|
||||||
let mask_ty = arg_tys[0];
|
let mask_ty = arg_tys[0];
|
||||||
let mask = match mask_ty.kind() {
|
let mask = match mask_ty.kind() {
|
||||||
@ -1379,17 +1379,16 @@ macro_rules! require_simd {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if name == sym::simd_bitmask {
|
if name == sym::simd_bitmask {
|
||||||
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
|
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
|
||||||
// vector mask and returns the most significant bit (MSB) of each lane in the form
|
// returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
|
||||||
// of either:
|
|
||||||
// * an unsigned integer
|
// * an unsigned integer
|
||||||
// * an array of `u8`
|
// * an array of `u8`
|
||||||
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
|
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
|
||||||
//
|
//
|
||||||
// The bit order of the result depends on the byte endianness, LSB-first for little
|
// The bit order of the result depends on the byte endianness, LSB-first for little
|
||||||
// endian and MSB-first for big endian.
|
// endian and MSB-first for big endian.
|
||||||
let expected_int_bits = in_len.max(8);
|
let expected_int_bits = in_len.max(8).next_power_of_two();
|
||||||
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
|
let expected_bytes = in_len.div_ceil(8);
|
||||||
|
|
||||||
// Integer vector <i{in_bitwidth} x in_len>:
|
// Integer vector <i{in_bitwidth} x in_len>:
|
||||||
let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
|
let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
|
||||||
@ -1409,7 +1408,8 @@ macro_rules! require_simd {
|
|||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
|
// LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
|
||||||
|
// `pmovmskb` and similar on x86.
|
||||||
let shift_indices =
|
let shift_indices =
|
||||||
vec![
|
vec![
|
||||||
bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
|
bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
|
||||||
|
90
tests/ui/simd/simd-bitmask-notpow2.rs
Normal file
90
tests/ui/simd/simd-bitmask-notpow2.rs
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
//@run-pass
|
||||||
|
// SEGFAULTS on LLVM 17. This should be merged into `simd-bitmask` once we require LLVM 18.
|
||||||
|
//@ min-llvm-version: 18
|
||||||
|
// FIXME: broken codegen on big-endian (https://github.com/rust-lang/rust/issues/127205)
|
||||||
|
//@ ignore-endian-big
|
||||||
|
#![feature(repr_simd, intrinsics)]
|
||||||
|
|
||||||
|
extern "rust-intrinsic" {
|
||||||
|
fn simd_bitmask<T, U>(v: T) -> U;
|
||||||
|
fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Non-power-of-2 multi-byte mask.
|
||||||
|
#[repr(simd, packed)]
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
|
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||||
|
struct i32x10([i32; 10]);
|
||||||
|
impl i32x10 {
|
||||||
|
fn splat(x: i32) -> Self {
|
||||||
|
Self([x; 10])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsafe {
|
||||||
|
let mask = i32x10([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]);
|
||||||
|
let mask_bits = if cfg!(target_endian = "little") { 0b0101001011 } else { 0b1101001010 };
|
||||||
|
let mask_bytes =
|
||||||
|
if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] };
|
||||||
|
|
||||||
|
let bitmask1: u16 = simd_bitmask(mask);
|
||||||
|
let bitmask2: [u8; 2] = simd_bitmask(mask);
|
||||||
|
assert_eq!(bitmask1, mask_bits);
|
||||||
|
assert_eq!(bitmask2, mask_bytes);
|
||||||
|
|
||||||
|
let selected1 = simd_select_bitmask::<u16, _>(
|
||||||
|
mask_bits,
|
||||||
|
i32x10::splat(!0), // yes
|
||||||
|
i32x10::splat(0), // no
|
||||||
|
);
|
||||||
|
let selected2 = simd_select_bitmask::<[u8; 2], _>(
|
||||||
|
mask_bytes,
|
||||||
|
i32x10::splat(!0), // yes
|
||||||
|
i32x10::splat(0), // no
|
||||||
|
);
|
||||||
|
assert_eq!(selected1, mask);
|
||||||
|
assert_eq!(selected2, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for a mask where the next multiple of 8 is not a power of two.
|
||||||
|
#[repr(simd, packed)]
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
|
#[derive(Copy, Clone, Debug, PartialEq)]
|
||||||
|
struct i32x20([i32; 20]);
|
||||||
|
impl i32x20 {
|
||||||
|
fn splat(x: i32) -> Self {
|
||||||
|
Self([x; 20])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsafe {
|
||||||
|
let mask = i32x20([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0, 0, 0, 0, !0, !0, !0, !0, !0, !0, !0]);
|
||||||
|
let mask_bits = if cfg!(target_endian = "little") {
|
||||||
|
0b11111110000101001011
|
||||||
|
} else {
|
||||||
|
0b11010010100001111111
|
||||||
|
};
|
||||||
|
let mask_bytes = if cfg!(target_endian = "little") {
|
||||||
|
[0b01001011, 0b11100001, 0b1111]
|
||||||
|
} else {
|
||||||
|
[0b1101, 0b00101000, 0b01111111]
|
||||||
|
};
|
||||||
|
|
||||||
|
let bitmask1: u32 = simd_bitmask(mask);
|
||||||
|
let bitmask2: [u8; 3] = simd_bitmask(mask);
|
||||||
|
assert_eq!(bitmask1, mask_bits);
|
||||||
|
assert_eq!(bitmask2, mask_bytes);
|
||||||
|
|
||||||
|
let selected1 = simd_select_bitmask::<u32, _>(
|
||||||
|
mask_bits,
|
||||||
|
i32x20::splat(!0), // yes
|
||||||
|
i32x20::splat(0), // no
|
||||||
|
);
|
||||||
|
let selected2 = simd_select_bitmask::<[u8; 3], _>(
|
||||||
|
mask_bytes,
|
||||||
|
i32x20::splat(!0), // yes
|
||||||
|
i32x20::splat(0), // no
|
||||||
|
);
|
||||||
|
assert_eq!(selected1, mask);
|
||||||
|
assert_eq!(selected2, mask);
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,4 @@
|
|||||||
//@run-pass
|
//@run-pass
|
||||||
//@ignore-endian-big behavior of simd_select_bitmask is endian-specific
|
|
||||||
#![feature(repr_simd, intrinsics)]
|
#![feature(repr_simd, intrinsics)]
|
||||||
|
|
||||||
extern "rust-intrinsic" {
|
extern "rust-intrinsic" {
|
||||||
@ -17,36 +16,58 @@ fn main() {
|
|||||||
let i: u8 = simd_bitmask(v);
|
let i: u8 = simd_bitmask(v);
|
||||||
let a: [u8; 1] = simd_bitmask(v);
|
let a: [u8; 1] = simd_bitmask(v);
|
||||||
|
|
||||||
assert_eq!(i, 0b0101);
|
if cfg!(target_endian = "little") {
|
||||||
assert_eq!(a, [0b0101]);
|
assert_eq!(i, 0b0101);
|
||||||
|
assert_eq!(a, [0b0101]);
|
||||||
|
} else {
|
||||||
|
assert_eq!(i, 0b1010);
|
||||||
|
assert_eq!(a, [0b1010]);
|
||||||
|
}
|
||||||
|
|
||||||
let v = Simd::<i8, 16>([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]);
|
let v = Simd::<i8, 16>([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]);
|
||||||
let i: u16 = simd_bitmask(v);
|
let i: u16 = simd_bitmask(v);
|
||||||
let a: [u8; 2] = simd_bitmask(v);
|
let a: [u8; 2] = simd_bitmask(v);
|
||||||
|
|
||||||
assert_eq!(i, 0b0101000000001100);
|
if cfg!(target_endian = "little") {
|
||||||
assert_eq!(a, [0b1100, 0b01010000]);
|
assert_eq!(i, 0b0101000000001100);
|
||||||
|
assert_eq!(a, [0b00001100, 0b01010000]);
|
||||||
|
} else {
|
||||||
|
assert_eq!(i, 0b0011000000001010);
|
||||||
|
assert_eq!(a, [0b00110000, 0b00001010]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
let a = Simd::<i32, 8>([0, 1, 2, 3, 4, 5, 6, 7]);
|
let a = Simd::<i32, 4>([0, 1, 2, 3]);
|
||||||
let b = Simd::<i32, 8>([8, 9, 10, 11, 12, 13, 14, 15]);
|
let b = Simd::<i32, 4>([8, 9, 10, 11]);
|
||||||
let e = [0, 9, 2, 11, 12, 13, 14, 15];
|
let e = [0, 9, 2, 11];
|
||||||
|
|
||||||
let r = simd_select_bitmask(0b0101u8, a, b);
|
let mask = if cfg!(target_endian = "little") { 0b0101u8 } else { 0b1010u8 };
|
||||||
|
let r = simd_select_bitmask(mask, a, b);
|
||||||
assert_eq!(r.0, e);
|
assert_eq!(r.0, e);
|
||||||
|
|
||||||
let r = simd_select_bitmask([0b0101u8], a, b);
|
let mask = if cfg!(target_endian = "little") { [0b0101u8] } else { [0b1010u8] };
|
||||||
|
let r = simd_select_bitmask(mask, a, b);
|
||||||
assert_eq!(r.0, e);
|
assert_eq!(r.0, e);
|
||||||
|
|
||||||
let a = Simd::<i32, 16>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
|
let a = Simd::<i32, 16>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
|
||||||
let b = Simd::<i32, 16>([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
|
let b = Simd::<i32, 16>([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
|
||||||
let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31];
|
let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31];
|
||||||
|
|
||||||
let r = simd_select_bitmask(0b0101000000001100u16, a, b);
|
let mask = if cfg!(target_endian = "little") {
|
||||||
|
0b0101000000001100u16
|
||||||
|
} else {
|
||||||
|
0b0011000000001010u16
|
||||||
|
};
|
||||||
|
let r = simd_select_bitmask(mask, a, b);
|
||||||
assert_eq!(r.0, e);
|
assert_eq!(r.0, e);
|
||||||
|
|
||||||
let r = simd_select_bitmask([0b1100u8, 0b01010000u8], a, b);
|
let mask = if cfg!(target_endian = "little") {
|
||||||
|
[0b00001100u8, 0b01010000u8]
|
||||||
|
} else {
|
||||||
|
[0b00110000u8, 0b00001010u8]
|
||||||
|
};
|
||||||
|
let r = simd_select_bitmask(mask, a, b);
|
||||||
assert_eq!(r.0, e);
|
assert_eq!(r.0, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user