portable-simd#261: Rename horizontal_* to reduce_*
This commit is contained in:
commit
72df4c4505
@ -33,7 +33,7 @@ SIMD has a few special vocabulary terms you should know:
|
|||||||
|
|
||||||
* **Vertical:** When an operation is "vertical", each lane is processed individually, without regard to the other lanes in the same vector. For example, a "vertical add" between two vectors would add lane 0 of `a` to lane 0 of `b`, storing the total in lane 0 of `out`, and then do the same for lanes 1, 2, etc. Most SIMD operations are vertical operations, so if your problem is a vertical problem then you can probably solve it with SIMD.
|
* **Vertical:** When an operation is "vertical", each lane is processed individually, without regard to the other lanes in the same vector. For example, a "vertical add" between two vectors would add lane 0 of `a` to lane 0 of `b`, storing the total in lane 0 of `out`, and then do the same for lanes 1, 2, etc. Most SIMD operations are vertical operations, so if your problem is a vertical problem then you can probably solve it with SIMD.
|
||||||
|
|
||||||
* **Horizontal:** When an operation is "horizontal", the lanes within a single vector interact in some way. A "horizontal add" might add up lane 0 of `a` with lane 1 of `a`, with the total in lane 0 of `out`.
|
* **Reducing/Reduce:** When an operation is "reducing" (functions named `reduce_*`), the lanes within a single vector are merged using some operation such as addition, returning the merged value as a scalar. For instance, a reducing add would return the sum of all the lanes' values.
|
||||||
|
|
||||||
* **Target Feature:** Rust calls a CPU architecture extension a `target_feature`. Proper SIMD requires various CPU extensions to be enabled (details below). Don't confuse this with `feature`, which is a Cargo crate concept.
|
* **Target Feature:** Rust calls a CPU architecture extension a `target_feature`. Proper SIMD requires various CPU extensions to be enabled (details below). Don't confuse this with `feature`, which is a Cargo crate concept.
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
|
|||||||
let det = det.rotate_lanes_right::<2>() + det;
|
let det = det.rotate_lanes_right::<2>() + det;
|
||||||
let det = det.reverse().rotate_lanes_right::<2>() + det;
|
let det = det.reverse().rotate_lanes_right::<2>() + det;
|
||||||
|
|
||||||
if det.horizontal_sum() == 0. {
|
if det.reduce_sum() == 0. {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
// calculate the reciprocal
|
// calculate the reciprocal
|
||||||
|
@ -107,10 +107,10 @@ mod nbody {
|
|||||||
let mut e = 0.;
|
let mut e = 0.;
|
||||||
for i in 0..N_BODIES {
|
for i in 0..N_BODIES {
|
||||||
let bi = &bodies[i];
|
let bi = &bodies[i];
|
||||||
e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5;
|
e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5;
|
||||||
for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
|
for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
|
||||||
let dx = bi.x - bj.x;
|
let dx = bi.x - bj.x;
|
||||||
e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt()
|
e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
e
|
e
|
||||||
@ -134,8 +134,8 @@ mod nbody {
|
|||||||
let mut mag = [0.0; N];
|
let mut mag = [0.0; N];
|
||||||
for i in (0..N).step_by(2) {
|
for i in (0..N).step_by(2) {
|
||||||
let d2s = f64x2::from_array([
|
let d2s = f64x2::from_array([
|
||||||
(r[i] * r[i]).horizontal_sum(),
|
(r[i] * r[i]).reduce_sum(),
|
||||||
(r[i + 1] * r[i + 1]).horizontal_sum(),
|
(r[i + 1] * r[i + 1]).reduce_sum(),
|
||||||
]);
|
]);
|
||||||
let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
|
let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
|
||||||
mag[i] = dmags[0];
|
mag[i] = dmags[0];
|
||||||
|
@ -20,7 +20,7 @@ fn mult_av(v: &[f64], out: &mut [f64]) {
|
|||||||
sum += b / a;
|
sum += b / a;
|
||||||
j += 2
|
j += 2
|
||||||
}
|
}
|
||||||
*out = sum.horizontal_sum();
|
*out = sum.reduce_sum();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,7 +38,7 @@ fn mult_atv(v: &[f64], out: &mut [f64]) {
|
|||||||
sum += b / a;
|
sum += b / a;
|
||||||
j += 2
|
j += 2
|
||||||
}
|
}
|
||||||
*out = sum.horizontal_sum();
|
*out = sum.reduce_sum();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,30 +11,30 @@ macro_rules! impl_integer_reductions {
|
|||||||
where
|
where
|
||||||
LaneCount<LANES>: SupportedLaneCount,
|
LaneCount<LANES>: SupportedLaneCount,
|
||||||
{
|
{
|
||||||
/// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
|
/// Reducing wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_sum(self) -> $scalar {
|
pub fn reduce_sum(self) -> $scalar {
|
||||||
// Safety: `self` is an integer vector
|
// Safety: `self` is an integer vector
|
||||||
unsafe { simd_reduce_add_ordered(self, 0) }
|
unsafe { simd_reduce_add_ordered(self, 0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
|
/// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_product(self) -> $scalar {
|
pub fn reduce_product(self) -> $scalar {
|
||||||
// Safety: `self` is an integer vector
|
// Safety: `self` is an integer vector
|
||||||
unsafe { simd_reduce_mul_ordered(self, 1) }
|
unsafe { simd_reduce_mul_ordered(self, 1) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal maximum. Returns the maximum lane in the vector.
|
/// Reducing maximum. Returns the maximum lane in the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_max(self) -> $scalar {
|
pub fn reduce_max(self) -> $scalar {
|
||||||
// Safety: `self` is an integer vector
|
// Safety: `self` is an integer vector
|
||||||
unsafe { simd_reduce_max(self) }
|
unsafe { simd_reduce_max(self) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal minimum. Returns the minimum lane in the vector.
|
/// Reducing minimum. Returns the minimum lane in the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_min(self) -> $scalar {
|
pub fn reduce_min(self) -> $scalar {
|
||||||
// Safety: `self` is an integer vector
|
// Safety: `self` is an integer vector
|
||||||
unsafe { simd_reduce_min(self) }
|
unsafe { simd_reduce_min(self) }
|
||||||
}
|
}
|
||||||
@ -60,9 +60,9 @@ macro_rules! impl_float_reductions {
|
|||||||
LaneCount<LANES>: SupportedLaneCount,
|
LaneCount<LANES>: SupportedLaneCount,
|
||||||
{
|
{
|
||||||
|
|
||||||
/// Horizontal add. Returns the sum of the lanes of the vector.
|
/// Reducing add. Returns the sum of the lanes of the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_sum(self) -> $scalar {
|
pub fn reduce_sum(self) -> $scalar {
|
||||||
// LLVM sum is inaccurate on i586
|
// LLVM sum is inaccurate on i586
|
||||||
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
||||||
self.as_array().iter().sum()
|
self.as_array().iter().sum()
|
||||||
@ -72,9 +72,9 @@ macro_rules! impl_float_reductions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal multiply. Returns the product of the lanes of the vector.
|
/// Reducing multiply. Returns the product of the lanes of the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_product(self) -> $scalar {
|
pub fn reduce_product(self) -> $scalar {
|
||||||
// LLVM product is inaccurate on i586
|
// LLVM product is inaccurate on i586
|
||||||
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
||||||
self.as_array().iter().product()
|
self.as_array().iter().product()
|
||||||
@ -84,22 +84,22 @@ macro_rules! impl_float_reductions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal maximum. Returns the maximum lane in the vector.
|
/// Reducing maximum. Returns the maximum lane in the vector.
|
||||||
///
|
///
|
||||||
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
|
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
|
||||||
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_max(self) -> $scalar {
|
pub fn reduce_max(self) -> $scalar {
|
||||||
// Safety: `self` is a float vector
|
// Safety: `self` is a float vector
|
||||||
unsafe { simd_reduce_max(self) }
|
unsafe { simd_reduce_max(self) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Horizontal minimum. Returns the minimum lane in the vector.
|
/// Reducing minimum. Returns the minimum lane in the vector.
|
||||||
///
|
///
|
||||||
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
|
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
|
||||||
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_min(self) -> $scalar {
|
pub fn reduce_min(self) -> $scalar {
|
||||||
// Safety: `self` is a float vector
|
// Safety: `self` is a float vector
|
||||||
unsafe { simd_reduce_min(self) }
|
unsafe { simd_reduce_min(self) }
|
||||||
}
|
}
|
||||||
@ -116,10 +116,10 @@ where
|
|||||||
T: SimdElement + BitAnd<T, Output = T>,
|
T: SimdElement + BitAnd<T, Output = T>,
|
||||||
LaneCount<LANES>: SupportedLaneCount,
|
LaneCount<LANES>: SupportedLaneCount,
|
||||||
{
|
{
|
||||||
/// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of
|
/// Reducing bitwise "and". Returns the cumulative bitwise "and" across the lanes of
|
||||||
/// the vector.
|
/// the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_and(self) -> T {
|
pub fn reduce_and(self) -> T {
|
||||||
unsafe { simd_reduce_and(self) }
|
unsafe { simd_reduce_and(self) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -130,10 +130,10 @@ where
|
|||||||
T: SimdElement + BitOr<T, Output = T>,
|
T: SimdElement + BitOr<T, Output = T>,
|
||||||
LaneCount<LANES>: SupportedLaneCount,
|
LaneCount<LANES>: SupportedLaneCount,
|
||||||
{
|
{
|
||||||
/// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of
|
/// Reducing bitwise "or". Returns the cumulative bitwise "or" across the lanes of
|
||||||
/// the vector.
|
/// the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_or(self) -> T {
|
pub fn reduce_or(self) -> T {
|
||||||
unsafe { simd_reduce_or(self) }
|
unsafe { simd_reduce_or(self) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,10 +144,10 @@ where
|
|||||||
T: SimdElement + BitXor<T, Output = T>,
|
T: SimdElement + BitXor<T, Output = T>,
|
||||||
LaneCount<LANES>: SupportedLaneCount,
|
LaneCount<LANES>: SupportedLaneCount,
|
||||||
{
|
{
|
||||||
/// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of
|
/// Reducing bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of
|
||||||
/// the vector.
|
/// the vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn horizontal_xor(self) -> T {
|
pub fn reduce_xor(self) -> T {
|
||||||
unsafe { simd_reduce_xor(self) }
|
unsafe { simd_reduce_xor(self) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,70 +94,70 @@ macro_rules! impl_binary_checked_op_test {
|
|||||||
macro_rules! impl_common_integer_tests {
|
macro_rules! impl_common_integer_tests {
|
||||||
{ $vector:ident, $scalar:ident } => {
|
{ $vector:ident, $scalar:ident } => {
|
||||||
test_helpers::test_lanes! {
|
test_helpers::test_lanes! {
|
||||||
fn horizontal_sum<const LANES: usize>() {
|
fn reduce_sum<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_sum(),
|
$vector::<LANES>::from_array(x).reduce_sum(),
|
||||||
x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
|
x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_product<const LANES: usize>() {
|
fn reduce_product<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_product(),
|
$vector::<LANES>::from_array(x).reduce_product(),
|
||||||
x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
|
x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_and<const LANES: usize>() {
|
fn reduce_and<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_and(),
|
$vector::<LANES>::from_array(x).reduce_and(),
|
||||||
x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
|
x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_or<const LANES: usize>() {
|
fn reduce_or<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_or(),
|
$vector::<LANES>::from_array(x).reduce_or(),
|
||||||
x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
|
x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_xor<const LANES: usize>() {
|
fn reduce_xor<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_xor(),
|
$vector::<LANES>::from_array(x).reduce_xor(),
|
||||||
x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
|
x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_max<const LANES: usize>() {
|
fn reduce_max<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_max(),
|
$vector::<LANES>::from_array(x).reduce_max(),
|
||||||
x.iter().copied().max().unwrap(),
|
x.iter().copied().max().unwrap(),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_min<const LANES: usize>() {
|
fn reduce_min<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
$vector::<LANES>::from_array(x).horizontal_min(),
|
$vector::<LANES>::from_array(x).reduce_min(),
|
||||||
x.iter().copied().min().unwrap(),
|
x.iter().copied().min().unwrap(),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -528,29 +528,29 @@ macro_rules! impl_float_tests {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_sum<const LANES: usize>() {
|
fn reduce_sum<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
Vector::<LANES>::from_array(x).horizontal_sum(),
|
Vector::<LANES>::from_array(x).reduce_sum(),
|
||||||
x.iter().sum(),
|
x.iter().sum(),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_product<const LANES: usize>() {
|
fn reduce_product<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
test_helpers::prop_assert_biteq! (
|
test_helpers::prop_assert_biteq! (
|
||||||
Vector::<LANES>::from_array(x).horizontal_product(),
|
Vector::<LANES>::from_array(x).reduce_product(),
|
||||||
x.iter().product(),
|
x.iter().product(),
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_max<const LANES: usize>() {
|
fn reduce_max<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
let vmax = Vector::<LANES>::from_array(x).horizontal_max();
|
let vmax = Vector::<LANES>::from_array(x).reduce_max();
|
||||||
let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
|
let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
|
||||||
// 0 and -0 are treated the same
|
// 0 and -0 are treated the same
|
||||||
if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
|
if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
|
||||||
@ -560,9 +560,9 @@ macro_rules! impl_float_tests {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn horizontal_min<const LANES: usize>() {
|
fn reduce_min<const LANES: usize>() {
|
||||||
test_helpers::test_1(&|x| {
|
test_helpers::test_1(&|x| {
|
||||||
let vmax = Vector::<LANES>::from_array(x).horizontal_min();
|
let vmax = Vector::<LANES>::from_array(x).reduce_min();
|
||||||
let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
|
let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
|
||||||
// 0 and -0 are treated the same
|
// 0 and -0 are treated the same
|
||||||
if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
|
if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user