From c196b8abaafaf360647513c64fac0c8568f9b747 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 11 Mar 2022 14:49:06 -0800 Subject: [PATCH 1/2] replace horizontal_* with reduce_* --- crates/core_simd/examples/matrix_inversion.rs | 2 +- crates/core_simd/examples/nbody.rs | 8 ++-- crates/core_simd/examples/spectral_norm.rs | 4 +- crates/core_simd/src/reduction.rs | 44 +++++++++---------- crates/core_simd/tests/ops_macros.rs | 44 +++++++++---------- 5 files changed, 51 insertions(+), 51 deletions(-) diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index c51a566deb5..39f530f68f5 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -233,7 +233,7 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option { let det = det.rotate_lanes_right::<2>() + det; let det = det.reverse().rotate_lanes_right::<2>() + det; - if det.horizontal_sum() == 0. { + if det.reduce_sum() == 0. { return None; } // calculate the reciprocal diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 7b1e6840f64..664a0454bbd 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -107,10 +107,10 @@ mod nbody { let mut e = 0.; for i in 0..N_BODIES { let bi = &bodies[i]; - e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5; + e += bi.mass * (bi.v * bi.v).reduce_sum() * 0.5; for bj in bodies.iter().take(N_BODIES).skip(i + 1) { let dx = bi.x - bj.x; - e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt() + e -= bi.mass * bj.mass / (dx * dx).reduce_sum().sqrt() } } e @@ -134,8 +134,8 @@ mod nbody { let mut mag = [0.0; N]; for i in (0..N).step_by(2) { let d2s = f64x2::from_array([ - (r[i] * r[i]).horizontal_sum(), - (r[i + 1] * r[i + 1]).horizontal_sum(), + (r[i] * r[i]).reduce_sum(), + (r[i + 1] * r[i + 1]).reduce_sum(), ]); let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt()); mag[i] = dmags[0]; diff --git a/crates/core_simd/examples/spectral_norm.rs b/crates/core_simd/examples/spectral_norm.rs index c515dad4dea..012182e090b 100644 --- a/crates/core_simd/examples/spectral_norm.rs +++ b/crates/core_simd/examples/spectral_norm.rs @@ -20,7 +20,7 @@ fn mult_av(v: &[f64], out: &mut [f64]) { sum += b / a; j += 2 } - *out = sum.horizontal_sum(); + *out = sum.reduce_sum(); } } @@ -38,7 +38,7 @@ fn mult_atv(v: &[f64], out: &mut [f64]) { sum += b / a; j += 2 } - *out = sum.horizontal_sum(); + *out = sum.reduce_sum(); } } diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs index e1cd743e442..3177fd167fc 100644 --- a/crates/core_simd/src/reduction.rs +++ b/crates/core_simd/src/reduction.rs @@ -11,30 +11,30 @@ macro_rules! impl_integer_reductions { where LaneCount: SupportedLaneCount, { - /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. + /// Reducing wrapping add. Returns the sum of the lanes of the vector, with wrapping addition. #[inline] - pub fn horizontal_sum(self) -> $scalar { + pub fn reduce_sum(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_add_ordered(self, 0) } } - /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. + /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication. #[inline] - pub fn horizontal_product(self) -> $scalar { + pub fn reduce_product(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_mul_ordered(self, 1) } } - /// Horizontal maximum. Returns the maximum lane in the vector. + /// Reducing maximum. Returns the maximum lane in the vector. #[inline] - pub fn horizontal_max(self) -> $scalar { + pub fn reduce_max(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_max(self) } } - /// Horizontal minimum. Returns the minimum lane in the vector. + /// Reducing minimum. Returns the minimum lane in the vector. #[inline] - pub fn horizontal_min(self) -> $scalar { + pub fn reduce_min(self) -> $scalar { // Safety: `self` is an integer vector unsafe { simd_reduce_min(self) } } @@ -60,9 +60,9 @@ macro_rules! impl_float_reductions { LaneCount: SupportedLaneCount, { - /// Horizontal add. Returns the sum of the lanes of the vector. + /// Reducing add. Returns the sum of the lanes of the vector. #[inline] - pub fn horizontal_sum(self) -> $scalar { + pub fn reduce_sum(self) -> $scalar { // LLVM sum is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().sum() @@ -72,9 +72,9 @@ macro_rules! impl_float_reductions { } } - /// Horizontal multiply. Returns the product of the lanes of the vector. + /// Reducing multiply. Returns the product of the lanes of the vector. #[inline] - pub fn horizontal_product(self) -> $scalar { + pub fn reduce_product(self) -> $scalar { // LLVM product is inaccurate on i586 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) { self.as_array().iter().product() @@ -84,22 +84,22 @@ macro_rules! impl_float_reductions { } } - /// Horizontal maximum. Returns the maximum lane in the vector. + /// Reducing maximum. Returns the maximum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn horizontal_max(self) -> $scalar { + pub fn reduce_max(self) -> $scalar { // Safety: `self` is a float vector unsafe { simd_reduce_max(self) } } - /// Horizontal minimum. Returns the minimum lane in the vector. + /// Reducing minimum. Returns the minimum lane in the vector. /// /// Returns values based on equality, so a vector containing both `0.` and `-0.` may /// return either. This function will not return `NaN` unless all lanes are `NaN`. #[inline] - pub fn horizontal_min(self) -> $scalar { + pub fn reduce_min(self) -> $scalar { // Safety: `self` is a float vector unsafe { simd_reduce_min(self) } } @@ -116,10 +116,10 @@ where T: SimdElement + BitAnd, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of + /// Reducing bitwise "and". Returns the cumulative bitwise "and" across the lanes of /// the vector. #[inline] - pub fn horizontal_and(self) -> T { + pub fn reduce_and(self) -> T { unsafe { simd_reduce_and(self) } } } @@ -130,10 +130,10 @@ where T: SimdElement + BitOr, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of + /// Reducing bitwise "or". Returns the cumulative bitwise "or" across the lanes of /// the vector. #[inline] - pub fn horizontal_or(self) -> T { + pub fn reduce_or(self) -> T { unsafe { simd_reduce_or(self) } } } @@ -144,10 +144,10 @@ where T: SimdElement + BitXor, LaneCount: SupportedLaneCount, { - /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of + /// Reducing bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of /// the vector. #[inline] - pub fn horizontal_xor(self) -> T { + pub fn reduce_xor(self) -> T { unsafe { simd_reduce_xor(self) } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 96da8c1b8dc..0b3f8979a37 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -94,70 +94,70 @@ macro_rules! impl_binary_checked_op_test { macro_rules! impl_common_integer_tests { { $vector:ident, $scalar:ident } => { test_helpers::test_lanes! { - fn horizontal_sum() { + fn reduce_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_sum(), + $vector::::from_array(x).reduce_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), ); Ok(()) }); } - fn horizontal_product() { + fn reduce_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_product(), + $vector::::from_array(x).reduce_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), ); Ok(()) }); } - fn horizontal_and() { + fn reduce_and() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_and(), + $vector::::from_array(x).reduce_and(), x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand), ); Ok(()) }); } - fn horizontal_or() { + fn reduce_or() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_or(), + $vector::::from_array(x).reduce_or(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor), ); Ok(()) }); } - fn horizontal_xor() { + fn reduce_xor() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_xor(), + $vector::::from_array(x).reduce_xor(), x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor), ); Ok(()) }); } - fn horizontal_max() { + fn reduce_max() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_max(), + $vector::::from_array(x).reduce_max(), x.iter().copied().max().unwrap(), ); Ok(()) }); } - fn horizontal_min() { + fn reduce_min() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - $vector::::from_array(x).horizontal_min(), + $vector::::from_array(x).reduce_min(), x.iter().copied().min().unwrap(), ); Ok(()) @@ -516,29 +516,29 @@ macro_rules! impl_float_tests { }) } - fn horizontal_sum() { + fn reduce_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).horizontal_sum(), + Vector::::from_array(x).reduce_sum(), x.iter().sum(), ); Ok(()) }); } - fn horizontal_product() { + fn reduce_product() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( - Vector::::from_array(x).horizontal_product(), + Vector::::from_array(x).reduce_product(), x.iter().product(), ); Ok(()) }); } - fn horizontal_max() { + fn reduce_max() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).horizontal_max(); + let vmax = Vector::::from_array(x).reduce_max(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { @@ -548,9 +548,9 @@ macro_rules! impl_float_tests { }); } - fn horizontal_min() { + fn reduce_min() { test_helpers::test_1(&|x| { - let vmax = Vector::::from_array(x).horizontal_min(); + let vmax = Vector::::from_array(x).reduce_min(); let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min); // 0 and -0 are treated the same if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) { From b6e03f58864dde979dbe97b7d983d0ba29b16227 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 11 Mar 2022 14:54:36 -0800 Subject: [PATCH 2/2] Change beginner's guide to explain Reducing rather than Horizontal. --- beginners-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beginners-guide.md b/beginners-guide.md index dfd357c4592..75158e5aa85 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -33,7 +33,7 @@ SIMD has a few special vocabulary terms you should know: * **Vertical:** When an operation is "vertical", each lane processes individually without regard to the other lanes in the same vector. For example, a "vertical add" between two vectors would add lane 0 in `a` with lane 0 in `b`, with the total in lane 0 of `out`, and then the same thing for lanes 1, 2, etc. Most SIMD operations are vertical operations, so if your problem is a vertical problem then you can probably solve it with SIMD. -* **Horizontal:** When an operation is "horizontal", the lanes within a single vector interact in some way. A "horizontal add" might add up lane 0 of `a` with lane 1 of `a`, with the total in lane 0 of `out`. +* **Reducing/Reduce:** When an operation is "reducing" (functions named `reduce_*`), the lanes within a single vector are merged using some operation such as addition, returning the merged value as a scalar. For instance, a reducing add would return the sum of all the lanes' values. * **Target Feature:** Rust calls a CPU architecture extension a `target_feature`. Proper SIMD requires various CPU extensions to be enabled (details below). Don't confuse this with `feature`, which is a Cargo crate concept. @@ -83,4 +83,4 @@ Fortunately, most SIMD types have a fairly predictable size. `i32x4` is bit-equi However, this is not the same as alignment. Computer architectures generally prefer aligned accesses, especially when moving data between memory and vector registers, and while some support specialized operations that can bend the rules to help with this, unaligned access is still typically slow, or even undefined behavior. In addition, different architectures can require different alignments when interacting with their native SIMD types. For this reason, any `#[repr(simd)]` type has a non-portable alignment. If it is necessary to directly interact with the alignment of these types, it should be via [`mem::align_of`]. [`mem::transmute`]: https://doc.rust-lang.org/core/mem/fn.transmute.html -[`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html \ No newline at end of file +[`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html