Stop using into_iter in array::map

This commit is contained in:
Scott McMurray 2023-02-02 20:58:22 -08:00
parent 50d3ba5bcb
commit 5a7342c3dd
4 changed files with 147 additions and 11 deletions

View File

@ -0,0 +1,51 @@
use crate::iter::TrustedLen;
use crate::mem::ManuallyDrop;
use crate::ptr::drop_in_place;
use crate::slice;
/// A wrapper around `slice::IterMut` that moves elements *out* of the
/// borrowed array as it iterates (see `next`, which `read`s through a raw
/// pointer), and drops every element it has not yet yielded when it is
/// itself dropped (see the `Drop` impl).
// INVARIANT: It's ok to drop the remainder of the inner iterator.
pub(crate) struct Drain<'a, T>(slice::IterMut<'a, T>);
/// Consumes `array`, hands `func` a draining iterator over its elements,
/// and returns whatever `func` returns.
///
/// Elements that `func` does not pull out of the iterator are dropped when
/// the `Drain` is dropped — including when `func` unwinds — so every element
/// is dropped exactly once and none is dropped twice.
pub(crate) fn drain_array_with<T, R, const N: usize>(
array: [T; N],
func: impl for<'a> FnOnce(Drain<'a, T>) -> R,
) -> R {
// Inhibit the local's own destructor; from this point the `Drain` is
// solely responsible for dropping the elements.
let mut array = ManuallyDrop::new(array);
// SAFETY: Now that the local won't drop it, it's ok to construct the `Drain` which will.
let drain = Drain(array.iter_mut());
func(drain)
}
impl<T> Drop for Drain<'_, T> {
fn drop(&mut self) {
// `as_mut_slice` covers exactly the elements `next` has not yielded
// yet (`next` advances the inner iterator *before* reading a value).
// SAFETY: By the type invariant, we're allowed to drop all these.
unsafe { drop_in_place(self.0.as_mut_slice()) }
}
}
impl<T> Iterator for Drain<'_, T> {
type Item = T;
#[inline]
fn next(&mut self) -> Option<T> {
// Advance the underlying `IterMut` first, keeping only a raw pointer
// to the element it stepped past.
let p: *const T = self.0.next()?;
// SAFETY: The iterator was already advanced, so we won't drop this later.
Some(unsafe { p.read() })
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
// The count is exact — delegate to the `ExactSizeIterator` impl.
let n = self.len();
(n, Some(n))
}
}
impl<T> ExactSizeIterator for Drain<'_, T> {
#[inline]
fn len(&self) -> usize {
self.0.len()
}
}
// SAFETY: This is a 1:1 wrapper for a slice iterator, which is also `TrustedLen`.
// (`size_hint` and `len` above delegate straight to that inner iterator.)
unsafe impl<T> TrustedLen for Drain<'_, T> {}

View File

@ -17,9 +17,12 @@ use crate::ops::{
};
use crate::slice::{Iter, IterMut};
mod drain;
mod equality;
mod iter;
pub(crate) use drain::drain_array_with;
#[stable(feature = "array_value_iter", since = "1.51.0")]
pub use iter::IntoIter;
@ -513,9 +516,12 @@ impl<T, const N: usize> [T; N] {
where
F: FnMut(T) -> U,
{
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) }
drain_array_with(self, |iter| {
let mut iter = iter.map(f);
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { collect_into_array_unchecked(&mut iter) }
})
}
/// A fallible function `f` applied to each element on array `self` in order to
@ -552,9 +558,12 @@ impl<T, const N: usize> [T; N] {
R: Try,
R::Residual: Residual<[R::Output; N]>,
{
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { try_collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) }
drain_array_with(self, |iter| {
let mut iter = iter.map(f);
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { try_collect_into_array_unchecked(&mut iter) }
})
}
/// 'Zips up' two arrays into a single array of pairs.
@ -575,11 +584,14 @@ impl<T, const N: usize> [T; N] {
/// ```
#[unstable(feature = "array_zip", issue = "80094")]
pub fn zip<U>(self, rhs: [U; N]) -> [(T, U); N] {
let mut iter = IntoIterator::into_iter(self).zip(rhs);
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { collect_into_array_unchecked(&mut iter) }
drain_array_with(self, |lhs| {
drain_array_with(rhs, |rhs| {
let mut iter = crate::iter::zip(lhs, rhs);
// SAFETY: we know for certain that this iterator will yield exactly `N`
// items.
unsafe { collect_into_array_unchecked(&mut iter) }
})
})
}
/// Returns a slice containing the entire array. Equivalent to `&s[..]`.

View File

@ -700,3 +700,28 @@ fn array_into_iter_rfold() {
let s = it.rfold(10, |a, b| 10 * a + b);
assert_eq!(s, 10432);
}
#[cfg(not(panic = "abort"))]
#[test]
fn array_map_drops_unmapped_elements_on_panic() {
    /// Records each drop in the shared atomic counter, so the test can
    /// verify that every element is dropped exactly once even when the
    /// `map` closure panics partway through.
    struct DropCounter<'a>(usize, &'a AtomicUsize);
    impl Drop for DropCounter<'_> {
        fn drop(&mut self) {
            self.1.fetch_add(1, Ordering::SeqCst);
        }
    }

    const MAX: usize = 11;
    for panic_after in 0..MAX {
        let counter = AtomicUsize::new(0);
        // Use `MAX` for the array length as well, instead of repeating the
        // literal `11`, so the length stays in sync with the loop bound and
        // the final drop-count assertion.
        let a = array::from_fn::<_, MAX, _>(|i| DropCounter(i, &counter));
        let success = std::panic::catch_unwind(|| {
            let _ = a.map(|x| {
                // Panic once index `panic_after` is reached; every earlier
                // element must already have been dropped, in order.
                assert!(x.0 < panic_after);
                assert_eq!(counter.load(Ordering::SeqCst), x.0);
            });
        });
        assert!(success.is_err());
        // All MAX elements must have been dropped exactly once: those the
        // closure consumed, the one that panicked, and the unmapped rest.
        assert_eq!(counter.load(Ordering::SeqCst), MAX);
    }
}

View File

@ -0,0 +1,48 @@
// compile-flags: -C opt-level=3 -C target-cpu=x86-64-v3 -C llvm-args=-x86-asm-syntax=intel --emit=llvm-ir,asm
// no-system-llvm
// only-x86_64
// ignore-debug (the extra assertions get in the way)
#![crate_type = "lib"]
#![feature(array_zip)]
// CHECK-LABEL: @short_integer_map
#[no_mangle]
pub fn short_integer_map(x: [u32; 8]) -> [u32; 8] {
    // The FileCheck directives below require the whole 8-element map to be
    // vectorized into single 8-lane instructions (`2 * x + 1` lowers to a
    // shift plus an or), with no scalar per-element loop remaining.
    // CHECK: load <8 x i32>
    // CHECK: shl <8 x i32>
    // CHECK: or <8 x i32>
    // CHECK: store <8 x i32>
    x.map(|x| 2 * x + 1)
}
// CHECK-LABEL: @short_integer_zip_map
#[no_mangle]
pub fn short_integer_zip_map(x: [u32; 8], y: [u32; 8]) -> [u32; 8] {
    // Both inputs must be loaded as full 8-lane vectors and subtracted in a
    // single vector `sub`, i.e. the zip+map iterator pipeline fully
    // disappears after optimization.
    // CHECK: %[[A:.+]] = load <8 x i32>
    // CHECK: %[[B:.+]] = load <8 x i32>
    // CHECK: sub <8 x i32> %[[A]], %[[B]]
    // CHECK: store <8 x i32>
    x.zip(y).map(|(x, y)| x - y)
}
// This test is checking that LLVM can SRoA away a bunch of the overhead,
// like fully moving the iterators to registers. Notably, previous implementations
// of `map` ended up `alloca`ing the whole `array::IntoIterator`, meaning both a
// hard-to-eliminate `memcpy` and that the iteration counts needed to be written
// out to stack every iteration, even for infallible operations on `Copy` types.
//
// This is still imperfect, as there's more copies than would be ideal,
// but hopefully work like #103830 will improve that in future,
// and update this test to be stricter.
//
// CHECK-LABEL: @long_integer_map
#[no_mangle]
pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] {
    // Exactly three stack slots are permitted in the entry block: two
    // array-sized temporaries and the `ManuallyDrop` wrapper; the trailing
    // `CHECK-NOT` rejects any further `alloca` after them.
    // CHECK: start:
    // CHECK-NEXT: alloca [{{64|65}} x i32]
    // CHECK-NEXT: alloca [{{64|65}} x i32]
    // CHECK-NEXT: alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>"
    // CHECK-NOT: alloca
    x.map(|x| 2 * x + 1)
}