Document reentrancy in *Arena::alloc_from_iter

Nadrieril 2023-12-02 03:03:16 +01:00
parent 7058df2f4e
commit c1774a137d


@@ -197,23 +197,24 @@ fn alloc_raw_slice(&self, len: usize) -> *mut T {
         start_ptr
     }
 
+    /// Allocates the elements of this iterator into a contiguous slice in the `TypedArena`.
+    ///
+    /// Note: for reasons of reentrancy and panic safety we collect into a `SmallVec<[_; 8]>` before
+    /// storing the elements in the arena.
     #[inline]
     pub fn alloc_from_iter<I: IntoIterator<Item = T>>(&self, iter: I) -> &mut [T] {
-        // This implementation is entirely separate to
-        // `DroplessIterator::alloc_from_iter`, even though conceptually they
-        // are the same.
+        // Despite the similarity with `DroplessArena`, we cannot reuse their fast case. The reason
+        // is subtle: these arenas are reentrant. In other words, `iter` may very well be holding a
+        // reference to `self` and adding elements to the arena during iteration.
         //
-        // `DroplessIterator` (in the fast case) writes elements from the
-        // iterator one at a time into the allocated memory. That's easy
-        // because the elements don't implement `Drop`. But for `TypedArena`
-        // they do implement `Drop`, which means that if the iterator panics we
-        // could end up with some allocated-but-uninitialized elements, which
-        // will then cause UB in `TypedArena::drop`.
+        // For this reason, if we pre-allocated any space for the elements of this iterator, we'd
+        // have to track that some uninitialized elements are followed by some initialized elements,
+        // else we might accidentally drop uninitialized memory if something panics or if the
+        // iterator doesn't fill all the length we expected.
         //
-        // Instead we use an approach where any iterator panic will occur
-        // before the memory is allocated. This function is much less hot than
-        // `DroplessArena::alloc_from_iter`, so it doesn't need to be
-        // hyper-optimized.
+        // So we collect all the elements beforehand, which takes care of reentrancy and panic
+        // safety. This function is much less hot than `DroplessArena::alloc_from_iter`, so it
+        // doesn't need to be hyper-optimized.
         assert!(mem::size_of::<T>() != 0);
 
         let mut vec: SmallVec<[_; 8]> = iter.into_iter().collect();
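
To make the reentrancy hazard concrete, here is a hypothetical caller sketch (not part of the diff, assuming the `TypedArena` API from this file): the closure re-enters the arena while `alloc_from_iter` is still draining the iterator, which is exactly the case that collecting into a `SmallVec` first handles.

// Hypothetical sketch: the iterator holds `&arena` and allocates during
// iteration. If `alloc_from_iter` had pre-reserved `len` slots up front,
// these nested `alloc` calls would interleave with the slots being filled.
let arena: TypedArena<String> = TypedArena::default();
let slice = arena.alloc_from_iter((0..3).map(|i| {
    // Reentrant allocation: this touches `arena` while the outer
    // `alloc_from_iter` is still consuming the iterator.
    let side = arena.alloc(format!("side-{i}"));
    format!("main-{}", side.len())
}));
assert_eq!(slice.len(), 3);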
@@ -485,8 +486,9 @@ pub fn alloc_slice<T>(&self, slice: &[T]) -> &mut [T]
 
     /// # Safety
     ///
-    /// The caller must ensure that `mem` is valid for writes up to
-    /// `size_of::<T>() * len`.
+    /// The caller must ensure that `mem` is valid for writes up to `size_of::<T>() * len`, and that
+    /// that memory stays allocated and not shared for the lifetime of `self`. This must hold even
+    /// if `iter.next()` allocates onto `self`.
     #[inline]
     unsafe fn write_from_iter<T, I: Iterator<Item = T>>(
         &self,
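
The hunk stops before the body of `write_from_iter`. As a hedged sketch of how such a contract can be honored (illustrative code, not the committed implementation): write at most `len` elements and hand back only the prefix that was actually produced, so no uninitialized slot ever becomes reachable, even if the size hint lied or `iter.next()` re-entered the arena.

use std::slice;

// Illustrative sketch only: a free function honoring the same contract.
// It never touches the arena itself, so `iter.next()` may re-enter the
// arena and allocate without invalidating `mem` (which the caller must
// guarantee stays valid).
unsafe fn write_from_iter_sketch<'a, T, I: Iterator<Item = T>>(
    mut iter: I,
    len: usize,
    mem: *mut T,
) -> &'a mut [T] {
    let mut i = 0;
    loop {
        match iter.next() {
            // SAFETY (per the contract): `mem` is valid for writes up to
            // `size_of::<T>() * len`, and `i < len` holds here.
            Some(value) if i < len => unsafe { mem.add(i).write(value) },
            // Exhausted early, or produced more than promised: return only
            // the initialized prefix of length `i`.
            _ => return unsafe { slice::from_raw_parts_mut(mem, i) },
        }
        i += 1;
    }
}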
@@ -516,6 +518,8 @@ unsafe fn write_from_iter<T, I: Iterator<Item = T>>(
 
     #[inline]
     pub fn alloc_from_iter<T, I: IntoIterator<Item = T>>(&self, iter: I) -> &mut [T] {
+        // Warning: this function is reentrant: `iter` could hold a reference to `&self` and
+        // allocate additional elements while we're iterating.
         let iter = iter.into_iter();
         assert!(mem::size_of::<T>() != 0);
         assert!(!mem::needs_drop::<T>());
@ -524,7 +528,7 @@ pub fn alloc_from_iter<T, I: IntoIterator<Item = T>>(&self, iter: I) -> &mut [T]
match size_hint {
(min, Some(max)) if min == max => {
// We know the exact number of elements the iterator will produce here
// We know the exact number of elements the iterator expects to produce here.
let len = min;
if len == 0 {
@@ -532,10 +536,15 @@ pub fn alloc_from_iter<T, I: IntoIterator<Item = T>>(&self, iter: I) -> &mut [T]
                 }
 
                 let mem = self.alloc_raw(Layout::array::<T>(len).unwrap()) as *mut T;
+                // SAFETY: `write_from_iter` doesn't touch `self`. It only touches the slice we just
+                // reserved. If the iterator panics or doesn't output `len` elements, this will
+                // leave some unallocated slots in the arena, which is fine because we do not call
+                // `drop`.
                 unsafe { self.write_from_iter(iter, len, mem) }
             }
             (_, _) => {
                 outline(move || -> &mut [T] {
+                    // Takes care of reentrancy.
                     let mut vec: SmallVec<[_; 8]> = iter.collect();
                     if vec.is_empty() {
                         return &mut [];
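
For contrast with the `TypedArena` path, a hypothetical usage sketch of the two `DroplessArena` branches (not part of the diff, assuming the API from this file). Because `!mem::needs_drop::<T>()` is asserted, a wrong exact size hint can only strand unused capacity in the arena, never cause a drop of uninitialized memory.

// Hypothetical usage sketch, not part of the diff.
let arena = DroplessArena::default();

// `(0..4).map(..)` keeps an exact size hint (min == max == 4), so this
// takes the fast path: one `alloc_raw` up front, then `write_from_iter`.
let evens: &mut [u32] = arena.alloc_from_iter((0..4).map(|i| i * 2));
assert_eq!(evens, &[0, 2, 4, 6]);

// `filter` widens the hint to (0, Some(8)), so this takes the `outline`d
// fallback that collects into a `SmallVec` before copying into the arena.
let odds: &mut [u32] = arena.alloc_from_iter((0..8u32).filter(|i| i % 2 == 1));
assert_eq!(odds, &[1, 3, 5, 7]);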