From 6dbd9a29c21db63e2c72f5e7f4f8b5ba58023875 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 4 Jun 2022 16:23:15 -0700 Subject: [PATCH] Optimize `array::IntoIter` `.into_iter()` on arrays was slower than it needed to be (especially compared to slice iterator) since it uses `Range`, which needs to handle degenerate ranges like `10..4`. This PR adds an internal `IndexRange` type that's like `Range` but with a safety invariant that means it doesn't need to worry about those cases -- it only handles `start <= end` -- and thus can give LLVM more information to optimize better. I added one simple demonstration of the improvement as a codegen test. --- library/core/src/array/iter.rs | 54 +++---- library/core/src/lib.rs | 1 + library/core/src/ops/index_range.rs | 166 +++++++++++++++++++++ library/core/src/ops/mod.rs | 3 + library/core/src/slice/index.rs | 75 ++++++++++ src/test/codegen/slice-iter-len-eq-zero.rs | 14 ++ 6 files changed, 282 insertions(+), 31 deletions(-) create mode 100644 library/core/src/ops/index_range.rs diff --git a/library/core/src/array/iter.rs b/library/core/src/array/iter.rs index f4885ed9ffb..b3b26040067 100644 --- a/library/core/src/array/iter.rs +++ b/library/core/src/array/iter.rs @@ -1,10 +1,10 @@ //! Defines the `IntoIter` owned iterator for arrays. use crate::{ - cmp, fmt, + fmt, iter::{self, ExactSizeIterator, FusedIterator, TrustedLen}, mem::{self, MaybeUninit}, - ops::Range, + ops::{IndexRange, Range}, ptr, }; @@ -29,9 +29,10 @@ pub struct IntoIter { /// The elements in `data` that have not been yielded yet. /// /// Invariants: - /// - `alive.start <= alive.end` /// - `alive.end <= N` - alive: Range, + /// + /// (And the `IndexRange` type requires `alive.start <= alive.end`.) + alive: IndexRange, } // Note: the `#[rustc_skip_array_during_method_dispatch]` on `trait IntoIterator` @@ -69,7 +70,7 @@ impl IntoIterator for [T; N] { // Until then, we can use `mem::transmute_copy` to create a bitwise copy // as a different type, then forget `array` so that it is not dropped. unsafe { - let iter = IntoIter { data: mem::transmute_copy(&self), alive: 0..N }; + let iter = IntoIter { data: mem::transmute_copy(&self), alive: IndexRange::zero_to(N) }; mem::forget(self); iter } @@ -147,7 +148,9 @@ impl IntoIter { buffer: [MaybeUninit; N], initialized: Range, ) -> Self { - Self { data: buffer, alive: initialized } + // SAFETY: one of our safety conditions is that the range is canonical. + let alive = unsafe { IndexRange::new_unchecked(initialized.start, initialized.end) }; + Self { data: buffer, alive } } /// Creates an iterator over `T` which returns no elements. @@ -283,16 +286,11 @@ impl Iterator for IntoIter { } fn advance_by(&mut self, n: usize) -> Result<(), usize> { - let len = self.len(); + let original_len = self.len(); - // The number of elements to drop. Always in-bounds by construction. - let delta = cmp::min(n, len); - - let range_to_drop = self.alive.start..(self.alive.start + delta); - - // Moving the start marks them as conceptually "dropped", so if anything - // goes bad then our drop impl won't double-free them. - self.alive.start += delta; + // This also moves the start, which marks them as conceptually "dropped", + // so if anything goes bad then our drop impl won't double-free them. + let range_to_drop = self.alive.take_prefix(n); // SAFETY: These elements are currently initialized, so it's fine to drop them. unsafe { @@ -300,7 +298,7 @@ impl Iterator for IntoIter { ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice)); } - if n > len { Err(len) } else { Ok(()) } + if n > original_len { Err(original_len) } else { Ok(()) } } } @@ -338,16 +336,11 @@ impl DoubleEndedIterator for IntoIter { } fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { - let len = self.len(); + let original_len = self.len(); - // The number of elements to drop. Always in-bounds by construction. - let delta = cmp::min(n, len); - - let range_to_drop = (self.alive.end - delta)..self.alive.end; - - // Moving the end marks them as conceptually "dropped", so if anything - // goes bad then our drop impl won't double-free them. - self.alive.end -= delta; + // This also moves the end, which marks them as conceptually "dropped", + // so if anything goes bad then our drop impl won't double-free them. + let range_to_drop = self.alive.take_suffix(n); // SAFETY: These elements are currently initialized, so it's fine to drop them. unsafe { @@ -355,7 +348,7 @@ impl DoubleEndedIterator for IntoIter { ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice)); } - if n > len { Err(len) } else { Ok(()) } + if n > original_len { Err(original_len) } else { Ok(()) } } } @@ -372,9 +365,7 @@ impl Drop for IntoIter { #[stable(feature = "array_value_iter_impls", since = "1.40.0")] impl ExactSizeIterator for IntoIter { fn len(&self) -> usize { - // Will never underflow due to the invariant `alive.start <= - // alive.end`. - self.alive.end - self.alive.start + self.alive.len() } fn is_empty(&self) -> bool { self.alive.is_empty() @@ -396,14 +387,15 @@ impl Clone for IntoIter { fn clone(&self) -> Self { // Note, we don't really need to match the exact same alive range, so // we can just clone into offset 0 regardless of where `self` is. - let mut new = Self { data: MaybeUninit::uninit_array(), alive: 0..0 }; + let mut new = Self { data: MaybeUninit::uninit_array(), alive: IndexRange::zero_to(0) }; // Clone all alive elements. for (src, dst) in iter::zip(self.as_slice(), &mut new.data) { // Write a clone into the new array, then update its alive range. // If cloning panics, we'll correctly drop the previous items. dst.write(src.clone()); - new.alive.end += 1; + // This addition cannot overflow as we're iterating a slice + new.alive = IndexRange::zero_to(new.alive.end() + 1); } new diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 5621d15c1cd..9955ec2d528 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -114,6 +114,7 @@ #![feature(const_fmt_arguments_new)] #![feature(const_heap)] #![feature(const_convert)] +#![feature(const_index_range_slice_index)] #![feature(const_inherent_unchecked_arith)] #![feature(const_int_unchecked_arith)] #![feature(const_intrinsic_forget)] diff --git a/library/core/src/ops/index_range.rs b/library/core/src/ops/index_range.rs new file mode 100644 index 00000000000..41ffe11f610 --- /dev/null +++ b/library/core/src/ops/index_range.rs @@ -0,0 +1,166 @@ +use crate::intrinsics::{assert_unsafe_precondition, unchecked_add, unchecked_sub}; +use crate::iter::{FusedIterator, TrustedLen}; + +/// Like a `Range`, but with a safety invariant that `start <= end`. +/// +/// This means that `end - start` cannot overflow, allowing some μoptimizations. +/// +/// (Normal `Range` code needs to handle degenerate ranges like `10..0`, +/// which takes extra checks compared to only handling the canonical form.) +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct IndexRange { + start: usize, + end: usize, +} + +impl IndexRange { + /// # Safety + /// - `start <= end` + #[inline] + pub const unsafe fn new_unchecked(start: usize, end: usize) -> Self { + // SAFETY: comparisons on usize are pure + unsafe { assert_unsafe_precondition!((start: usize, end: usize) => start <= end) }; + IndexRange { start, end } + } + + #[inline] + pub const fn zero_to(end: usize) -> Self { + IndexRange { start: 0, end } + } + + #[inline] + pub const fn start(&self) -> usize { + self.start + } + + #[inline] + pub const fn end(&self) -> usize { + self.end + } + + #[inline] + pub const fn len(&self) -> usize { + // SAFETY: By invariant, this cannot wrap + unsafe { unchecked_sub(self.end, self.start) } + } + + /// # Safety + /// - Can only be called when `start < end`, aka when `len > 0`. + #[inline] + unsafe fn next_unchecked(&mut self) -> usize { + debug_assert!(self.start < self.end); + + let value = self.start; + // SAFETY: The range isn't empty, so this cannot overflow + self.start = unsafe { unchecked_add(value, 1) }; + value + } + + /// # Safety + /// - Can only be called when `start < end`, aka when `len > 0`. + #[inline] + unsafe fn next_back_unchecked(&mut self) -> usize { + debug_assert!(self.start < self.end); + + // SAFETY: The range isn't empty, so this cannot overflow + let value = unsafe { unchecked_sub(self.end, 1) }; + self.end = value; + value + } + + /// Removes the first `n` items from this range, returning them as an `IndexRange`. + /// If there are fewer than `n`, then the whole range is returned and + /// `self` is left empty. + /// + /// This is designed to help implement `Iterator::advance_by`. + #[inline] + pub fn take_prefix(&mut self, n: usize) -> Self { + let mid = if n <= self.len() { + // SAFETY: We just checked that this will be between start and end, + // and thus the addition cannot overflow. + unsafe { unchecked_add(self.start, n) } + } else { + self.end + }; + let prefix = Self { start: self.start, end: mid }; + self.start = mid; + prefix + } + + /// Removes the last `n` items from this range, returning them as an `IndexRange`. + /// If there are fewer than `n`, then the whole range is returned and + /// `self` is left empty. + /// + /// This is designed to help implement `Iterator::advance_back_by`. + #[inline] + pub fn take_suffix(&mut self, n: usize) -> Self { + let mid = if n <= self.len() { + // SAFETY: We just checked that this will be between start and end, + // and thus the addition cannot overflow. + unsafe { unchecked_sub(self.end, n) } + } else { + self.start + }; + let suffix = Self { start: mid, end: self.end }; + self.end = mid; + suffix + } +} + +impl Iterator for IndexRange { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.len() > 0 { + // SAFETY: We just checked that the range is non-empty + unsafe { Some(self.next_unchecked()) } + } else { + None + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } + + #[inline] + fn advance_by(&mut self, n: usize) -> Result<(), usize> { + let original_len = self.len(); + self.take_prefix(n); + if n > original_len { Err(original_len) } else { Ok(()) } + } +} + +impl DoubleEndedIterator for IndexRange { + #[inline] + fn next_back(&mut self) -> Option { + if self.len() > 0 { + // SAFETY: We just checked that the range is non-empty + unsafe { Some(self.next_back_unchecked()) } + } else { + None + } + } + + #[inline] + fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { + let original_len = self.len(); + self.take_suffix(n); + if n > original_len { Err(original_len) } else { Ok(()) } + } +} + +impl ExactSizeIterator for IndexRange { + #[inline] + fn len(&self) -> usize { + self.len() + } +} + +// SAFETY: Because we only deal in `usize`, our `len` is always perfect. +unsafe impl TrustedLen for IndexRange {} + +impl FusedIterator for IndexRange {} diff --git a/library/core/src/ops/mod.rs b/library/core/src/ops/mod.rs index 31c1a1d099d..a5e5b13b336 100644 --- a/library/core/src/ops/mod.rs +++ b/library/core/src/ops/mod.rs @@ -146,6 +146,7 @@ mod drop; mod function; mod generator; mod index; +mod index_range; mod range; mod try_trait; mod unsize; @@ -178,6 +179,8 @@ pub use self::index::{Index, IndexMut}; #[stable(feature = "rust1", since = "1.0.0")] pub use self::range::{Range, RangeFrom, RangeFull, RangeTo}; +pub(crate) use self::index_range::IndexRange; + #[stable(feature = "inclusive_range", since = "1.26.0")] pub use self::range::{Bound, RangeBounds, RangeInclusive, RangeToInclusive}; diff --git a/library/core/src/slice/index.rs b/library/core/src/slice/index.rs index 3403a5a86f7..3ec6bd93e15 100644 --- a/library/core/src/slice/index.rs +++ b/library/core/src/slice/index.rs @@ -139,6 +139,8 @@ mod private_slice_index { impl Sealed for ops::RangeToInclusive {} #[stable(feature = "slice_index_with_ops_bound_pair", since = "1.53.0")] impl Sealed for (ops::Bound, ops::Bound) {} + + impl Sealed for ops::IndexRange {} } /// A helper trait used for indexing operations. @@ -257,6 +259,79 @@ unsafe impl const SliceIndex<[T]> for usize { } } +/// Because `IndexRange` guarantees `start <= end`, fewer checks are needed here +/// than there are for a general `Range` (which might be `100..3`). +#[rustc_const_unstable(feature = "const_index_range_slice_index", issue = "none")] +unsafe impl const SliceIndex<[T]> for ops::IndexRange { + type Output = [T]; + + #[inline] + fn get(self, slice: &[T]) -> Option<&[T]> { + if self.end() <= slice.len() { + // SAFETY: `self` is checked to be valid and in bounds above. + unsafe { Some(&*self.get_unchecked(slice)) } + } else { + None + } + } + + #[inline] + fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { + if self.end() <= slice.len() { + // SAFETY: `self` is checked to be valid and in bounds above. + unsafe { Some(&mut *self.get_unchecked_mut(slice)) } + } else { + None + } + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const [T]) -> *const [T] { + let end = self.end(); + // SAFETY: the caller guarantees that `slice` is not dangling, so it + // cannot be longer than `isize::MAX`. They also guarantee that + // `self` is in bounds of `slice` so `self` cannot overflow an `isize`, + // so the call to `add` is safe. + + unsafe { + assert_unsafe_precondition!([T](end: usize, slice: *const [T]) => + end <= slice.len()); + ptr::slice_from_raw_parts(slice.as_ptr().add(self.start()), self.len()) + } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut [T] { + let end = self.end(); + // SAFETY: see comments for `get_unchecked` above. + unsafe { + assert_unsafe_precondition!([T](end: usize, slice: *mut [T]) => + end <= slice.len()); + ptr::slice_from_raw_parts_mut(slice.as_mut_ptr().add(self.start()), self.len()) + } + } + + #[inline] + fn index(self, slice: &[T]) -> &[T] { + if self.end() <= slice.len() { + // SAFETY: `self` is checked to be valid and in bounds above. + unsafe { &*self.get_unchecked(slice) } + } else { + slice_end_index_len_fail(self.end(), slice.len()) + } + } + + #[inline] + fn index_mut(self, slice: &mut [T]) -> &mut [T] { + if self.end() <= slice.len() { + // SAFETY: `self` is checked to be valid and in bounds above. + unsafe { &mut *self.get_unchecked_mut(slice) } + } else { + slice_end_index_len_fail(self.end(), slice.len()) + } + } +} + #[stable(feature = "slice_get_slice_impls", since = "1.15.0")] #[rustc_const_unstable(feature = "const_slice_index", issue = "none")] unsafe impl const SliceIndex<[T]> for ops::Range { diff --git a/src/test/codegen/slice-iter-len-eq-zero.rs b/src/test/codegen/slice-iter-len-eq-zero.rs index fd19e624cdd..1124028253d 100644 --- a/src/test/codegen/slice-iter-len-eq-zero.rs +++ b/src/test/codegen/slice-iter-len-eq-zero.rs @@ -1,5 +1,6 @@ // no-system-llvm // compile-flags: -O +// ignore-debug: the debug assertions add extra comparisons #![crate_type = "lib"] type Demo = [u8; 3]; @@ -12,3 +13,16 @@ pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool { // CHECK: ret i1 %2 y.len() == 0 } + +// CHECK-LABEL: @array_into_iter_len_eq_zero +#[no_mangle] +pub fn array_into_iter_len_eq_zero(y: std::array::IntoIter) -> bool { + // This should be able to just check that the indexes are equal, and not + // need any subtractions or comparisons to handle `start > end`. + + // CHECK-NOT: icmp + // CHECK-NOT: sub + // CHECK: %1 = icmp eq {{i16|i32|i64}} + // CHECK: ret i1 %1 + y.len() == 0 +}