Auto merge of #100214 - scottmcm:strict-range, r=thomcc

Optimize `array::IntoIter`

`.into_iter()` on arrays was slower than it needed to be (especially compared to the slice iterator) because it used `Range<usize>`, which needs to handle degenerate ranges like `10..4`.

This PR adds an internal `IndexRange` type that's like `Range<usize>` but with a safety invariant that means it doesn't need to worry about those cases -- it only handles `start <= end` -- and thus can give LLVM more information to optimize better.

I added one simple demonstration of the improvement as a codegen test.

(`vec::IntoIter` uses pointers instead of indexes, so doesn't have this problem, but that only works because its elements are boxed.  `array::IntoIter` can't use pointers because that would keep it from being movable.)
This commit is contained in:
bors 2022-09-21 00:41:33 +00:00
commit 4ecfdfac51
6 changed files with 282 additions and 31 deletions

View File

@ -1,10 +1,10 @@
//! Defines the `IntoIter` owned iterator for arrays. //! Defines the `IntoIter` owned iterator for arrays.
use crate::{ use crate::{
cmp, fmt, fmt,
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen}, iter::{self, ExactSizeIterator, FusedIterator, TrustedLen},
mem::{self, MaybeUninit}, mem::{self, MaybeUninit},
ops::Range, ops::{IndexRange, Range},
ptr, ptr,
}; };
@ -29,9 +29,10 @@ pub struct IntoIter<T, const N: usize> {
/// The elements in `data` that have not been yielded yet. /// The elements in `data` that have not been yielded yet.
/// ///
/// Invariants: /// Invariants:
/// - `alive.start <= alive.end`
/// - `alive.end <= N` /// - `alive.end <= N`
alive: Range<usize>, ///
/// (And the `IndexRange` type requires `alive.start <= alive.end`.)
alive: IndexRange,
} }
// Note: the `#[rustc_skip_array_during_method_dispatch]` on `trait IntoIterator` // Note: the `#[rustc_skip_array_during_method_dispatch]` on `trait IntoIterator`
@ -69,7 +70,7 @@ impl<T, const N: usize> IntoIterator for [T; N] {
// Until then, we can use `mem::transmute_copy` to create a bitwise copy // Until then, we can use `mem::transmute_copy` to create a bitwise copy
// as a different type, then forget `array` so that it is not dropped. // as a different type, then forget `array` so that it is not dropped.
unsafe { unsafe {
let iter = IntoIter { data: mem::transmute_copy(&self), alive: 0..N }; let iter = IntoIter { data: mem::transmute_copy(&self), alive: IndexRange::zero_to(N) };
mem::forget(self); mem::forget(self);
iter iter
} }
@ -147,7 +148,9 @@ impl<T, const N: usize> IntoIter<T, N> {
buffer: [MaybeUninit<T>; N], buffer: [MaybeUninit<T>; N],
initialized: Range<usize>, initialized: Range<usize>,
) -> Self { ) -> Self {
Self { data: buffer, alive: initialized } // SAFETY: one of our safety conditions is that the range is canonical.
let alive = unsafe { IndexRange::new_unchecked(initialized.start, initialized.end) };
Self { data: buffer, alive }
} }
/// Creates an iterator over `T` which returns no elements. /// Creates an iterator over `T` which returns no elements.
@ -283,16 +286,11 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
} }
fn advance_by(&mut self, n: usize) -> Result<(), usize> { fn advance_by(&mut self, n: usize) -> Result<(), usize> {
let len = self.len(); let original_len = self.len();
// The number of elements to drop. Always in-bounds by construction. // This also moves the start, which marks them as conceptually "dropped",
let delta = cmp::min(n, len); // so if anything goes bad then our drop impl won't double-free them.
let range_to_drop = self.alive.take_prefix(n);
let range_to_drop = self.alive.start..(self.alive.start + delta);
// Moving the start marks them as conceptually "dropped", so if anything
// goes bad then our drop impl won't double-free them.
self.alive.start += delta;
// SAFETY: These elements are currently initialized, so it's fine to drop them. // SAFETY: These elements are currently initialized, so it's fine to drop them.
unsafe { unsafe {
@ -300,7 +298,7 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice)); ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice));
} }
if n > len { Err(len) } else { Ok(()) } if n > original_len { Err(original_len) } else { Ok(()) }
} }
} }
@ -338,16 +336,11 @@ impl<T, const N: usize> DoubleEndedIterator for IntoIter<T, N> {
} }
fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
let len = self.len(); let original_len = self.len();
// The number of elements to drop. Always in-bounds by construction. // This also moves the end, which marks them as conceptually "dropped",
let delta = cmp::min(n, len); // so if anything goes bad then our drop impl won't double-free them.
let range_to_drop = self.alive.take_suffix(n);
let range_to_drop = (self.alive.end - delta)..self.alive.end;
// Moving the end marks them as conceptually "dropped", so if anything
// goes bad then our drop impl won't double-free them.
self.alive.end -= delta;
// SAFETY: These elements are currently initialized, so it's fine to drop them. // SAFETY: These elements are currently initialized, so it's fine to drop them.
unsafe { unsafe {
@ -355,7 +348,7 @@ impl<T, const N: usize> DoubleEndedIterator for IntoIter<T, N> {
ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice)); ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice));
} }
if n > len { Err(len) } else { Ok(()) } if n > original_len { Err(original_len) } else { Ok(()) }
} }
} }
@ -372,9 +365,7 @@ impl<T, const N: usize> Drop for IntoIter<T, N> {
#[stable(feature = "array_value_iter_impls", since = "1.40.0")] #[stable(feature = "array_value_iter_impls", since = "1.40.0")]
impl<T, const N: usize> ExactSizeIterator for IntoIter<T, N> { impl<T, const N: usize> ExactSizeIterator for IntoIter<T, N> {
fn len(&self) -> usize { fn len(&self) -> usize {
// Will never underflow due to the invariant `alive.start <= self.alive.len()
// alive.end`.
self.alive.end - self.alive.start
} }
fn is_empty(&self) -> bool { fn is_empty(&self) -> bool {
self.alive.is_empty() self.alive.is_empty()
@ -396,14 +387,15 @@ impl<T: Clone, const N: usize> Clone for IntoIter<T, N> {
fn clone(&self) -> Self { fn clone(&self) -> Self {
// Note, we don't really need to match the exact same alive range, so // Note, we don't really need to match the exact same alive range, so
// we can just clone into offset 0 regardless of where `self` is. // we can just clone into offset 0 regardless of where `self` is.
let mut new = Self { data: MaybeUninit::uninit_array(), alive: 0..0 }; let mut new = Self { data: MaybeUninit::uninit_array(), alive: IndexRange::zero_to(0) };
// Clone all alive elements. // Clone all alive elements.
for (src, dst) in iter::zip(self.as_slice(), &mut new.data) { for (src, dst) in iter::zip(self.as_slice(), &mut new.data) {
// Write a clone into the new array, then update its alive range. // Write a clone into the new array, then update its alive range.
// If cloning panics, we'll correctly drop the previous items. // If cloning panics, we'll correctly drop the previous items.
dst.write(src.clone()); dst.write(src.clone());
new.alive.end += 1; // This addition cannot overflow as we're iterating a slice
new.alive = IndexRange::zero_to(new.alive.end() + 1);
} }
new new

View File

@ -114,6 +114,7 @@
#![feature(const_fmt_arguments_new)] #![feature(const_fmt_arguments_new)]
#![feature(const_heap)] #![feature(const_heap)]
#![feature(const_convert)] #![feature(const_convert)]
#![feature(const_index_range_slice_index)]
#![feature(const_inherent_unchecked_arith)] #![feature(const_inherent_unchecked_arith)]
#![feature(const_int_unchecked_arith)] #![feature(const_int_unchecked_arith)]
#![feature(const_intrinsic_forget)] #![feature(const_intrinsic_forget)]

View File

@ -0,0 +1,166 @@
use crate::intrinsics::{assert_unsafe_precondition, unchecked_add, unchecked_sub};
use crate::iter::{FusedIterator, TrustedLen};
/// Like a `Range<usize>`, but with a safety invariant that `start <= end`.
///
/// This means that `end - start` cannot overflow, allowing some μoptimizations.
///
/// (Normal `Range` code needs to handle degenerate ranges like `10..0`,
/// which takes extra checks compared to only handling the canonical form.)
///
/// Both fields are private, so code outside this module can only obtain a
/// value through the constructors (`new_unchecked`, `zero_to`) and can only
/// read the bounds through the `start()` / `end()` accessors.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct IndexRange {
/// Inclusive lower bound: the next index yielded from the front.
/// Never greater than `end`.
start: usize,
/// Exclusive upper bound: one past the next index yielded from the back.
/// Never less than `start`.
end: usize,
}
impl IndexRange {
/// Creates the range `start..end` without verifying the type's invariant.
///
/// # Safety
/// - `start <= end`
#[inline]
pub const unsafe fn new_unchecked(start: usize, end: usize) -> Self {
// SAFETY: comparisons on usize are pure
unsafe { assert_unsafe_precondition!((start: usize, end: usize) => start <= end) };
IndexRange { start, end }
}
/// Creates the range `0..end`.
///
/// This constructor is safe because `0 <= end` holds for every `usize`,
/// so the `start <= end` invariant is always satisfied.
#[inline]
pub const fn zero_to(end: usize) -> Self {
IndexRange { start: 0, end }
}
/// Returns the inclusive lower bound of the range.
#[inline]
pub const fn start(&self) -> usize {
self.start
}
/// Returns the exclusive upper bound of the range.
#[inline]
pub const fn end(&self) -> usize {
self.end
}
/// Returns the number of indices left in the range, i.e. `end - start`.
#[inline]
pub const fn len(&self) -> usize {
// SAFETY: By invariant, this cannot wrap
unsafe { unchecked_sub(self.end, self.start) }
}
/// Returns the current `start` and advances `start` by one.
///
/// # Safety
/// - Can only be called when `start < end`, aka when `len > 0`.
#[inline]
unsafe fn next_unchecked(&mut self) -> usize {
debug_assert!(self.start < self.end);
let value = self.start;
// SAFETY: The range isn't empty, so this cannot overflow
self.start = unsafe { unchecked_add(value, 1) };
value
}
/// Decrements `end` by one and returns the new `end`, which is the last
/// index that was still in the range.
///
/// # Safety
/// - Can only be called when `start < end`, aka when `len > 0`.
#[inline]
unsafe fn next_back_unchecked(&mut self) -> usize {
debug_assert!(self.start < self.end);
// SAFETY: The range isn't empty, so `end > start >= 0` and this
// subtraction cannot underflow
let value = unsafe { unchecked_sub(self.end, 1) };
self.end = value;
value
}
/// Removes the first `n` items from this range, returning them as an `IndexRange`.
/// If there are fewer than `n`, then the whole range is returned and
/// `self` is left empty.
///
/// This is designed to help implement `Iterator::advance_by`.
#[inline]
pub fn take_prefix(&mut self, n: usize) -> Self {
let mid = if n <= self.len() {
// SAFETY: We just checked that `n <= end - start`, so `start + n`
// is at most `end` and thus the addition cannot overflow.
unsafe { unchecked_add(self.start, n) }
} else {
self.end
};
// `start <= mid <= end`, so both halves uphold the invariant.
let prefix = Self { start: self.start, end: mid };
self.start = mid;
prefix
}
/// Removes the last `n` items from this range, returning them as an `IndexRange`.
/// If there are fewer than `n`, then the whole range is returned and
/// `self` is left empty.
///
/// This is designed to help implement `Iterator::advance_back_by`.
#[inline]
pub fn take_suffix(&mut self, n: usize) -> Self {
let mid = if n <= self.len() {
// SAFETY: We just checked that `n <= end - start`, so `end - n`
// is at least `start` and thus the subtraction cannot underflow.
unsafe { unchecked_sub(self.end, n) }
} else {
self.start
};
// `start <= mid <= end`, so both halves uphold the invariant.
let suffix = Self { start: mid, end: self.end };
self.end = mid;
suffix
}
}
/// Front-to-back iteration over the indices in the range: yields `start`,
/// then `start + 1`, and so on, stopping before `end`.
impl Iterator for IndexRange {
    type Item = usize;

    /// Yields the index at the front of the range and steps past it, or
    /// `None` when no indices remain.
    #[inline]
    fn next(&mut self) -> Option<usize> {
        if self.len() == 0 {
            return None;
        }
        // SAFETY: The length was just observed to be non-zero, so the range is non-empty
        Some(unsafe { self.next_unchecked() })
    }

    /// The remaining count is known exactly, so both bounds are equal.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    /// Skips up to `n` indices from the front. When fewer than `n` were
    /// available the range ends up empty and the number actually skipped is
    /// reported in `Err`.
    #[inline]
    fn advance_by(&mut self, n: usize) -> Result<(), usize> {
        // Capture the length first: `take_prefix` shrinks the range.
        let available = self.len();
        self.take_prefix(n);
        if n <= available { Ok(()) } else { Err(available) }
    }
}
/// Back-to-front iteration over the indices in the range: yields `end - 1`,
/// then `end - 2`, and so on, down to and including `start`.
impl DoubleEndedIterator for IndexRange {
    /// Yields the index at the back of the range and shrinks the range by
    /// one, or `None` when no indices remain.
    #[inline]
    fn next_back(&mut self) -> Option<usize> {
        if self.len() == 0 {
            return None;
        }
        // SAFETY: The length was just observed to be non-zero, so the range is non-empty
        Some(unsafe { self.next_back_unchecked() })
    }

    /// Skips up to `n` indices from the back. When fewer than `n` were
    /// available the range ends up empty and the number actually skipped is
    /// reported in `Err`.
    #[inline]
    fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
        // Capture the length first: `take_suffix` shrinks the range.
        let available = self.len();
        self.take_suffix(n);
        if n <= available { Ok(()) } else { Err(available) }
    }
}
/// The remaining length is always known exactly as `end - start`.
impl ExactSizeIterator for IndexRange {
#[inline]
fn len(&self) -> usize {
// Not a recursive call: method resolution prefers the inherent
// `IndexRange::len` over this trait method, so this simply forwards to
// the inherent implementation.
self.len()
}
}
// SAFETY: Because we only deal in `usize`, our `len` is always perfect.
// (`size_hint` reports `(len, Some(len))`, and `len` is `end - start`, which
// by the type's invariant always fits in a `usize`.)
unsafe impl TrustedLen for IndexRange {}
// Once the range is empty, `next` and `next_back` return `None` without
// modifying `start` or `end`, so the iterator stays exhausted.
impl FusedIterator for IndexRange {}

View File

@ -146,6 +146,7 @@ mod drop;
mod function; mod function;
mod generator; mod generator;
mod index; mod index;
mod index_range;
mod range; mod range;
mod try_trait; mod try_trait;
mod unsize; mod unsize;
@ -178,6 +179,8 @@ pub use self::index::{Index, IndexMut};
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub use self::range::{Range, RangeFrom, RangeFull, RangeTo}; pub use self::range::{Range, RangeFrom, RangeFull, RangeTo};
pub(crate) use self::index_range::IndexRange;
#[stable(feature = "inclusive_range", since = "1.26.0")] #[stable(feature = "inclusive_range", since = "1.26.0")]
pub use self::range::{Bound, RangeBounds, RangeInclusive, RangeToInclusive}; pub use self::range::{Bound, RangeBounds, RangeInclusive, RangeToInclusive};

View File

@ -139,6 +139,8 @@ mod private_slice_index {
impl Sealed for ops::RangeToInclusive<usize> {} impl Sealed for ops::RangeToInclusive<usize> {}
#[stable(feature = "slice_index_with_ops_bound_pair", since = "1.53.0")] #[stable(feature = "slice_index_with_ops_bound_pair", since = "1.53.0")]
impl Sealed for (ops::Bound<usize>, ops::Bound<usize>) {} impl Sealed for (ops::Bound<usize>, ops::Bound<usize>) {}
impl Sealed for ops::IndexRange {}
} }
/// A helper trait used for indexing operations. /// A helper trait used for indexing operations.
@ -257,6 +259,79 @@ unsafe impl<T> const SliceIndex<[T]> for usize {
} }
} }
/// Because `IndexRange` guarantees `start <= end`, fewer checks are needed here
/// than there are for a general `Range<usize>` (which might be `100..3`).
///
/// Every checked method below therefore only compares `end` against the slice
/// length; `start <= end <= len` then follows from the type's invariant.
#[rustc_const_unstable(feature = "const_index_range_slice_index", issue = "none")]
unsafe impl<T> const SliceIndex<[T]> for ops::IndexRange {
type Output = [T];
/// Returns the sub-slice covered by this range, or `None` when the range's
/// end lies past the end of `slice`.
#[inline]
fn get(self, slice: &[T]) -> Option<&[T]> {
if self.end() <= slice.len() {
// SAFETY: `self` is checked to be valid and in bounds above.
unsafe { Some(&*self.get_unchecked(slice)) }
} else {
None
}
}
/// Mutable counterpart of `get`: returns the covered sub-slice, or `None`
/// when the range's end lies past the end of `slice`.
#[inline]
fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> {
if self.end() <= slice.len() {
// SAFETY: `self` is checked to be valid and in bounds above.
unsafe { Some(&mut *self.get_unchecked_mut(slice)) }
} else {
None
}
}
/// Builds a raw sub-slice pointer starting `self.start()` elements into
/// `slice` and spanning `self.len()` elements, without a bounds check.
///
/// # Safety
/// The caller must ensure `self.end() <= slice.len()` and that `slice` is
/// not dangling (see the SAFETY comment in the body).
#[inline]
unsafe fn get_unchecked(self, slice: *const [T]) -> *const [T] {
// Read `end` up front so the precondition macro can take it by value.
let end = self.end();
// SAFETY: the caller guarantees that `slice` is not dangling, so it
// cannot be longer than `isize::MAX`. They also guarantee that
// `self` is in bounds of `slice` so `self` cannot overflow an `isize`,
// so the call to `add` is safe.
unsafe {
assert_unsafe_precondition!([T](end: usize, slice: *const [T]) =>
end <= slice.len());
ptr::slice_from_raw_parts(slice.as_ptr().add(self.start()), self.len())
}
}
/// Mutable counterpart of `get_unchecked`.
///
/// # Safety
/// The caller must ensure `self.end() <= slice.len()` and that `slice` is
/// not dangling.
#[inline]
unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut [T] {
// Read `end` up front so the precondition macro can take it by value.
let end = self.end();
// SAFETY: see comments for `get_unchecked` above.
unsafe {
assert_unsafe_precondition!([T](end: usize, slice: *mut [T]) =>
end <= slice.len());
ptr::slice_from_raw_parts_mut(slice.as_mut_ptr().add(self.start()), self.len())
}
}
/// Returns the sub-slice covered by this range. When the range's end lies
/// past the end of `slice`, control is handed to
/// `slice_end_index_len_fail`, which does not return a slice (presumably
/// it panics with the offending end index and slice length; defined
/// outside this block).
#[inline]
fn index(self, slice: &[T]) -> &[T] {
if self.end() <= slice.len() {
// SAFETY: `self` is checked to be valid and in bounds above.
unsafe { &*self.get_unchecked(slice) }
} else {
slice_end_index_len_fail(self.end(), slice.len())
}
}
/// Mutable counterpart of `index`, with the same out-of-bounds handling.
#[inline]
fn index_mut(self, slice: &mut [T]) -> &mut [T] {
if self.end() <= slice.len() {
// SAFETY: `self` is checked to be valid and in bounds above.
unsafe { &mut *self.get_unchecked_mut(slice) }
} else {
slice_end_index_len_fail(self.end(), slice.len())
}
}
}
#[stable(feature = "slice_get_slice_impls", since = "1.15.0")] #[stable(feature = "slice_get_slice_impls", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_slice_index", issue = "none")] #[rustc_const_unstable(feature = "const_slice_index", issue = "none")]
unsafe impl<T> const SliceIndex<[T]> for ops::Range<usize> { unsafe impl<T> const SliceIndex<[T]> for ops::Range<usize> {

View File

@ -1,5 +1,6 @@
// no-system-llvm // no-system-llvm
// compile-flags: -O // compile-flags: -O
// ignore-debug: the debug assertions add extra comparisons
#![crate_type = "lib"] #![crate_type = "lib"]
type Demo = [u8; 3]; type Demo = [u8; 3];
@ -12,3 +13,16 @@ pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
// CHECK: ret i1 %2 // CHECK: ret i1 %2
y.len() == 0 y.len() == 0
} }
// CHECK-LABEL: @array_into_iter_len_eq_zero
#[no_mangle]
pub fn array_into_iter_len_eq_zero(y: std::array::IntoIter<Demo, 123>) -> bool {
// This should be able to just check that the indexes are equal, and not
// need any subtractions or comparisons to handle `start > end`.
//
// The two negative directives below forbid any compare or subtract
// instruction ahead of the single equality compare whose result is returned.
// The integer-width alternation presumably covers targets with different
// `usize` sizes.
// CHECK-NOT: icmp
// CHECK-NOT: sub
// CHECK: %1 = icmp eq {{i16|i32|i64}}
// CHECK: ret i1 %1
y.len() == 0
}