From 557210c5c7f4045a2007583c741476a4a3aee1df Mon Sep 17 00:00:00 2001
From: wr7
Date: Fri, 14 Jun 2024 14:45:53 -0500
Subject: [PATCH] Add elem_offset and related methods

---
 library/core/src/slice/mod.rs | 115 ++++++++++++++++++++++++++++++++++
 library/core/src/str/mod.rs   |  34 ++++++++++
 2 files changed, 149 insertions(+)

diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index 521c3248204..1fed92a3fa1 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -4540,6 +4540,121 @@ pub fn get_many_mut<const N: usize>(
         // are disjunct and in bounds.
         unsafe { Ok(self.get_many_unchecked_mut(indices)) }
     }
+
+    /// Returns the index that an element reference points to.
+    ///
+    /// Returns `None` if `element` does not point within the slice or if it points between elements.
+    ///
+    /// This method is useful for extending slice iterators like [`slice::split`].
+    ///
+    /// Note that this uses pointer arithmetic and **does not compare elements**.
+    /// To find the index of an element via comparison, use
+    /// [`.iter().position()`](crate::iter::Iterator::position) instead.
+    ///
+    /// # Panics
+    /// Panics if `T` is zero-sized.
+    ///
+    /// # Examples
+    /// Basic usage:
+    /// ```
+    /// #![feature(substr_range)]
+    ///
+    /// let nums: &[u32] = &[1, 7, 1, 1];
+    /// let num = &nums[2];
+    ///
+    /// assert_eq!(num, &1);
+    /// assert_eq!(nums.elem_offset(num), Some(2));
+    /// ```
+    /// Returning `None` with an in-between element:
+    /// ```
+    /// #![feature(substr_range)]
+    ///
+    /// let arr: &[[u32; 2]] = &[[0, 1], [2, 3]];
+    /// let flat_arr: &[u32] = arr.as_flattened();
+    ///
+    /// let ok_elm: &[u32; 2] = flat_arr[0..2].try_into().unwrap();
+    /// let weird_elm: &[u32; 2] = flat_arr[1..3].try_into().unwrap();
+    ///
+    /// assert_eq!(ok_elm, &[0, 1]);
+    /// assert_eq!(weird_elm, &[1, 2]);
+    ///
+    /// assert_eq!(arr.elem_offset(ok_elm), Some(0)); // Points to element 0
+    /// assert_eq!(arr.elem_offset(weird_elm), None); // Points between element 0 and 1
+    /// ```
+    #[must_use]
+    #[unstable(feature = "substr_range", issue = "126769")]
+    pub fn elem_offset(&self, element: &T) -> Option<usize> {
+        if T::IS_ZST {
+            panic!("elements are zero-sized");
+        }
+
+        let self_start = self.as_ptr() as usize;
+        let elem_start = element as *const T as usize;
+
+        let byte_offset = elem_start.wrapping_sub(self_start);
+
+        if byte_offset % mem::size_of::<T>() != 0 {
+            return None;
+        }
+
+        let offset = byte_offset / mem::size_of::<T>();
+
+        if offset < self.len() { Some(offset) } else { None }
+    }
+
+    /// Returns the range of indices that a subslice points to.
+    ///
+    /// Returns `None` if `subslice` does not point within the slice or if it points between elements.
+    ///
+    /// This method **does not compare elements**. Instead, this method finds the location in the slice that
+    /// `subslice` was obtained from. To find the index of a subslice via comparison, instead use
+    /// [`.windows()`](slice::windows)[`.position()`](crate::iter::Iterator::position).
+    ///
+    /// This method is useful for extending slice iterators like [`slice::split`].
+    ///
+    /// Note that this may return a false positive (either `Some(0..0)` or `Some(self.len()..self.len())`)
+    /// if `subslice` has a length of zero and points to the beginning or end of another, separate, slice.
+    ///
+    /// # Panics
+    /// Panics if `T` is zero-sized.
+    ///
+    /// # Examples
+    /// Basic usage:
+    /// ```
+    /// #![feature(substr_range)]
+    ///
+    /// let nums = &[0, 5, 10, 0, 0, 5];
+    ///
+    /// let mut iter = nums
+    ///     .split(|t| *t == 0)
+    ///     .map(|n| nums.subslice_range(n).unwrap());
+    ///
+    /// assert_eq!(iter.next(), Some(0..0));
+    /// assert_eq!(iter.next(), Some(1..3));
+    /// assert_eq!(iter.next(), Some(4..4));
+    /// assert_eq!(iter.next(), Some(5..6));
+    /// ```
+    #[must_use]
+    #[unstable(feature = "substr_range", issue = "126769")]
+    pub fn subslice_range(&self, subslice: &[T]) -> Option<Range<usize>> {
+        if T::IS_ZST {
+            panic!("elements are zero-sized");
+        }
+
+        let self_start = self.as_ptr() as usize;
+        let subslice_start = subslice.as_ptr() as usize;
+
+        let byte_start = subslice_start.wrapping_sub(self_start);
+
+        if byte_start % core::mem::size_of::<T>() != 0 {
+            return None;
+        }
+
+        let start = byte_start / core::mem::size_of::<T>();
+        let end = start.wrapping_add(subslice.len());
+
+        if start <= self.len() && end <= self.len() { Some(start..end) } else { None }
+    }
 }
 
 impl<T, const N: usize> [[T; N]] {
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 68310938043..1392fd7e7cd 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -19,6 +19,7 @@
 use crate::ascii;
 use crate::char::{self, EscapeDebugExtArgs};
 use crate::mem;
+use crate::ops::Range;
 use crate::slice::{self, SliceIndex};
 
 pub mod pattern;
@@ -2721,6 +2722,39 @@ pub fn escape_default(&self) -> EscapeDefault<'_> {
     pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
         EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
     }
+
+    /// Returns the range that a substring points to.
+    ///
+    /// Returns `None` if `substr` does not point within `self`.
+    ///
+    /// Unlike [`str::find`], **this does not search through the string**.
+    /// Instead, it uses pointer arithmetic to find where in the string
+    /// `substr` is derived from.
+    ///
+    /// This is useful for extending [`str::split`] and similar methods.
+    ///
+    /// Note that this method may return false positives (typically either
+    /// `Some(0..0)` or `Some(self.len()..self.len())`) if `substr` is a
+    /// zero-length `str` that points at the beginning or end of another,
+    /// independent, `str`.
+    ///
+    /// # Examples
+    /// ```
+    /// #![feature(substr_range)]
+    ///
+    /// let data = "a, b, b, a";
+    /// let mut iter = data.split(", ").map(|s| data.substr_range(s).unwrap());
+    ///
+    /// assert_eq!(iter.next(), Some(0..1));
+    /// assert_eq!(iter.next(), Some(3..4));
+    /// assert_eq!(iter.next(), Some(6..7));
+    /// assert_eq!(iter.next(), Some(9..10));
+    /// ```
+    #[must_use]
+    #[unstable(feature = "substr_range", issue = "126769")]
+    pub fn substr_range(&self, substr: &str) -> Option<Range<usize>> {
+        self.as_bytes().subslice_range(substr.as_bytes())
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]