diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index c1ae67a1a33..ea75f8903c3 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -12,6 +12,7 @@ #![feature(binary_heap_into_iter_sorted)] #![feature(binary_heap_drain_sorted)] #![feature(vec_remove_item)] +#![feature(split_inclusive)] use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 51ddb5e7a4e..3d6b4bff5e0 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -851,6 +851,86 @@ fn test_splitator() { assert_eq!(xs.split(|x| *x == 5).collect::>(), splits); } +#[test] +fn test_splitator_inclusive() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; + assert_eq!(xs.split_inclusive(|_| true).collect::>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); +} + +#[test] +fn test_splitator_inclusive_reverse() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + 
assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive(|_| true).rev().collect::>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|_| true).collect::>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive_reverse() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: &[&[i32]] = &[&[]]; + 
assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::>(), splits); +} + #[test] fn test_splitnator() { let xs = &[1, 2, 3, 4, 5]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d3c72615696..b703df6f3cb 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1247,6 +1247,49 @@ fn test_split_char_iterator_no_trailing() { assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]); } +#[test] +fn test_split_char_iterator_inclusive() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').collect(); + assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]); + + let uppercase_separated = "SheePSharKTurtlECaT"; + let mut first_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }) + .collect(); + assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); +} + +#[test] +fn test_split_char_iterator_inclusive_rev() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').rev().collect(); + assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]); + + // Note that the predicate is stateful and thus dependent + // on the iteration order. + // (A different predicate is needed for reverse iterator vs normal iterator.) + // Not sure if anything can be done though. 
+ let uppercase_separated = "SheePSharKTurtlECaT"; + let mut term_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = term_char && c.is_uppercase(); + term_char = c.is_uppercase(); + split + }) + .rev() + .collect(); + assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]); +} + #[test] fn test_rsplit() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index e79a775325f..7c65f595790 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -1155,6 +1155,69 @@ pub fn split_mut(&mut self, pred: F) -> SplitMut<'_, T, F> SplitMut { v: self, pred, finished: false } } + /// Returns an iterator over subslices separated by elements that match + /// `pred`. The matched element is contained in the end of the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [10, 40, 33, 20]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert_eq!(iter.next().unwrap(), &[20]); + /// assert!(iter.next().is_none()); + /// ``` + /// + /// If the last element of the slice is matched, + /// that element will be considered the terminator of the preceding slice. + /// That slice will be the last item returned by the iterator. 
+ /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [3, 10, 40, 33]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[3]); + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert!(iter.next().is_none()); + /// ``` + #[unstable(feature = "split_inclusive", issue = "none")] + #[inline] + pub fn split_inclusive(&self, pred: F) -> SplitInclusive<'_, T, F> + where + F: FnMut(&T) -> bool, + { + SplitInclusive { v: self, pred, finished: false } + } + + /// Returns an iterator over mutable subslices separated by elements that + /// match `pred`. The matched element is contained in the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let mut v = [10, 40, 30, 20, 60, 50]; + /// + /// for group in v.split_inclusive_mut(|num| *num % 3 == 0) { + /// let terminator_idx = group.len()-1; + /// group[terminator_idx] = 1; + /// } + /// assert_eq!(v, [10, 40, 1, 20, 1, 1]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "none")] + #[inline] + pub fn split_inclusive_mut(&mut self, pred: F) -> SplitInclusiveMut<'_, T, F> + where + F: FnMut(&T) -> bool, + { + SplitInclusiveMut { v: self, pred, finished: false } + } + /// Returns an iterator over subslices separated by elements that match /// `pred`, starting at the end of the slice and working backwards. /// The matched element is not contained in the subslices. @@ -3675,7 +3738,106 @@ fn finish(&mut self) -> Option<&'a [T]> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for Split<'_, T, P> where P: FnMut(&T) -> bool {} -/// An iterator over the subslices of the vector which are separated +/// An iterator over subslices separated by elements that match a predicate +/// function. Unlike `Split`, it contains the matched part as a terminator +/// of the subslice. +/// +/// This struct is created by the [`split_inclusive`] method on [slices]. 
+/// +/// [`split_inclusive`]: ../../std/primitive.slice.html#method.split_inclusive +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "none")] +pub struct SplitInclusive<'a, T: 'a, P> +where + P: FnMut(&T) -> bool, +{ + v: &'a [T], + pred: P, + finished: bool, +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl fmt::Debug for SplitInclusive<'_, T, P> +where + P: FnMut(&T) -> bool, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "none")] +impl Clone for SplitInclusive<'_, T, P> +where + P: Clone + FnMut(&T) -> bool, +{ + fn clone(&self) -> Self { + SplitInclusive { v: self.v, pred: self.pred.clone(), finished: self.finished } + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> Iterator for SplitInclusive<'a, T, P> +where + P: FnMut(&T) -> bool, +{ + type Item = &'a [T]; + + #[inline] + fn next(&mut self) -> Option<&'a [T]> { + if self.finished { + return None; + } + + let idx = + self.v.iter().position(|x| (self.pred)(x)).map(|idx| idx + 1).unwrap_or(self.v.len()); + if idx == self.v.len() { + self.finished = true; + } + let ret = Some(&self.v[..idx]); + self.v = &self.v[idx..]; + ret + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.finished { (0, Some(0)) } else { (1, Some(self.v.len() + 1)) } + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusive<'a, T, P> +where + P: FnMut(&T) -> bool, +{ + #[inline] + fn next_back(&mut self) -> Option<&'a [T]> { + if self.finished { + return None; + } + + // The last index of self.v is already checked and found to match + // by the last iteration, so we start searching a new match + // one index to the left. 
+ let remainder = if self.v.len() == 0 { &[] } else { &self.v[..(self.v.len() - 1)] }; + let idx = remainder.iter().rposition(|x| (self.pred)(x)).map(|idx| idx + 1).unwrap_or(0); + if idx == 0 { + self.finished = true; + } + let ret = Some(&self.v[idx..]); + self.v = &self.v[..idx]; + ret + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl FusedIterator for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool {} + +/// An iterator over the mutable subslices of the vector which are separated /// by elements that match `pred`. /// /// This struct is created by the [`split_mut`] method on [slices]. @@ -3789,6 +3951,114 @@ fn next_back(&mut self) -> Option<&'a mut [T]> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for SplitMut<'_, T, P> where P: FnMut(&T) -> bool {} +/// An iterator over the mutable subslices of the vector which are separated +/// by elements that match `pred`. Unlike `SplitMut`, it contains the matched +/// parts in the ends of the subslices. +/// +/// This struct is created by the [`split_inclusive_mut`] method on [slices]. 
+/// +/// [`split_inclusive_mut`]: ../../std/primitive.slice.html#method.split_inclusive_mut +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "none")] +pub struct SplitInclusiveMut<'a, T: 'a, P> +where + P: FnMut(&T) -> bool, +{ + v: &'a mut [T], + pred: P, + finished: bool, +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl fmt::Debug for SplitInclusiveMut<'_, T, P> +where + P: FnMut(&T) -> bool, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusiveMut") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> +where + P: FnMut(&T) -> bool, +{ + type Item = &'a mut [T]; + + #[inline] + fn next(&mut self) -> Option<&'a mut [T]> { + if self.finished { + return None; + } + + let idx_opt = { + // work around borrowck limitations + let pred = &mut self.pred; + self.v.iter().position(|x| (*pred)(x)) + }; + let idx = idx_opt.map(|idx| idx + 1).unwrap_or(self.v.len()); + if idx == self.v.len() { + self.finished = true; + } + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx); + self.v = tail; + Some(head) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.finished { + (0, Some(0)) + } else { + // if the predicate doesn't match anything, we yield one slice; + // if it matches every element, we yield len one-element slices (so len+1 is loose). 
+ (1, Some(self.v.len() + 1)) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusiveMut<'a, T, P> +where + P: FnMut(&T) -> bool, +{ + #[inline] + fn next_back(&mut self) -> Option<&'a mut [T]> { + if self.finished { + return None; + } + + let idx_opt = if self.v.len() == 0 { + None + } else { + // work around borrowck limitations + let pred = &mut self.pred; + + // The last index of self.v is already checked and found to match + // by the last iteration, so we start searching a new match + // one index to the left. + let remainder = &self.v[..(self.v.len() - 1)]; + remainder.iter().rposition(|x| (*pred)(x)) + }; + let idx = idx_opt.map(|idx| idx + 1).unwrap_or(0); + if idx == 0 { + self.finished = true; + } + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx); + self.v = head; + Some(tail) + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl FusedIterator for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool {} + /// An iterator over subslices separated by elements that match a predicate /// function, starting from the end of the slice. /// diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 7c4acb0edb7..668b3ff3a36 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1132,6 +1132,26 @@ fn next(&mut self) -> Option<&'a str> { } } + #[inline] + fn next_inclusive(&mut self) -> Option<&'a str> { + if self.finished { + return None; + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match() { + // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary, + // and self.start is either the start of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. 
+ Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(self.start..b); + self.start = b; + Some(elt) + }, + None => self.get_end(), + } + } + #[inline] fn next_back(&mut self) -> Option<&'a str> where @@ -1168,6 +1188,49 @@ fn next_back(&mut self) -> Option<&'a str> }, } } + + #[inline] + fn next_back_inclusive(&mut self) -> Option<&'a str> + where + P::Searcher: ReverseSearcher<'a>, + { + if self.finished { + return None; + } + + if !self.allow_trailing_empty { + self.allow_trailing_empty = true; + match self.next_back_inclusive() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => { + if self.finished { + return None; + } + } + } + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match_back() { + // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary, + // and self.end is either the end of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. + Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(b..self.end); + self.end = b; + Some(elt) + }, + // SAFETY: self.start is either the start of the original string, + // or start of a substring that represents the part of the string that hasn't + // iterated yet. Either way, it is guaranteed to lie on unicode boundary. + // self.end is either the end of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. + None => unsafe { + self.finished = true; + Some(haystack.get_unchecked(self.start..self.end)) + }, + } + } } generate_pattern_iterators! { @@ -3213,6 +3276,42 @@ pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { }) } + /// An iterator over substrings of this string slice, separated by + /// characters matched by a pattern. Differs from the iterator produced by + /// `split` in that `split_inclusive` leaves the matched part as the + /// terminator of the substring. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb." + /// .split_inclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]); + /// ``` + /// + /// If the last element of the string is matched, + /// that element will be considered the terminator of the preceding substring. + /// That substring will be the last item returned by the iterator. + /// + /// ``` + /// #![feature(split_inclusive)] + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n" + /// .split_inclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "none")] + #[inline] + pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> { + SplitInclusive(SplitInternal { + start: 0, + end: self.len(), + matcher: pat.into_searcher(self), + allow_trailing_empty: false, + finished: false, + }) + } + /// An iterator over substrings of the given string slice, separated by /// characters matched by a pattern and yielded in reverse order. /// @@ -4406,6 +4505,19 @@ pub struct SplitAsciiWhitespace<'a> { inner: Map, BytesIsNotEmpty>, UnsafeBytesToStr>, } +/// An iterator over the substrings of a string, +/// terminated by a substring matching a pattern. +/// Unlike `Split`, it contains the matched part as a terminator +/// of the substring. +/// +/// This struct is created by the [`split_inclusive`] method on [`str`]. +/// See its documentation for more. +/// +/// [`split_inclusive`]: ../../std/primitive.str.html#method.split_inclusive +/// [`str`]: ../../std/primitive.str.html +#[unstable(feature = "split_inclusive", issue = "none")] +pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>); + impl_fn_for_zst! 
{ #[derive(Clone)] struct IsWhitespace impl Fn = |c: char| -> bool { @@ -4496,6 +4608,44 @@ fn next_back(&mut self) -> Option<&'a str> { #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] impl FusedIterator for SplitAsciiWhitespace<'_> {} +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive").field("0", &self.0).finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> { + fn clone(&self) -> Self { + SplitInclusive(self.0.clone()) + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator + for SplitInclusive<'a, P> +{ + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {} + /// An iterator of [`u16`] over the string encoded as UTF-16. /// /// [`u16`]: ../../std/primitive.u16.html