Add str::split_ascii_whitespace.

2018-05-05 00:33:20 -04:00 · 2018-05-05 00:33:20 -04:00 · b5cee029a5
commit b5cee029a5
parent 23b55161ab
3 changed files with 158 additions and 4 deletions
--- a/src/liballoc/lib.rs
+++ b/src/liballoc/lib.rs
@ -108,6 +108,7 @@
 #![cfg_attr(stage0, feature(repr_transparent))]
 #![feature(rustc_attrs)]
 #![feature(specialization)]
 #![feature(split_ascii_whitespace)]
 #![feature(staged_api)]
 #![feature(str_internals)]
 #![feature(trusted_len)]
--- a/src/liballoc/str.rs
+++ b/src/liballoc/str.rs
@ -78,6 +78,8 @@ pub use core::str::SplitWhitespace;
 pub use core::str::pattern;
 #[stable(feature = "encode_utf16", since = "1.8.0")]
 pub use core::str::EncodeUtf16;
 #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
 pub use core::str::SplitAsciiWhitespace;
 #[unstable(feature = "slice_concat_ext",
           reason = "trait should not have to exist",
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@ -21,7 +21,7 @@ use char;
 use fmt;
 use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
 use iter_private::TrustedRandomAccess;
-use slice::{self, SliceIndex};
+use slice::{self, SliceIndex, Split as SliceSplit};
 use mem;
 pub mod pattern;
@ -2722,7 +2722,10 @@ impl str {
    /// the original string slice, separated by any amount of whitespace.
    ///
    /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`. If you only want to split on ASCII whitespace
    /// instead, use [`split_ascii_whitespace`].
    ///
    /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
    ///
    /// # Examples
    ///
@ -2756,6 +2759,53 @@ impl str {
        SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
    }
    /// Splits a string slice by ASCII whitespace.
    ///
    /// The iterator returned will return string slices that are sub-slices of
    /// the original string slice, separated by any amount of ASCII whitespace.
    /// Empty pieces are filtered out, so leading, trailing and repeated
    /// whitespace never produce empty string slices.
    ///
    /// To split by Unicode `White_Space` instead, use [`split_whitespace`].
    ///
    /// [`split_whitespace`]: #method.split_whitespace
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// #![feature(split_ascii_whitespace)]
    /// let mut iter = "A few words".split_ascii_whitespace();
    ///
    /// assert_eq!(Some("A"), iter.next());
    /// assert_eq!(Some("few"), iter.next());
    /// assert_eq!(Some("words"), iter.next());
    ///
    /// assert_eq!(None, iter.next());
    /// ```
    ///
    /// All kinds of ASCII whitespace are considered:
    ///
    /// ```
    /// #![feature(split_ascii_whitespace)]
    /// let mut iter = " Mary   had\ta little  \n\t lamb".split_ascii_whitespace();
    /// assert_eq!(Some("Mary"), iter.next());
    /// assert_eq!(Some("had"), iter.next());
    /// assert_eq!(Some("a"), iter.next());
    /// assert_eq!(Some("little"), iter.next());
    /// assert_eq!(Some("lamb"), iter.next());
    ///
    /// assert_eq!(None, iter.next());
    /// ```
    #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
    #[inline]
    pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace {
        // Work on the raw bytes: an ASCII whitespace byte can never be part of
        // a multi-byte UTF-8 sequence, so every piece between separators is
        // itself valid UTF-8 and `UnsafeBytesToStr` can turn it back into a
        // `&str` without re-validating.
        let inner = self
            .as_bytes()
            .split(IsAsciiWhitespace)
            .filter(IsNotEmpty)
            .map(UnsafeBytesToStr);
        SplitAsciiWhitespace { inner }
    }
    /// An iterator over the lines of a string, as string slices.
    ///
    /// Lines are ended with either a newline (`\n`) or a carriage return with
@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
    inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
 }
 /// An iterator over the non-ASCII-whitespace substrings of a string,
 /// separated by any amount of ASCII whitespace.
 ///
 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
 /// See its documentation for more.
 ///
 /// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
 /// [`str`]: ../../std/primitive.str.html
 #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
 #[derive(Clone, Debug)]
 pub struct SplitAsciiWhitespace<'a> {
    // Adapter chain, innermost first: split the string's bytes wherever
    // `IsAsciiWhitespace` matches, drop empty pieces with `IsNotEmpty`, then
    // convert each remaining piece back to `&str` with `UnsafeBytesToStr`.
    inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, IsNotEmpty>, UnsafeBytesToStr>,
 }
 // Nameable stand-in for a closure: `split_whitespace` passes it to
 // `str::split`, which lets the resulting iterator type be written out in the
 // `SplitWhitespace` struct.
 #[derive(Clone)]
 struct IsWhitespace;
@ -3914,6 +3978,25 @@ impl FnMut<(char, )> for IsWhitespace {
    }
 }
 // Nameable stand-in for a closure: the byte predicate that
 // `split_ascii_whitespace` hands to `<[u8]>::split`. Being a named type lets
 // it appear in the `SplitAsciiWhitespace` struct definition.
 #[derive(Clone)]
 struct IsAsciiWhitespace;
 impl<'a> FnOnce<(&'a u8, )> for IsAsciiWhitespace {
    type Output = bool;
    #[inline]
    extern "rust-call" fn call_once(mut self, args: (&u8, )) -> bool {
        // A one-shot call has nothing extra to do; reuse the `FnMut` path.
        self.call_mut(args)
    }
 }
 impl<'a> FnMut<(&'a u8, )> for IsAsciiWhitespace {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, (byte, ): (&u8, )) -> bool {
        // The decision is delegated entirely to `u8::is_ascii_whitespace`.
        byte.is_ascii_whitespace()
    }
 }
 // Nameable stand-in for a closure: the `filter` predicate that discards
 // empty pieces. It is implemented both for `&str` pieces (used by
 // `SplitWhitespace`) and for `&[u8]` pieces (used by `SplitAsciiWhitespace`).
 #[derive(Clone)]
 struct IsNotEmpty;
@ -3921,30 +4004,72 @@ impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
    type Output = bool;
    #[inline]
-    extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_once(mut self, arg: (&'a &'b str, )) -> bool {
        self.call_mut(arg)
    }
 }
 // `filter` predicate over `&str` pieces: true unless the piece is empty.
 impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
    #[inline]
-    extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b str, )) -> bool {
        !arg.0.is_empty()
    }
 }
 // Byte-slice flavour of the `IsNotEmpty` predicate, used when filtering the
 // pieces produced by splitting a string's bytes.
 impl<'a, 'b> FnOnce<(&'a &'b [u8], )> for IsNotEmpty {
    type Output = bool;
    #[inline]
    extern "rust-call" fn call_once(mut self, args: (&'a &'b [u8], )) -> bool {
        // A one-shot call has nothing extra to do; reuse the `FnMut` path.
        self.call_mut(args)
    }
 }
 impl<'a, 'b> FnMut<(&'a &'b [u8], )> for IsNotEmpty {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, (piece, ): (&'a &'b [u8], )) -> bool {
        // Keep the piece unless it contains no bytes at all.
        !piece.is_empty()
    }
 }
 // Nameable stand-in for a closure that reinterprets a byte slice as `&str`
 // without validating it. It exists so the `map` step of
 // `SplitAsciiWhitespace` has a type that can be written down.
 #[derive(Clone)]
 struct UnsafeBytesToStr;
 impl<'a> FnOnce<(&'a [u8], )> for UnsafeBytesToStr {
    type Output = &'a str;
    #[inline]
    extern "rust-call" fn call_once(mut self, args: (&'a [u8], )) -> &'a str {
        // A one-shot call has nothing extra to do; reuse the `FnMut` path.
        self.call_mut(args)
    }
 }
 impl<'a> FnMut<(&'a [u8], )> for UnsafeBytesToStr {
    #[inline]
    extern "rust-call" fn call_mut(&mut self, (bytes, ): (&'a [u8], )) -> &'a str {
        // SAFETY: nothing is checked here, so this must only ever be fed
        // valid UTF-8. `split_ascii_whitespace` feeds it pieces of a `str`'s
        // bytes cut at ASCII whitespace bytes, and an ASCII byte cannot fall
        // inside a multi-byte UTF-8 sequence.
        unsafe { from_utf8_unchecked(bytes) }
    }
 }
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> Iterator for SplitWhitespace<'a> {
    type Item = &'a str;
    // Both methods forward unchanged to the wrapped adapter chain.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let pieces = &mut self.inner;
        pieces.next()
    }
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let pieces = &self.inner;
        pieces.size_hint()
    }
 }
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        self.inner.next_back()
    }
@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
 // Marker impl only: there are no methods to provide here.
 #[stable(feature = "fused", since = "1.26.0")]
 impl<'a> FusedIterator for SplitWhitespace<'a> {}
 #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
    type Item = &'a str;
    // Both methods forward unchanged to the wrapped adapter chain.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let pieces = &mut self.inner;
        pieces.next()
    }
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let pieces = &self.inner;
        pieces.size_hint()
    }
 }
 #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
    // Reverse iteration is forwarded unchanged to the wrapped adapter chain.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let pieces = &mut self.inner;
        pieces.next_back()
    }
 }
 // Marker impl only: there are no methods to provide here.
 #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
 impl<'a> FusedIterator for SplitAsciiWhitespace<'a> {}
 /// An iterator of [`u16`] over the string encoded as UTF-16.
 ///
 /// [`u16`]: ../../std/primitive.u16.html