diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs
index e25742a4a61..ec9b5eba561 100644
--- a/src/liballoc/lib.rs
+++ b/src/liballoc/lib.rs
@@ -108,6 +108,7 @@
 #![cfg_attr(stage0, feature(repr_transparent))]
 #![feature(rustc_attrs)]
 #![feature(specialization)]
+#![feature(split_ascii_whitespace)]
 #![feature(staged_api)]
 #![feature(str_internals)]
 #![feature(trusted_len)]
diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs
index 32ca8d1fa5e..ec9c39c916c 100644
--- a/src/liballoc/str.rs
+++ b/src/liballoc/str.rs
@@ -78,6 +78,8 @@ pub use core::str::SplitWhitespace;
 pub use core::str::pattern;
 #[stable(feature = "encode_utf16", since = "1.8.0")]
 pub use core::str::EncodeUtf16;
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+pub use core::str::SplitAsciiWhitespace;
 
 #[unstable(feature = "slice_concat_ext",
            reason = "trait should not have to exist",
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 42fb1bc238b..5ae2f6349e5 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -21,7 +21,7 @@ use char;
 use fmt;
 use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
 use iter_private::TrustedRandomAccess;
-use slice::{self, SliceIndex};
+use slice::{self, SliceIndex, Split as SliceSplit};
 use mem;
 
 pub mod pattern;
@@ -2722,7 +2722,10 @@ impl str {
     /// the original string slice, separated by any amount of whitespace.
     ///
     /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`. If you only want to split on ASCII whitespace
+    /// instead, use [`split_ascii_whitespace`].
+    ///
+    /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
     ///
     /// # Examples
     ///
@@ -2756,6 +2759,54 @@ impl str {
         SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
     }
 
+    /// Splits a string slice by ASCII whitespace.
+    ///
+    /// The iterator returned will return string slices that are sub-slices of
+    /// the original string slice, separated by any amount of ASCII whitespace.
+    ///
+    /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
+    ///
+    /// [`split_whitespace`]: #method.split_whitespace
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(split_ascii_whitespace)]
+    /// let mut iter = "A few words".split_ascii_whitespace();
+    ///
+    /// assert_eq!(Some("A"), iter.next());
+    /// assert_eq!(Some("few"), iter.next());
+    /// assert_eq!(Some("words"), iter.next());
+    ///
+    /// assert_eq!(None, iter.next());
+    /// ```
+    ///
+    /// All kinds of ASCII whitespace are considered:
+    ///
+    /// ```
+    /// #![feature(split_ascii_whitespace)]
+    /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
+    /// assert_eq!(Some("Mary"), iter.next());
+    /// assert_eq!(Some("had"), iter.next());
+    /// assert_eq!(Some("a"), iter.next());
+    /// assert_eq!(Some("little"), iter.next());
+    /// assert_eq!(Some("lamb"), iter.next());
+    ///
+    /// assert_eq!(None, iter.next());
+    /// ```
+    #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+    #[inline]
+    pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace {
+        let inner = self
+            .as_bytes()
+            .split(IsAsciiWhitespace)
+            .filter(IsNotEmpty)
+            .map(UnsafeBytesToStr);
+        SplitAsciiWhitespace { inner }
+    }
+
     /// An iterator over the lines of a string, as string slices.
     ///
     /// Lines are ended with either a newline (`\n`) or a carriage return with
@@ -3895,6 +3946,20 @@ pub struct SplitWhitespace<'a> {
     inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
 }
 
+/// An iterator over the non-ASCII-whitespace substrings of a string,
+/// separated by any amount of ASCII whitespace.
+///
+/// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
+/// See its documentation for more.
+///
+/// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
+/// [`str`]: ../../std/primitive.str.html
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+#[derive(Clone, Debug)]
+pub struct SplitAsciiWhitespace<'a> {
+    inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, IsNotEmpty>, UnsafeBytesToStr>,
+}
+
 #[derive(Clone)]
 struct IsWhitespace;
 
@@ -3914,6 +3979,26 @@ impl FnMut<(char, )> for IsWhitespace {
     }
 }
 
+/// Byte predicate handed to `slice::split`: true for ASCII whitespace bytes.
+#[derive(Clone)]
+struct IsAsciiWhitespace;
+
+impl<'a> FnOnce<(&'a u8, )> for IsAsciiWhitespace {
+    type Output = bool;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&u8, )) -> bool {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a> FnMut<(&'a u8, )> for IsAsciiWhitespace {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&u8, )) -> bool {
+        arg.0.is_ascii_whitespace()
+    }
+}
+
 #[derive(Clone)]
 struct IsNotEmpty;
 
@@ -3921,30 +4006,76 @@ impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
     type Output = bool;
 
     #[inline]
-    extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_once(mut self, arg: (&'a &'b str, )) -> bool {
         self.call_mut(arg)
     }
 }
 
 impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
     #[inline]
-    extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b str, )) -> bool {
         !arg.0.is_empty()
     }
 }
 
+impl<'a, 'b> FnOnce<(&'a &'b [u8], )> for IsNotEmpty {
+    type Output = bool;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&'a &'b [u8], )) -> bool {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a, 'b> FnMut<(&'a &'b [u8], )> for IsNotEmpty {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b [u8], )) -> bool {
+        !arg.0.is_empty()
+    }
+}
+
+/// Maps the byte pieces produced by `split_ascii_whitespace` back to `&str`.
+#[derive(Clone)]
+struct UnsafeBytesToStr;
+
+impl<'a> FnOnce<(&'a [u8], )> for UnsafeBytesToStr {
+    type Output = &'a str;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&'a [u8], )) -> &'a str {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a> FnMut<(&'a [u8], )> for UnsafeBytesToStr {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a [u8], )) -> &'a str {
+        // SAFETY: `split_ascii_whitespace` only feeds this sub-slices of
+        // `str::as_bytes()` cut at ASCII whitespace bytes. ASCII bytes never occur
+        // inside a multi-byte UTF-8 sequence, so each piece is valid UTF-8.
+        unsafe { from_utf8_unchecked(arg.0) }
+    }
+}
+
 
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> Iterator for SplitWhitespace<'a> {
     type Item = &'a str;
 
+    #[inline]
     fn next(&mut self) -> Option<&'a str> {
         self.inner.next()
     }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
 }
 
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
+    #[inline]
     fn next_back(&mut self) -> Option<&'a str> {
         self.inner.next_back()
     }
@@ -3953,6 +4084,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
 #[stable(feature = "fused", since = "1.26.0")]
 impl<'a> FusedIterator for SplitWhitespace<'a> {}
 
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> Iterator for SplitAsciiWhitespace<'a> {
+    type Item = &'a str;
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.inner.next()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a str> {
+        self.inner.next_back()
+    }
+}
+
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> FusedIterator for SplitAsciiWhitespace<'a> {}
+
 /// An iterator of [`u16`] over the string encoded as UTF-16.
 ///
 /// [`u16`]: ../../std/primitive.u16.html