std::str: Use iterators instead of while loops for CharSplitIterator

Embed an iterator in the CharSplitIterator struct, and combine that with
the former bool `only_ascii`; so use an enum instead.
This commit is contained in:
blake2-ppc 2013-08-19 15:34:48 +02:00
parent 30ab96b272
commit 8fe8302887

View File

@ -23,7 +23,7 @@
use container::{Container, Mutable};
use iter::Times;
use iterator::{Iterator, FromIterator, Extendable};
use iterator::{Filter, AdditiveIterator, Map};
use iterator::{Filter, AdditiveIterator, Map, Enumerate};
use iterator::{Invert, DoubleEndedIterator};
use libc;
use num::{Saturating, Zero};
@ -359,9 +359,18 @@ fn next_back(&mut self) -> Option<(uint, char)> {
/// Use with the `std::iterator` module.
pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
/// An iterator over byte index and either &u8 or char
#[deriving(Clone)]
enum OffsetIterator<'self> {
// use ByteIterator here when it can be cloned
ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
CharOffset(CharOffsetIterator<'self>),
}
/// An iterator over the substrings of a string, separated by `sep`.
#[deriving(Clone)]
pub struct CharSplitIterator<'self,Sep> {
priv iter: OffsetIterator<'self>,
priv string: &'self str,
priv position: uint,
priv sep: Sep,
@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
/// Whether an empty string at the end is allowed
priv allow_trailing_empty: bool,
priv finished: bool,
priv only_ascii: bool
}
/// An iterator over the words of a string, separated by an sequence of whitespace
@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
fn next(&mut self) -> Option<&'self str> {
if self.finished { return None }
let l = self.string.len();
let start = self.position;
let len = self.string.len();
if self.only_ascii {
// this gives a *huge* speed up for splitting on ASCII
// characters (e.g. '\n' or ' ')
while self.position < l && self.count > 0 {
let byte = self.string[self.position];
if self.sep.matches(byte as char) {
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
self.position += 1;
self.count -= 1;
return Some(slice);
}
self.position += 1;
}
} else {
while self.position < l && self.count > 0 {
let CharRange {ch, next} = self.string.char_range_at(self.position);
if self.sep.matches(ch) {
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
self.position = next;
self.count -= 1;
return Some(slice);
}
self.position = next;
if self.count > 0 {
match self.iter {
// this gives a *huge* speed up for splitting on ASCII
// characters (e.g. '\n' or ' ')
ByteOffset(ref mut iter) =>
for (idx, &byte) in *iter {
if self.sep.matches(byte as char) {
self.position = idx + 1;
self.count -= 1;
return Some(unsafe {
raw::slice_bytes(self.string, start, idx)
})
}
},
CharOffset(ref mut iter) =>
for (idx, ch) in *iter {
if self.sep.matches(ch) {
// skip over the separator
self.position = self.string.char_range_at(idx).next;
self.count -= 1;
return Some(unsafe {
raw::slice_bytes(self.string, start, idx)
})
}
},
}
}
self.finished = true;
if self.allow_trailing_empty || start < l {
Some(unsafe { raw::slice_bytes(self.string, start, l) })
if self.allow_trailing_empty || start < len {
Some(unsafe { raw::slice_bytes(self.string, start, len) })
} else {
None
}
@ -1327,15 +1335,19 @@ fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'
#[inline]
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
-> CharSplitIterator<'self, Sep> {
let only_ascii = sep.only_ascii();
let iter = if sep.only_ascii() {
ByteOffset(self.as_bytes().iter().enumerate())
} else {
CharOffset(self.char_offset_iter())
};
CharSplitIterator {
iter: iter,
string: *self,
position: 0,
sep: sep,
count: count,
allow_trailing_empty: allow_trailing_empty,
finished: false,
only_ascii: only_ascii
}
}