std::str: Use iterators instead of while loops for CharSplitIterator
Embed an iterator in the CharSplitIterator struct, and combine that with the former bool `only_ascii`; so use an enum instead.
This commit is contained in:
parent
30ab96b272
commit
8fe8302887
@ -23,7 +23,7 @@
|
||||
use container::{Container, Mutable};
|
||||
use iter::Times;
|
||||
use iterator::{Iterator, FromIterator, Extendable};
|
||||
use iterator::{Filter, AdditiveIterator, Map};
|
||||
use iterator::{Filter, AdditiveIterator, Map, Enumerate};
|
||||
use iterator::{Invert, DoubleEndedIterator};
|
||||
use libc;
|
||||
use num::{Saturating, Zero};
|
||||
@ -359,9 +359,18 @@ fn next_back(&mut self) -> Option<(uint, char)> {
|
||||
/// Use with the `std::iterator` module.
|
||||
pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
|
||||
|
||||
/// An iterator over byte index and either &u8 or char
|
||||
#[deriving(Clone)]
|
||||
enum OffsetIterator<'self> {
|
||||
// use ByteIterator here when it can be cloned
|
||||
ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
|
||||
CharOffset(CharOffsetIterator<'self>),
|
||||
}
|
||||
|
||||
/// An iterator over the substrings of a string, separated by `sep`.
|
||||
#[deriving(Clone)]
|
||||
pub struct CharSplitIterator<'self,Sep> {
|
||||
priv iter: OffsetIterator<'self>,
|
||||
priv string: &'self str,
|
||||
priv position: uint,
|
||||
priv sep: Sep,
|
||||
@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
|
||||
/// Whether an empty string at the end is allowed
|
||||
priv allow_trailing_empty: bool,
|
||||
priv finished: bool,
|
||||
priv only_ascii: bool
|
||||
}
|
||||
|
||||
/// An iterator over the words of a string, separated by an sequence of whitespace
|
||||
@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
|
||||
fn next(&mut self) -> Option<&'self str> {
|
||||
if self.finished { return None }
|
||||
|
||||
let l = self.string.len();
|
||||
let start = self.position;
|
||||
let len = self.string.len();
|
||||
|
||||
if self.only_ascii {
|
||||
// this gives a *huge* speed up for splitting on ASCII
|
||||
// characters (e.g. '\n' or ' ')
|
||||
while self.position < l && self.count > 0 {
|
||||
let byte = self.string[self.position];
|
||||
|
||||
if self.sep.matches(byte as char) {
|
||||
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
|
||||
self.position += 1;
|
||||
self.count -= 1;
|
||||
return Some(slice);
|
||||
}
|
||||
self.position += 1;
|
||||
}
|
||||
} else {
|
||||
while self.position < l && self.count > 0 {
|
||||
let CharRange {ch, next} = self.string.char_range_at(self.position);
|
||||
|
||||
if self.sep.matches(ch) {
|
||||
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
|
||||
self.position = next;
|
||||
self.count -= 1;
|
||||
return Some(slice);
|
||||
}
|
||||
self.position = next;
|
||||
if self.count > 0 {
|
||||
match self.iter {
|
||||
// this gives a *huge* speed up for splitting on ASCII
|
||||
// characters (e.g. '\n' or ' ')
|
||||
ByteOffset(ref mut iter) =>
|
||||
for (idx, &byte) in *iter {
|
||||
if self.sep.matches(byte as char) {
|
||||
self.position = idx + 1;
|
||||
self.count -= 1;
|
||||
return Some(unsafe {
|
||||
raw::slice_bytes(self.string, start, idx)
|
||||
})
|
||||
}
|
||||
},
|
||||
CharOffset(ref mut iter) =>
|
||||
for (idx, ch) in *iter {
|
||||
if self.sep.matches(ch) {
|
||||
// skip over the separator
|
||||
self.position = self.string.char_range_at(idx).next;
|
||||
self.count -= 1;
|
||||
return Some(unsafe {
|
||||
raw::slice_bytes(self.string, start, idx)
|
||||
})
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
self.finished = true;
|
||||
if self.allow_trailing_empty || start < l {
|
||||
Some(unsafe { raw::slice_bytes(self.string, start, l) })
|
||||
if self.allow_trailing_empty || start < len {
|
||||
Some(unsafe { raw::slice_bytes(self.string, start, len) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@ -1327,15 +1335,19 @@ fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'
|
||||
#[inline]
|
||||
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
|
||||
-> CharSplitIterator<'self, Sep> {
|
||||
let only_ascii = sep.only_ascii();
|
||||
let iter = if sep.only_ascii() {
|
||||
ByteOffset(self.as_bytes().iter().enumerate())
|
||||
} else {
|
||||
CharOffset(self.char_offset_iter())
|
||||
};
|
||||
CharSplitIterator {
|
||||
iter: iter,
|
||||
string: *self,
|
||||
position: 0,
|
||||
sep: sep,
|
||||
count: count,
|
||||
allow_trailing_empty: allow_trailing_empty,
|
||||
finished: false,
|
||||
only_ascii: only_ascii
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user