Added string pattern traits and basic implementantions

2014-12-30 21:54:17 +01:00 · 2014-12-30 21:54:17 +01:00 · 54f0bead81
commit 54f0bead81
parent d68eb3d248
3 changed files with 296 additions and 133 deletions
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@ -36,12 +36,16 @@ use result::Result::{self, Ok, Err};
 use slice::{self, SliceExt};
 use usize;

+pub use self::pattern::{Pattern, Matcher, ReverseMatcher, DoubleEndedMatcher};
+
+mod pattern;
+
 macro_rules! delegate_iter {
    (exact $te:ty : $ti:ty) => {
        delegate_iter!{$te : $ti}
        impl<'a> ExactSizeIterator for $ti {
            #[inline]
-            fn len(&self) -> usize {
+            fn len(&self) -> uint {
                self.0.len()
            }
        }
@ -56,7 +60,7 @@ macro_rules! delegate_iter {
                self.0.next()
            }
            #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                self.0.size_hint()
            }
        }
@ -78,7 +82,7 @@ macro_rules! delegate_iter {
                self.0.next()
            }
            #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                self.0.size_hint()
            }
        }
@ -100,7 +104,7 @@ macro_rules! delegate_iter {
                self.0.next()
            }
            #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                self.0.size_hint()
            }
        }
@ -149,6 +153,7 @@ impl FromStr for bool {

 /// An error returned when parsing a `bool` from a string fails.
 #[derive(Debug, Clone, PartialEq)]
+#[allow(missing_copy_implementations)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct ParseBoolError { _priv: () }

@ -178,7 +183,7 @@ pub enum Utf8Error {
    /// The offset is guaranteed to be in bounds of the slice in question, and
    /// the byte at the specified offset was the first invalid byte in the
    /// sequence detected.
-    InvalidByte(usize),
+    InvalidByte(uint),

    /// The byte slice was invalid because more bytes were needed but no more
    /// bytes were available.
@ -227,7 +232,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 pub unsafe fn from_c_str(s: *const i8) -> &'static str {
    let s = s as *const u8;
    let mut len = 0;
-    while *s.offset(len as isize) != 0 {
+    while *s.offset(len as int) != 0 {
        len += 1;
    }
    let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
@ -250,7 +255,7 @@ impl CharEq for char {
    fn matches(&mut self, c: char) -> bool { *self == c }

    #[inline]
-    fn only_ascii(&self) -> bool { (*self as u32) < 128 }
+    fn only_ascii(&self) -> bool { (*self as uint) < 128 }
 }

 impl<F> CharEq for F where F: FnMut(char) -> bool {
@ -383,7 +388,7 @@ impl<'a> Iterator for Chars<'a> {
    }

    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
+    fn size_hint(&self) -> (uint, Option<uint>) {
        let (len, _) = self.iter.size_hint();
        (len.saturating_add(3) / 4, Some(len))
    }
@ -428,16 +433,16 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct CharIndices<'a> {
-    front_offset: usize,
+    front_offset: uint,
    iter: Chars<'a>,
 }

 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for CharIndices<'a> {
-    type Item = (usize, char);
+    type Item = (uint, char);

    #[inline]
-    fn next(&mut self) -> Option<(usize, char)> {
+    fn next(&mut self) -> Option<(uint, char)> {
        let (pre_len, _) = self.iter.iter.size_hint();
        match self.iter.next() {
            None => None,
@ -451,7 +456,7 @@ impl<'a> Iterator for CharIndices<'a> {
    }

    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
+    fn size_hint(&self) -> (uint, Option<uint>) {
        self.iter.size_hint()
    }
 }
@ -459,7 +464,7 @@ impl<'a> Iterator for CharIndices<'a> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> DoubleEndedIterator for CharIndices<'a> {
    #[inline]
-    fn next_back(&mut self) -> Option<(usize, char)> {
+    fn next_back(&mut self) -> Option<(uint, char)> {
        match self.iter.next_back() {
            None => None,
            Some(ch) => {
@ -512,7 +517,7 @@ struct CharSplits<'a, Sep> {
 struct CharSplitsN<'a, Sep> {
    iter: CharSplits<'a, Sep>,
    /// The number of splits remaining
-    count: usize,
+    count: uint,
    invert: bool,
 }

@ -636,7 +641,7 @@ impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> {
 /// within a larger string using naive search
 #[derive(Clone)]
 struct NaiveSearcher {
-    position: usize
+    position: uint
 }

 impl NaiveSearcher {
@ -644,7 +649,7 @@ impl NaiveSearcher {
        NaiveSearcher { position: 0 }
    }

-    fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(usize, usize)> {
+    fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(uint, uint)> {
        while self.position + needle.len() <= haystack.len() {
            if &haystack[self.position .. self.position + needle.len()] == needle {
                let match_pos = self.position;
@ -663,13 +668,13 @@ impl NaiveSearcher {
 #[derive(Clone)]
 struct TwoWaySearcher {
    // constants
-    crit_pos: usize,
-    period: usize,
+    crit_pos: uint,
+    period: uint,
    byteset: u64,

    // variables
-    position: usize,
-    memory: usize
+    position: uint,
+    memory: uint
 }

 /*
@ -756,7 +761,7 @@ impl TwoWaySearcher {

        // This isn't in the original algorithm, as far as I'm aware.
        let byteset = needle.iter()
-                            .fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a);
+                            .fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);

        // A particularly readable explanation of what's going on here can be found
        // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
@ -794,8 +799,7 @@ impl TwoWaySearcher {
    // How far we can jump when we encounter a mismatch is all based on the fact
    // that (u, v) is a critical factorization for the needle.
    #[inline]
-    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
-            -> Option<(usize, usize)> {
+    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(uint, uint)> {
        'search: loop {
            // Check that we have room to search in
            if self.position + needle.len() > haystack.len() {
@ -805,7 +809,7 @@ impl TwoWaySearcher {
            // Quickly skip by large portions unrelated to our substring
            if (self.byteset >>
                    ((haystack[self.position + needle.len() - 1] & 0x3f)
-                     as usize)) & 1 == 0 {
+                     as uint)) & 1 == 0 {
                self.position += needle.len();
                if !long_period {
                    self.memory = 0;
@ -852,7 +856,7 @@ impl TwoWaySearcher {
    // Specifically, returns (i, p), where i is the starting index of v in some
    // critical factorization (u, v) and p = period(v)
    #[inline]
-    fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
+    fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
        let mut left = -1; // Corresponds to i in the paper
        let mut right = 0; // Corresponds to j in the paper
        let mut offset = 1; // Corresponds to k in the paper
@ -897,6 +901,7 @@ impl TwoWaySearcher {
 /// within a larger string using a dynamically chosen search algorithm
 #[derive(Clone)]
 enum Searcher {
+    EmptyNeedle { pos: usize, done: bool },
    Naive(NaiveSearcher),
    TwoWay(TwoWaySearcher),
    TwoWayLong(TwoWaySearcher)
@ -904,11 +909,16 @@ enum Searcher {

 impl Searcher {
    fn new(haystack: &[u8], needle: &[u8]) -> Searcher {
+        if needle.len() == 0 {
+            Searcher::EmptyNeedle {
+                pos: 0,
+                done: false
+            }
        // FIXME: Tune this.
        // FIXME(#16715): This unsigned integer addition will probably not
        // overflow because that would mean that the memory almost solely
        // consists of the needle. Needs #16715 to be formally fixed.
-        if needle.len() + 20 > haystack.len() {
+        } else if needle.len() + 20 > haystack.len() {
            Naive(NaiveSearcher::new())
        } else {
            let searcher = TwoWaySearcher::new(needle);
@ -938,23 +948,37 @@ pub struct MatchIndices<'a> {
 #[unstable(feature = "core", reason = "type may be removed")]
 pub struct SplitStr<'a> {
    it: MatchIndices<'a>,
-    last_end: usize,
+    last_end: uint,
    finished: bool
 }

 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for MatchIndices<'a> {
-    type Item = (usize, usize);
+    type Item = (uint, uint);

    #[inline]
-    fn next(&mut self) -> Option<(usize, usize)> {
+    fn next(&mut self) -> Option<(uint, uint)> {
        match self.searcher {
            Naive(ref mut searcher)
                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
            TwoWay(ref mut searcher)
                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
            TwoWayLong(ref mut searcher)
-                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
+                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
+            Searcher::EmptyNeedle { ref mut pos, ref mut done } => {
+                if !*done {
+                    let r = Some((*pos, *pos));
+                    if *pos == self.haystack.len() {
+                        *done = true;
+                    } else {
+                        use char::CharExt;
+                        *pos += self.haystack.char_at(*pos).len_utf8();
+                    }
+                    r
+                } else {
+                    None
+                }
+            }
        }
    }
 }
@ -994,7 +1018,7 @@ Section: Comparing strings
 fn eq_slice_(a: &str, b: &str) -> bool {
    // NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
    #[allow(improper_ctypes)]
-    extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
+    extern { fn memcmp(s1: *const i8, s2: *const i8, n: uint) -> i32; }
    a.len() == b.len() && unsafe {
        memcmp(a.as_ptr() as *const i8,
               b.as_ptr() as *const i8,
@ -1051,7 +1075,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
        // ASCII characters are always valid, so only large
        // bytes need more examination.
        if first >= 128 {
-            let w = UTF8_CHAR_WIDTH[first as usize] as usize;
+            let w = UTF8_CHAR_WIDTH[first as uint] as uint;
            let second = next!();
            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
            //        first  C2 80        last DF BF
@ -1126,7 +1150,7 @@ pub struct CharRange {
    /// Current `char`
    pub ch: char,
    /// Index of the first byte of the next `char`
-    pub next: usize,
+    pub next: uint,
 }

 /// Mask of the value bits of a continuation byte
@ -1211,10 +1235,10 @@ mod traits {
    /// // &s[3 .. 100];
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::Range<usize>> for str {
+    impl ops::Index<ops::Range<uint>> for str {
        type Output = str;
        #[inline]
-        fn index(&self, index: &ops::Range<usize>) -> &str {
+        fn index(&self, index: &ops::Range<uint>) -> &str {
            // is_char_boundary checks that the index is in [0, .len()]
            if index.start <= index.end &&
               self.is_char_boundary(index.start) &&
@ -1234,10 +1258,10 @@ mod traits {
    /// Panics when `end` does not point to a valid character, or is
    /// out of bounds.
    #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::RangeTo<usize>> for str {
+    impl ops::Index<ops::RangeTo<uint>> for str {
        type Output = str;
        #[inline]
-        fn index(&self, index: &ops::RangeTo<usize>) -> &str {
+        fn index(&self, index: &ops::RangeTo<uint>) -> &str {
            // is_char_boundary checks that the index is in [0, .len()]
            if self.is_char_boundary(index.end) {
                unsafe { self.slice_unchecked(0, index.end) }
@ -1254,10 +1278,10 @@ mod traits {
    /// Panics when `begin` does not point to a valid character, or is
    /// out of bounds.
    #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::RangeFrom<usize>> for str {
+    impl ops::Index<ops::RangeFrom<uint>> for str {
        type Output = str;
        #[inline]
-        fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
+        fn index(&self, index: &ops::RangeFrom<uint>) -> &str {
            // is_char_boundary checks that the index is in [0, .len()]
            if self.is_char_boundary(index.start) {
                unsafe { self.slice_unchecked(index.start, self.len()) }
@ -1328,46 +1352,49 @@ pub trait StrExt {
    // NB there are no docs here are they're all located on the StrExt trait in
    // libcollections, not here.

-    fn contains(&self, pat: &str) -> bool;
-    fn contains_char<P: CharEq>(&self, pat: P) -> bool;
+    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
+    fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
    fn chars<'a>(&'a self) -> Chars<'a>;
    fn bytes<'a>(&'a self) -> Bytes<'a>;
    fn char_indices<'a>(&'a self) -> CharIndices<'a>;
    fn split<'a, P: CharEq>(&'a self, pat: P) -> Split<'a, P>;
-    fn splitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
+    fn splitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> SplitN<'a, P>;
    fn split_terminator<'a, P: CharEq>(&'a self, pat: P) -> SplitTerminator<'a, P>;
-    fn rsplitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
+    fn rsplitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> RSplitN<'a, P>;
    fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>;
    fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a>;
    fn lines<'a>(&'a self) -> Lines<'a>;
    fn lines_any<'a>(&'a self) -> LinesAny<'a>;
-    fn char_len(&self) -> usize;
-    fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
-    unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
+    fn char_len(&self) -> uint;
+    fn slice_chars<'a>(&'a self, begin: uint, end: uint) -> &'a str;
+    unsafe fn slice_unchecked<'a>(&'a self, begin: uint, end: uint) -> &'a str;
    fn starts_with(&self, pat: &str) -> bool;
    fn ends_with(&self, pat: &str) -> bool;
-    fn trim_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn trim_left_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn trim_right_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn is_char_boundary(&self, index: usize) -> bool;
-    fn char_range_at(&self, start: usize) -> CharRange;
-    fn char_range_at_reverse(&self, start: usize) -> CharRange;
-    fn char_at(&self, i: usize) -> char;
-    fn char_at_reverse(&self, i: usize) -> char;
+    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Matcher: DoubleEndedMatcher<'a>;
+    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
+    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Matcher: ReverseMatcher<'a>;
+    fn is_char_boundary(&self, index: uint) -> bool;
+    fn char_range_at(&self, start: uint) -> CharRange;
+    fn char_range_at_reverse(&self, start: uint) -> CharRange;
+    fn char_at(&self, i: uint) -> char;
+    fn char_at_reverse(&self, i: uint) -> char;
    fn as_bytes<'a>(&'a self) -> &'a [u8];
-    fn find<P: CharEq>(&self, pat: P) -> Option<usize>;
-    fn rfind<P: CharEq>(&self, pat: P) -> Option<usize>;
-    fn find_str(&self, pat: &str) -> Option<usize>;
+    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>;
+    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
+        where P::Matcher: ReverseMatcher<'a>;
+    fn find_str(&self, pat: &str) -> Option<uint>;
    fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
-    fn subslice_offset(&self, inner: &str) -> usize;
+    fn subslice_offset(&self, inner: &str) -> uint;
    fn as_ptr(&self) -> *const u8;
-    fn len(&self) -> usize;
+    fn len(&self) -> uint;
    fn is_empty(&self) -> bool;
    fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
 }

 #[inline(never)]
-fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
+fn slice_error_fail(s: &str, begin: uint, end: uint) -> ! {
    assert!(begin <= end);
    panic!("index {} and/or {} in `{}` do not lie on character boundary",
          begin, end, s);
@ -1375,13 +1402,13 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {

 impl StrExt for str {
    #[inline]
-    fn contains(&self, needle: &str) -> bool {
-        self.find_str(needle).is_some()
+    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
+        pat.is_contained_in(self)
    }

    #[inline]
-    fn contains_char<P: CharEq>(&self, pat: P) -> bool {
-        self.find(pat).is_some()
+    fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
+        pat.is_contained_in(self)
    }

    #[inline]
@ -1411,7 +1438,7 @@ impl StrExt for str {
    }

    #[inline]
-    fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
+    fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
        SplitN(CharSplitsN {
            iter: self.split(pat).0,
            count: count,
@ -1428,7 +1455,7 @@ impl StrExt for str {
    }

    #[inline]
-    fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
+    fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
        RSplitN(CharSplitsN {
            iter: self.split(pat).0,
            count: count,
@ -1438,7 +1465,6 @@ impl StrExt for str {

    #[inline]
    fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> {
-        assert!(!sep.is_empty());
        MatchIndices {
            haystack: self,
            needle: sep,
@ -1472,9 +1498,9 @@ impl StrExt for str {
    }

    #[inline]
-    fn char_len(&self) -> usize { self.chars().count() }
+    fn char_len(&self) -> uint { self.chars().count() }

-    fn slice_chars(&self, begin: usize, end: usize) -> &str {
+    fn slice_chars(&self, begin: uint, end: uint) -> &str {
        assert!(begin <= end);
        let mut count = 0;
        let mut begin_byte = None;
@ -1498,9 +1524,9 @@ impl StrExt for str {
    }

    #[inline]
-    unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
+    unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
        mem::transmute(Slice {
-            data: self.as_ptr().offset(begin as isize),
+            data: self.as_ptr().offset(begin as int),
            len: end - begin,
        })
    }
@ -1518,41 +1544,71 @@ impl StrExt for str {
    }

    #[inline]
-    fn trim_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        let cur = match self.find(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(i) => unsafe { self.slice_unchecked(i, self.len()) }
-        };
-        match cur.rfind(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(i) => {
-                let right = cur.char_range_at(i).next;
-                unsafe { cur.slice_unchecked(0, right) }
+    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+    where P::Matcher: DoubleEndedMatcher<'a> {
+        let mut i = 0;
+        let mut matcher = pat.into_matcher(self);
+        let mut possible_end_match = None;
+        while let Some((a, b)) = Matcher::next(&mut matcher) {
+            if a == i {
+                i = b;
+            } else {
+                possible_end_match = Some((a, b));
+                break;
            }
        }
-    }
-
-    #[inline]
-    fn trim_left_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        match self.find(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(first) => unsafe { self.slice_unchecked(first, self.len()) }
-        }
-    }
-
-    #[inline]
-    fn trim_right_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        match self.rfind(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(last) => {
-                let next = self.char_range_at(last).next;
-                unsafe { self.slice_unchecked(0, next) }
+        let mut j = self.len();
+        while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher)
+                .or_else(|| possible_end_match.take()) {
+            if b == j {
+                j = a;
+            } else {
+                break;
            }
        }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(i, j)
+        }
    }

    #[inline]
-    fn is_char_boundary(&self, index: usize) -> bool {
+    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str {
+        let mut i = 0;
+        let mut matcher = pat.into_matcher(self);
+        while let Some((a, b)) = Matcher::next(&mut matcher) {
+            if a == i {
+                i = b;
+            } else {
+                break;
+            }
+        }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(i, self.len())
+        }
+    }
+
+    #[inline]
+    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str
+    where P::Matcher: ReverseMatcher<'a> {
+        let mut i = self.len();
+        let mut matcher = pat.into_matcher(self);
+        while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher) {
+            if b == i {
+                i = a;
+            } else {
+                break;
+            }
+        }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(0, i)
+        }
+    }
+
+    #[inline]
+    fn is_char_boundary(&self, index: uint) -> bool {
        if index == self.len() { return true; }
        match self.as_bytes().get(index) {
            None => false,
@ -1561,13 +1617,13 @@ impl StrExt for str {
    }

    #[inline]
-    fn char_range_at(&self, i: usize) -> CharRange {
+    fn char_range_at(&self, i: uint) -> CharRange {
        let (c, n) = char_range_at_raw(self.as_bytes(), i);
        CharRange { ch: unsafe { mem::transmute(c) }, next: n }
    }

    #[inline]
-    fn char_range_at_reverse(&self, start: usize) -> CharRange {
+    fn char_range_at_reverse(&self, start: uint) -> CharRange {
        let mut prev = start;

        prev = prev.saturating_sub(1);
@ -1576,14 +1632,14 @@ impl StrExt for str {
        }

        // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
-        fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
+        fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
            // while there is a previous byte == 10......
            while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
                i -= 1;
            }

            let mut val = s.as_bytes()[i] as u32;
-            let w = UTF8_CHAR_WIDTH[val as usize] as usize;
+            let w = UTF8_CHAR_WIDTH[val as uint] as uint;
            assert!((w != 0));

            val = utf8_first_byte!(val, w);
@ -1598,12 +1654,12 @@ impl StrExt for str {
    }

    #[inline]
-    fn char_at(&self, i: usize) -> char {
+    fn char_at(&self, i: uint) -> char {
        self.char_range_at(i).ch
    }

    #[inline]
-    fn char_at_reverse(&self, i: usize) -> char {
+    fn char_at_reverse(&self, i: uint) -> char {
        self.char_range_at_reverse(i).ch
    }

@ -1612,29 +1668,16 @@ impl StrExt for str {
        unsafe { mem::transmute(self) }
    }

-    fn find<P: CharEq>(&self, mut pat: P) -> Option<usize> {
-        if pat.only_ascii() {
-            self.bytes().position(|b| pat.matches(b as char))
-        } else {
-            for (index, c) in self.char_indices() {
-                if pat.matches(c) { return Some(index); }
-            }
-            None
-        }
+    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint> {
+        Matcher::next(&mut pat.into_matcher(self)).map(|(i, _)| i)
    }

-    fn rfind<P: CharEq>(&self, mut pat: P) -> Option<usize> {
-        if pat.only_ascii() {
-            self.bytes().rposition(|b| pat.matches(b as char))
-        } else {
-            for (index, c) in self.char_indices().rev() {
-                if pat.matches(c) { return Some(index); }
-            }
-            None
-        }
+    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
+    where P::Matcher: ReverseMatcher<'a> {
+        ReverseMatcher::next_back(&mut pat.into_matcher(self)).map(|(i, _)| i)
    }

-    fn find_str(&self, needle: &str) -> Option<usize> {
+    fn find_str(&self, needle: &str) -> Option<uint> {
        if needle.is_empty() {
            Some(0)
        } else {
@ -1655,10 +1698,10 @@ impl StrExt for str {
        }
    }

-    fn subslice_offset(&self, inner: &str) -> usize {
-        let a_start = self.as_ptr() as usize;
+    fn subslice_offset(&self, inner: &str) -> uint {
+        let a_start = self.as_ptr() as uint;
        let a_end = a_start + self.len();
-        let b_start = inner.as_ptr() as usize;
+        let b_start = inner.as_ptr() as uint;
        let b_end = b_start + inner.len();

        assert!(a_start <= b_start);
@ -1672,7 +1715,7 @@ impl StrExt for str {
    }

    #[inline]
-    fn len(&self) -> usize { self.repr().len }
+    fn len(&self) -> uint { self.repr().len }

    #[inline]
    fn is_empty(&self) -> bool { self.len() == 0 }
@ -1685,15 +1728,15 @@ impl StrExt for str {
 /// index of the next code point.
 #[inline]
 #[unstable(feature = "core")]
-pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
+pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
    if bytes[i] < 128u8 {
        return (bytes[i] as u32, i + 1);
    }

    // Multibyte case is a fn to allow char_range_at to inline cleanly
-    fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
+    fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
        let mut val = bytes[i] as u32;
-        let w = UTF8_CHAR_WIDTH[val as usize] as usize;
+        let w = UTF8_CHAR_WIDTH[val as uint] as uint;
        assert!((w != 0));

        val = utf8_first_byte!(val, w);
@ -1720,7 +1763,7 @@ impl<'a> Iterator for Lines<'a> {
    #[inline]
    fn next(&mut self) -> Option<&'a str> { self.inner.next() }
    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
+    fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
 }

 #[stable(feature = "rust1", since = "1.0.0")]
@ -1736,7 +1779,7 @@ impl<'a> Iterator for LinesAny<'a> {
    #[inline]
    fn next(&mut self) -> Option<&'a str> { self.inner.next() }
    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
+    fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
 }

 #[stable(feature = "rust1", since = "1.0.0")]
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@ -0,0 +1,113 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(missing_docs)]
+
+use prelude::*;
+use super::CharEq;
+
+// Pattern
+
+pub trait Pattern<'a>: Sized {
+    type Matcher: Matcher<'a>;
+    fn into_matcher(self, haystack: &'a str) -> Self::Matcher;
+
+    #[inline]
+    fn is_contained_in(self, haystack: &'a str) -> bool {
+        Matcher::next(&mut self.into_matcher(haystack)).is_some()
+    }
+}
+
+// Matcher
+
+pub unsafe trait Matcher<'a> {
+    fn haystack(&self) -> &'a str;
+    fn next(&mut self) -> Option<(usize, usize)>;
+}
+
+pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
+    fn next_back(&mut self) -> Option<(usize, usize)>;
+}
+
+pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
+
+// Impl for CharEq
+
+struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
+
+impl<'a, C: CharEq> Pattern<'a> for C {
+    type Matcher = CharEqMatcher<'a, C>;
+
+    #[inline]
+    fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
+        CharEqMatcher(self, haystack, haystack.char_indices())
+    }
+}
+
+unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        self.1
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize)> {
+        while let Some((i, c)) = self.2.next() {
+            if self.0.matches(c) {
+                return Some((i, i + c.len_utf8()));
+            }
+        }
+        None
+    }
+}
+
+unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, usize)> {
+        while let Some((i, c)) = self.2.next_back() {
+            if self.0.matches(c) {
+                return Some((i, i + c.len_utf8()));
+            }
+        }
+        None
+    }
+}
+
+impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
+
+// Impl for &str
+
+struct StrMatcher<'a>(super::MatchIndices<'a>);
+
+impl<'a> Pattern<'a> for &'a str {
+    type Matcher = StrMatcher<'a>;
+
+    #[inline]
+    fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a> {
+        let mi = super::MatchIndices {
+            haystack: haystack,
+            needle: self,
+            searcher: super::Searcher::new(haystack.as_bytes(), self.as_bytes())
+        };
+        StrMatcher(mi)
+    }
+}
+
+unsafe impl<'a> Matcher<'a> for StrMatcher<'a>  {
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        self.0.haystack
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize)> {
+        self.0.next()
+    }
+}
--- a/src/libcoretest/str.rs
+++ b/src/libcoretest/str.rs
@ -8,6 +8,13 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

+#[test]
+fn test_empty_match_indices() {
+    let data = "aä中!";
+    let vec: Vec<_> = data.match_indices("").collect();
+    assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
+}
+
 #[test]
 fn test_bool_from_str() {
    assert_eq!("true".parse().ok(), Some(true));