Added string pattern traits and basic implementations

This commit is contained in:
Marvin Löbel 2014-12-30 21:54:17 +01:00
parent d68eb3d248
commit 54f0bead81
3 changed files with 296 additions and 133 deletions

View File

@ -36,12 +36,16 @@ use result::Result::{self, Ok, Err};
use slice::{self, SliceExt};
use usize;
pub use self::pattern::{Pattern, Matcher, ReverseMatcher, DoubleEndedMatcher};
mod pattern;
macro_rules! delegate_iter {
(exact $te:ty : $ti:ty) => {
delegate_iter!{$te : $ti}
impl<'a> ExactSizeIterator for $ti {
#[inline]
fn len(&self) -> usize {
fn len(&self) -> uint {
self.0.len()
}
}
@ -56,7 +60,7 @@ macro_rules! delegate_iter {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
fn size_hint(&self) -> (uint, Option<uint>) {
self.0.size_hint()
}
}
@ -78,7 +82,7 @@ macro_rules! delegate_iter {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
fn size_hint(&self) -> (uint, Option<uint>) {
self.0.size_hint()
}
}
@ -100,7 +104,7 @@ macro_rules! delegate_iter {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
fn size_hint(&self) -> (uint, Option<uint>) {
self.0.size_hint()
}
}
@ -149,6 +153,7 @@ impl FromStr for bool {
/// An error returned when parsing a `bool` from a string fails.
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_copy_implementations)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ParseBoolError { _priv: () }
@ -178,7 +183,7 @@ pub enum Utf8Error {
/// The offset is guaranteed to be in bounds of the slice in question, and
/// the byte at the specified offset was the first invalid byte in the
/// sequence detected.
InvalidByte(usize),
InvalidByte(uint),
/// The byte slice was invalid because more bytes were needed but no more
/// bytes were available.
@ -227,7 +232,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
let s = s as *const u8;
let mut len = 0;
while *s.offset(len as isize) != 0 {
while *s.offset(len as int) != 0 {
len += 1;
}
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
@ -250,7 +255,7 @@ impl CharEq for char {
fn matches(&mut self, c: char) -> bool { *self == c }
#[inline]
fn only_ascii(&self) -> bool { (*self as u32) < 128 }
fn only_ascii(&self) -> bool { (*self as uint) < 128 }
}
impl<F> CharEq for F where F: FnMut(char) -> bool {
@ -383,7 +388,7 @@ impl<'a> Iterator for Chars<'a> {
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
fn size_hint(&self) -> (uint, Option<uint>) {
let (len, _) = self.iter.size_hint();
(len.saturating_add(3) / 4, Some(len))
}
@ -428,16 +433,16 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CharIndices<'a> {
front_offset: usize,
front_offset: uint,
iter: Chars<'a>,
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for CharIndices<'a> {
type Item = (usize, char);
type Item = (uint, char);
#[inline]
fn next(&mut self) -> Option<(usize, char)> {
fn next(&mut self) -> Option<(uint, char)> {
let (pre_len, _) = self.iter.iter.size_hint();
match self.iter.next() {
None => None,
@ -451,7 +456,7 @@ impl<'a> Iterator for CharIndices<'a> {
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
fn size_hint(&self) -> (uint, Option<uint>) {
self.iter.size_hint()
}
}
@ -459,7 +464,7 @@ impl<'a> Iterator for CharIndices<'a> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for CharIndices<'a> {
#[inline]
fn next_back(&mut self) -> Option<(usize, char)> {
fn next_back(&mut self) -> Option<(uint, char)> {
match self.iter.next_back() {
None => None,
Some(ch) => {
@ -512,7 +517,7 @@ struct CharSplits<'a, Sep> {
struct CharSplitsN<'a, Sep> {
iter: CharSplits<'a, Sep>,
/// The number of splits remaining
count: usize,
count: uint,
invert: bool,
}
@ -636,7 +641,7 @@ impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> {
/// within a larger string using naive search
#[derive(Clone)]
struct NaiveSearcher {
position: usize
position: uint
}
impl NaiveSearcher {
@ -644,7 +649,7 @@ impl NaiveSearcher {
NaiveSearcher { position: 0 }
}
fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(usize, usize)> {
fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(uint, uint)> {
while self.position + needle.len() <= haystack.len() {
if &haystack[self.position .. self.position + needle.len()] == needle {
let match_pos = self.position;
@ -663,13 +668,13 @@ impl NaiveSearcher {
#[derive(Clone)]
struct TwoWaySearcher {
// constants
crit_pos: usize,
period: usize,
crit_pos: uint,
period: uint,
byteset: u64,
// variables
position: usize,
memory: usize
position: uint,
memory: uint
}
/*
@ -756,7 +761,7 @@ impl TwoWaySearcher {
// This isn't in the original algorithm, as far as I'm aware.
let byteset = needle.iter()
.fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a);
.fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);
// A particularly readable explanation of what's going on here can be found
// in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
@ -794,8 +799,7 @@ impl TwoWaySearcher {
// How far we can jump when we encounter a mismatch is all based on the fact
// that (u, v) is a critical factorization for the needle.
#[inline]
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
-> Option<(usize, usize)> {
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(uint, uint)> {
'search: loop {
// Check that we have room to search in
if self.position + needle.len() > haystack.len() {
@ -805,7 +809,7 @@ impl TwoWaySearcher {
// Quickly skip by large portions unrelated to our substring
if (self.byteset >>
((haystack[self.position + needle.len() - 1] & 0x3f)
as usize)) & 1 == 0 {
as uint)) & 1 == 0 {
self.position += needle.len();
if !long_period {
self.memory = 0;
@ -852,7 +856,7 @@ impl TwoWaySearcher {
// Specifically, returns (i, p), where i is the starting index of v in some
// critical factorization (u, v) and p = period(v)
#[inline]
fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
let mut left = -1; // Corresponds to i in the paper
let mut right = 0; // Corresponds to j in the paper
let mut offset = 1; // Corresponds to k in the paper
@ -897,6 +901,7 @@ impl TwoWaySearcher {
/// within a larger string using a dynamically chosen search algorithm
#[derive(Clone)]
enum Searcher {
EmptyNeedle { pos: usize, done: bool },
Naive(NaiveSearcher),
TwoWay(TwoWaySearcher),
TwoWayLong(TwoWaySearcher)
@ -904,11 +909,16 @@ enum Searcher {
impl Searcher {
fn new(haystack: &[u8], needle: &[u8]) -> Searcher {
if needle.len() == 0 {
Searcher::EmptyNeedle {
pos: 0,
done: false
}
// FIXME: Tune this.
// FIXME(#16715): This unsigned integer addition will probably not
// overflow because that would mean that the memory almost solely
// consists of the needle. Needs #16715 to be formally fixed.
if needle.len() + 20 > haystack.len() {
} else if needle.len() + 20 > haystack.len() {
Naive(NaiveSearcher::new())
} else {
let searcher = TwoWaySearcher::new(needle);
@ -938,23 +948,37 @@ pub struct MatchIndices<'a> {
#[unstable(feature = "core", reason = "type may be removed")]
pub struct SplitStr<'a> {
it: MatchIndices<'a>,
last_end: usize,
last_end: uint,
finished: bool
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for MatchIndices<'a> {
type Item = (usize, usize);
type Item = (uint, uint);
#[inline]
fn next(&mut self) -> Option<(usize, usize)> {
fn next(&mut self) -> Option<(uint, uint)> {
match self.searcher {
Naive(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
TwoWay(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
TwoWayLong(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
Searcher::EmptyNeedle { ref mut pos, ref mut done } => {
if !*done {
let r = Some((*pos, *pos));
if *pos == self.haystack.len() {
*done = true;
} else {
use char::CharExt;
*pos += self.haystack.char_at(*pos).len_utf8();
}
r
} else {
None
}
}
}
}
}
@ -994,7 +1018,7 @@ Section: Comparing strings
fn eq_slice_(a: &str, b: &str) -> bool {
// NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
#[allow(improper_ctypes)]
extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
extern { fn memcmp(s1: *const i8, s2: *const i8, n: uint) -> i32; }
a.len() == b.len() && unsafe {
memcmp(a.as_ptr() as *const i8,
b.as_ptr() as *const i8,
@ -1051,7 +1075,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
// ASCII characters are always valid, so only large
// bytes need more examination.
if first >= 128 {
let w = UTF8_CHAR_WIDTH[first as usize] as usize;
let w = UTF8_CHAR_WIDTH[first as uint] as uint;
let second = next!();
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
// first C2 80 last DF BF
@ -1126,7 +1150,7 @@ pub struct CharRange {
/// Current `char`
pub ch: char,
/// Index of the first byte of the next `char`
pub next: usize,
pub next: uint,
}
/// Mask of the value bits of a continuation byte
@ -1211,10 +1235,10 @@ mod traits {
/// // &s[3 .. 100];
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
impl ops::Index<ops::Range<usize>> for str {
impl ops::Index<ops::Range<uint>> for str {
type Output = str;
#[inline]
fn index(&self, index: &ops::Range<usize>) -> &str {
fn index(&self, index: &ops::Range<uint>) -> &str {
// is_char_boundary checks that the index is in [0, .len()]
if index.start <= index.end &&
self.is_char_boundary(index.start) &&
@ -1234,10 +1258,10 @@ mod traits {
/// Panics when `end` does not point to a valid character, or is
/// out of bounds.
#[stable(feature = "rust1", since = "1.0.0")]
impl ops::Index<ops::RangeTo<usize>> for str {
impl ops::Index<ops::RangeTo<uint>> for str {
type Output = str;
#[inline]
fn index(&self, index: &ops::RangeTo<usize>) -> &str {
fn index(&self, index: &ops::RangeTo<uint>) -> &str {
// is_char_boundary checks that the index is in [0, .len()]
if self.is_char_boundary(index.end) {
unsafe { self.slice_unchecked(0, index.end) }
@ -1254,10 +1278,10 @@ mod traits {
/// Panics when `begin` does not point to a valid character, or is
/// out of bounds.
#[stable(feature = "rust1", since = "1.0.0")]
impl ops::Index<ops::RangeFrom<usize>> for str {
impl ops::Index<ops::RangeFrom<uint>> for str {
type Output = str;
#[inline]
fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
fn index(&self, index: &ops::RangeFrom<uint>) -> &str {
// is_char_boundary checks that the index is in [0, .len()]
if self.is_char_boundary(index.start) {
unsafe { self.slice_unchecked(index.start, self.len()) }
@ -1328,46 +1352,49 @@ pub trait StrExt {
// NB there are no docs here as they're all located on the StrExt trait in
// libcollections, not here.
fn contains(&self, pat: &str) -> bool;
fn contains_char<P: CharEq>(&self, pat: P) -> bool;
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
fn chars<'a>(&'a self) -> Chars<'a>;
fn bytes<'a>(&'a self) -> Bytes<'a>;
fn char_indices<'a>(&'a self) -> CharIndices<'a>;
fn split<'a, P: CharEq>(&'a self, pat: P) -> Split<'a, P>;
fn splitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
fn splitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> SplitN<'a, P>;
fn split_terminator<'a, P: CharEq>(&'a self, pat: P) -> SplitTerminator<'a, P>;
fn rsplitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
fn rsplitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> RSplitN<'a, P>;
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>;
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a>;
fn lines<'a>(&'a self) -> Lines<'a>;
fn lines_any<'a>(&'a self) -> LinesAny<'a>;
fn char_len(&self) -> usize;
fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
fn char_len(&self) -> uint;
fn slice_chars<'a>(&'a self, begin: uint, end: uint) -> &'a str;
unsafe fn slice_unchecked<'a>(&'a self, begin: uint, end: uint) -> &'a str;
fn starts_with(&self, pat: &str) -> bool;
fn ends_with(&self, pat: &str) -> bool;
fn trim_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
fn trim_left_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
fn trim_right_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
fn is_char_boundary(&self, index: usize) -> bool;
fn char_range_at(&self, start: usize) -> CharRange;
fn char_range_at_reverse(&self, start: usize) -> CharRange;
fn char_at(&self, i: usize) -> char;
fn char_at_reverse(&self, i: usize) -> char;
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Matcher: DoubleEndedMatcher<'a>;
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Matcher: ReverseMatcher<'a>;
fn is_char_boundary(&self, index: uint) -> bool;
fn char_range_at(&self, start: uint) -> CharRange;
fn char_range_at_reverse(&self, start: uint) -> CharRange;
fn char_at(&self, i: uint) -> char;
fn char_at_reverse(&self, i: uint) -> char;
fn as_bytes<'a>(&'a self) -> &'a [u8];
fn find<P: CharEq>(&self, pat: P) -> Option<usize>;
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize>;
fn find_str(&self, pat: &str) -> Option<usize>;
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>;
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
where P::Matcher: ReverseMatcher<'a>;
fn find_str(&self, pat: &str) -> Option<uint>;
fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
fn subslice_offset(&self, inner: &str) -> usize;
fn subslice_offset(&self, inner: &str) -> uint;
fn as_ptr(&self) -> *const u8;
fn len(&self) -> usize;
fn len(&self) -> uint;
fn is_empty(&self) -> bool;
fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
}
#[inline(never)]
fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
fn slice_error_fail(s: &str, begin: uint, end: uint) -> ! {
assert!(begin <= end);
panic!("index {} and/or {} in `{}` do not lie on character boundary",
begin, end, s);
@ -1375,13 +1402,13 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
impl StrExt for str {
#[inline]
fn contains(&self, needle: &str) -> bool {
self.find_str(needle).is_some()
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
pat.is_contained_in(self)
}
#[inline]
fn contains_char<P: CharEq>(&self, pat: P) -> bool {
self.find(pat).is_some()
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
pat.is_contained_in(self)
}
#[inline]
@ -1411,7 +1438,7 @@ impl StrExt for str {
}
#[inline]
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
SplitN(CharSplitsN {
iter: self.split(pat).0,
count: count,
@ -1428,7 +1455,7 @@ impl StrExt for str {
}
#[inline]
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
RSplitN(CharSplitsN {
iter: self.split(pat).0,
count: count,
@ -1438,7 +1465,6 @@ impl StrExt for str {
#[inline]
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> {
assert!(!sep.is_empty());
MatchIndices {
haystack: self,
needle: sep,
@ -1472,9 +1498,9 @@ impl StrExt for str {
}
#[inline]
fn char_len(&self) -> usize { self.chars().count() }
fn char_len(&self) -> uint { self.chars().count() }
fn slice_chars(&self, begin: usize, end: usize) -> &str {
fn slice_chars(&self, begin: uint, end: uint) -> &str {
assert!(begin <= end);
let mut count = 0;
let mut begin_byte = None;
@ -1498,9 +1524,9 @@ impl StrExt for str {
}
#[inline]
unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
mem::transmute(Slice {
data: self.as_ptr().offset(begin as isize),
data: self.as_ptr().offset(begin as int),
len: end - begin,
})
}
@ -1518,41 +1544,71 @@ impl StrExt for str {
}
#[inline]
fn trim_matches<P: CharEq>(&self, mut pat: P) -> &str {
let cur = match self.find(|c: char| !pat.matches(c)) {
None => "",
Some(i) => unsafe { self.slice_unchecked(i, self.len()) }
};
match cur.rfind(|c: char| !pat.matches(c)) {
None => "",
Some(i) => {
let right = cur.char_range_at(i).next;
unsafe { cur.slice_unchecked(0, right) }
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Matcher: DoubleEndedMatcher<'a> {
let mut i = 0;
let mut matcher = pat.into_matcher(self);
let mut possible_end_match = None;
while let Some((a, b)) = Matcher::next(&mut matcher) {
if a == i {
i = b;
} else {
possible_end_match = Some((a, b));
break;
}
}
}
#[inline]
fn trim_left_matches<P: CharEq>(&self, mut pat: P) -> &str {
match self.find(|c: char| !pat.matches(c)) {
None => "",
Some(first) => unsafe { self.slice_unchecked(first, self.len()) }
}
}
#[inline]
fn trim_right_matches<P: CharEq>(&self, mut pat: P) -> &str {
match self.rfind(|c: char| !pat.matches(c)) {
None => "",
Some(last) => {
let next = self.char_range_at(last).next;
unsafe { self.slice_unchecked(0, next) }
let mut j = self.len();
while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher)
.or_else(|| possible_end_match.take()) {
if b == j {
j = a;
} else {
break;
}
}
unsafe {
// Matcher is known to return valid indices
self.slice_unchecked(i, j)
}
}
#[inline]
fn is_char_boundary(&self, index: usize) -> bool {
// Strips every leading match of `pat`: walks the matches from the front and
// advances the start offset for as long as each match begins exactly where
// the previous one ended.
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str {
    let mut matcher = pat.into_matcher(self);
    let mut start = 0;
    loop {
        match Matcher::next(&mut matcher) {
            // Match is glued to the already-trimmed prefix: skip past it.
            Some((from, to)) if from == start => start = to,
            // Either a gap before the next match or no match at all.
            _ => break,
        }
    }
    unsafe {
        // Matcher is known to return valid indices
        self.slice_unchecked(start, self.len())
    }
}
#[inline]
// Strips every trailing match of `pat`: walks the matches from the back and
// pulls the end offset in for as long as each match ends exactly where the
// previously trimmed one started.
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str
    where P::Matcher: ReverseMatcher<'a> {
    let mut matcher = pat.into_matcher(self);
    let mut end = self.len();
    loop {
        match ReverseMatcher::next_back(&mut matcher) {
            // Match is glued to the already-trimmed suffix: cut it off.
            Some((from, to)) if to == end => end = from,
            // Either a gap after this match or no match at all.
            _ => break,
        }
    }
    unsafe {
        // Matcher is known to return valid indices
        self.slice_unchecked(0, end)
    }
}
#[inline]
fn is_char_boundary(&self, index: uint) -> bool {
if index == self.len() { return true; }
match self.as_bytes().get(index) {
None => false,
@ -1561,13 +1617,13 @@ impl StrExt for str {
}
#[inline]
fn char_range_at(&self, i: usize) -> CharRange {
fn char_range_at(&self, i: uint) -> CharRange {
let (c, n) = char_range_at_raw(self.as_bytes(), i);
CharRange { ch: unsafe { mem::transmute(c) }, next: n }
}
#[inline]
fn char_range_at_reverse(&self, start: usize) -> CharRange {
fn char_range_at_reverse(&self, start: uint) -> CharRange {
let mut prev = start;
prev = prev.saturating_sub(1);
@ -1576,14 +1632,14 @@ impl StrExt for str {
}
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
// while there is a previous byte == 10......
while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
i -= 1;
}
let mut val = s.as_bytes()[i] as u32;
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
assert!((w != 0));
val = utf8_first_byte!(val, w);
@ -1598,12 +1654,12 @@ impl StrExt for str {
}
#[inline]
fn char_at(&self, i: usize) -> char {
fn char_at(&self, i: uint) -> char {
self.char_range_at(i).ch
}
#[inline]
fn char_at_reverse(&self, i: usize) -> char {
fn char_at_reverse(&self, i: uint) -> char {
self.char_range_at_reverse(i).ch
}
@ -1612,29 +1668,16 @@ impl StrExt for str {
unsafe { mem::transmute(self) }
}
fn find<P: CharEq>(&self, mut pat: P) -> Option<usize> {
if pat.only_ascii() {
self.bytes().position(|b| pat.matches(b as char))
} else {
for (index, c) in self.char_indices() {
if pat.matches(c) { return Some(index); }
}
None
}
// Returns the start index of the first match of `pat`, if there is one.
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint> {
    let mut matcher = pat.into_matcher(self);
    match Matcher::next(&mut matcher) {
        Some((start, _)) => Some(start),
        None => None,
    }
}
fn rfind<P: CharEq>(&self, mut pat: P) -> Option<usize> {
if pat.only_ascii() {
self.bytes().rposition(|b| pat.matches(b as char))
} else {
for (index, c) in self.char_indices().rev() {
if pat.matches(c) { return Some(index); }
}
None
}
// Returns the start index of the last match of `pat`, if there is one.
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
    where P::Matcher: ReverseMatcher<'a> {
    let mut matcher = pat.into_matcher(self);
    match ReverseMatcher::next_back(&mut matcher) {
        Some((start, _)) => Some(start),
        None => None,
    }
}
fn find_str(&self, needle: &str) -> Option<usize> {
fn find_str(&self, needle: &str) -> Option<uint> {
if needle.is_empty() {
Some(0)
} else {
@ -1655,10 +1698,10 @@ impl StrExt for str {
}
}
fn subslice_offset(&self, inner: &str) -> usize {
let a_start = self.as_ptr() as usize;
fn subslice_offset(&self, inner: &str) -> uint {
let a_start = self.as_ptr() as uint;
let a_end = a_start + self.len();
let b_start = inner.as_ptr() as usize;
let b_start = inner.as_ptr() as uint;
let b_end = b_start + inner.len();
assert!(a_start <= b_start);
@ -1672,7 +1715,7 @@ impl StrExt for str {
}
#[inline]
fn len(&self) -> usize { self.repr().len }
fn len(&self) -> uint { self.repr().len }
#[inline]
fn is_empty(&self) -> bool { self.len() == 0 }
@ -1685,15 +1728,15 @@ impl StrExt for str {
/// index of the next code point.
#[inline]
#[unstable(feature = "core")]
pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
if bytes[i] < 128u8 {
return (bytes[i] as u32, i + 1);
}
// Multibyte case is a fn to allow char_range_at to inline cleanly
fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
let mut val = bytes[i] as u32;
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
assert!((w != 0));
val = utf8_first_byte!(val, w);
@ -1720,7 +1763,7 @@ impl<'a> Iterator for Lines<'a> {
#[inline]
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
}
#[stable(feature = "rust1", since = "1.0.0")]
@ -1736,7 +1779,7 @@ impl<'a> Iterator for LinesAny<'a> {
#[inline]
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
}
#[stable(feature = "rust1", since = "1.0.0")]

113
src/libcore/str/pattern.rs Normal file
View File

@ -0,0 +1,113 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![allow(missing_docs)]
use prelude::*;
use super::CharEq;
// Pattern
/// A value that can be searched for inside a string slice.
///
/// The pattern is consumed together with the haystack to build its
/// associated `Matcher`, which then reports the matches it finds.
pub trait Pattern<'a>: Sized {
    /// The matcher type produced by `into_matcher` for this pattern.
    type Matcher: Matcher<'a>;

    /// Consumes the pattern and builds a matcher over `haystack`.
    fn into_matcher(self, haystack: &'a str) -> Self::Matcher;

    /// Returns `true` if the matcher built over `haystack` yields at least
    /// one match.
    #[inline]
    fn is_contained_in(self, haystack: &'a str) -> bool {
        let mut matcher = self.into_matcher(haystack);
        match Matcher::next(&mut matcher) {
            Some(_) => true,
            None => false,
        }
    }
}
// Matcher
/// Reports the matches of a pattern inside a haystack.
///
/// NOTE(review): presumably declared `unsafe` because callers such as the
/// `trim_*_matches` methods hand the returned indices to `slice_unchecked`
/// without re-validating them; confirm the exact contract implementors must
/// uphold.
pub unsafe trait Matcher<'a> {
    /// Returns the haystack associated with this matcher.
    fn haystack(&self) -> &'a str;
    /// Returns the next match as a pair of indices into the haystack, or
    /// `None` once there are no more matches. The implementations in this
    /// file return `(start, end)` byte offsets with `end` exclusive.
    fn next(&mut self) -> Option<(usize, usize)>;
}
/// A `Matcher` that can additionally report matches starting from the back
/// of the haystack.
///
/// NOTE(review): presumably `unsafe` for the same reason as `Matcher` (the
/// returned indices are used for unchecked slicing); confirm.
pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
    /// Returns the next match when searching from the back, as a pair of
    /// indices into the haystack, or `None` once there are no more matches.
    fn next_back(&mut self) -> Option<(usize, usize)>;
}
/// Marker trait (no methods of its own) on top of `ReverseMatcher`.
/// `trim_matches` requires it of a pattern's matcher, as it drives one and
/// the same matcher with both `next` and `next_back`.
pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
// Impl for CharEq
/// Matcher for any `CharEq` pattern.
///
/// Fields, in order: the `CharEq` value used to test characters, the
/// haystack, and the `CharIndices` iterator over that haystack.
struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
/// Every `CharEq` value can be used as a pattern; it matches one character
/// at a time.
impl<'a, C: CharEq> Pattern<'a> for C {
    type Matcher = CharEqMatcher<'a, C>;

    /// Bundles the `CharEq` value with the haystack and a `char_indices`
    /// iterator over it.
    #[inline]
    fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
        let indices = haystack.char_indices();
        CharEqMatcher(self, haystack, indices)
    }
}
unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
    /// Returns the haystack stored in the matcher.
    #[inline]
    fn haystack(&self) -> &'a str {
        let text: &'a str = self.1;
        text
    }

    /// Pulls characters from the front of the `CharIndices` iterator until
    /// one satisfies the `CharEq`, and returns that character's byte range.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize)> {
        loop {
            let (start, ch) = match self.2.next() {
                Some(pair) => pair,
                // Iterator exhausted: no further matches.
                None => return None,
            };
            if self.0.matches(ch) {
                // The match covers exactly the bytes of this character.
                return Some((start, start + ch.len_utf8()));
            }
        }
    }
}
unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
    /// Pulls characters from the back of the `CharIndices` iterator until
    /// one satisfies the `CharEq`, and returns that character's byte range.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, usize)> {
        loop {
            let (start, ch) = match self.2.next_back() {
                Some(pair) => pair,
                // Iterator exhausted: no further matches.
                None => return None,
            };
            if self.0.matches(ch) {
                // The match covers exactly the bytes of this character.
                return Some((start, start + ch.len_utf8()));
            }
        }
    }
}
/// Opts `CharEqMatcher` into the `DoubleEndedMatcher` marker, which
/// `trim_matches` requires; both directions draw from the same
/// `CharIndices` iterator.
impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
// Impl for &str
/// Matcher for `&str` patterns; a thin wrapper around the `MatchIndices`
/// iterator, which does the actual substring search.
struct StrMatcher<'a>(super::MatchIndices<'a>);
/// A string slice used as a pattern matches its occurrences as a substring.
impl<'a> Pattern<'a> for &'a str {
    type Matcher = StrMatcher<'a>;

    /// Builds a `MatchIndices` over `haystack` with `self` as the needle and
    /// wraps it in a `StrMatcher`.
    #[inline]
    fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a> {
        // The searcher is set up from the raw bytes of both strings.
        let searcher = super::Searcher::new(haystack.as_bytes(), self.as_bytes());
        StrMatcher(super::MatchIndices {
            haystack: haystack,
            needle: self,
            searcher: searcher,
        })
    }
}
unsafe impl<'a> Matcher<'a> for StrMatcher<'a> {
#[inline]
fn haystack(&self) -> &'a str {
self.0.haystack
}
#[inline]
fn next(&mut self) -> Option<(usize, usize)> {
self.0.next()
}
}

View File

@ -8,6 +8,13 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[test]
fn test_empty_match_indices() {
    // An empty needle is expected to match at every character boundary,
    // including the very end. The characters of the haystack are 1, 2, 3
    // and 1 bytes long, hence the boundaries 0, 1, 3, 6 and 7.
    let haystack = "aä中!";
    let matches = haystack.match_indices("").collect::<Vec<_>>();
    assert_eq!(matches, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
}
#[test]
fn test_bool_from_str() {
assert_eq!("true".parse().ok(), Some(true));