Added string pattern traits and basic implementantions
This commit is contained in:
parent
d68eb3d248
commit
54f0bead81
@ -36,12 +36,16 @@ use result::Result::{self, Ok, Err};
|
||||
use slice::{self, SliceExt};
|
||||
use usize;
|
||||
|
||||
pub use self::pattern::{Pattern, Matcher, ReverseMatcher, DoubleEndedMatcher};
|
||||
|
||||
mod pattern;
|
||||
|
||||
macro_rules! delegate_iter {
|
||||
(exact $te:ty : $ti:ty) => {
|
||||
delegate_iter!{$te : $ti}
|
||||
impl<'a> ExactSizeIterator for $ti {
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
fn len(&self) -> uint {
|
||||
self.0.len()
|
||||
}
|
||||
}
|
||||
@ -56,7 +60,7 @@ macro_rules! delegate_iter {
|
||||
self.0.next()
|
||||
}
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
self.0.size_hint()
|
||||
}
|
||||
}
|
||||
@ -78,7 +82,7 @@ macro_rules! delegate_iter {
|
||||
self.0.next()
|
||||
}
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
self.0.size_hint()
|
||||
}
|
||||
}
|
||||
@ -100,7 +104,7 @@ macro_rules! delegate_iter {
|
||||
self.0.next()
|
||||
}
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
self.0.size_hint()
|
||||
}
|
||||
}
|
||||
@ -149,6 +153,7 @@ impl FromStr for bool {
|
||||
|
||||
/// An error returned when parsing a `bool` from a string fails.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub struct ParseBoolError { _priv: () }
|
||||
|
||||
@ -178,7 +183,7 @@ pub enum Utf8Error {
|
||||
/// The offset is guaranteed to be in bounds of the slice in question, and
|
||||
/// the byte at the specified offset was the first invalid byte in the
|
||||
/// sequence detected.
|
||||
InvalidByte(usize),
|
||||
InvalidByte(uint),
|
||||
|
||||
/// The byte slice was invalid because more bytes were needed but no more
|
||||
/// bytes were available.
|
||||
@ -227,7 +232,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
|
||||
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
|
||||
let s = s as *const u8;
|
||||
let mut len = 0;
|
||||
while *s.offset(len as isize) != 0 {
|
||||
while *s.offset(len as int) != 0 {
|
||||
len += 1;
|
||||
}
|
||||
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
|
||||
@ -250,7 +255,7 @@ impl CharEq for char {
|
||||
fn matches(&mut self, c: char) -> bool { *self == c }
|
||||
|
||||
#[inline]
|
||||
fn only_ascii(&self) -> bool { (*self as u32) < 128 }
|
||||
fn only_ascii(&self) -> bool { (*self as uint) < 128 }
|
||||
}
|
||||
|
||||
impl<F> CharEq for F where F: FnMut(char) -> bool {
|
||||
@ -383,7 +388,7 @@ impl<'a> Iterator for Chars<'a> {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
let (len, _) = self.iter.size_hint();
|
||||
(len.saturating_add(3) / 4, Some(len))
|
||||
}
|
||||
@ -428,16 +433,16 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
|
||||
#[derive(Clone)]
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub struct CharIndices<'a> {
|
||||
front_offset: usize,
|
||||
front_offset: uint,
|
||||
iter: Chars<'a>,
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl<'a> Iterator for CharIndices<'a> {
|
||||
type Item = (usize, char);
|
||||
type Item = (uint, char);
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, char)> {
|
||||
fn next(&mut self) -> Option<(uint, char)> {
|
||||
let (pre_len, _) = self.iter.iter.size_hint();
|
||||
match self.iter.next() {
|
||||
None => None,
|
||||
@ -451,7 +456,7 @@ impl<'a> Iterator for CharIndices<'a> {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
}
|
||||
@ -459,7 +464,7 @@ impl<'a> Iterator for CharIndices<'a> {
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl<'a> DoubleEndedIterator for CharIndices<'a> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<(usize, char)> {
|
||||
fn next_back(&mut self) -> Option<(uint, char)> {
|
||||
match self.iter.next_back() {
|
||||
None => None,
|
||||
Some(ch) => {
|
||||
@ -512,7 +517,7 @@ struct CharSplits<'a, Sep> {
|
||||
struct CharSplitsN<'a, Sep> {
|
||||
iter: CharSplits<'a, Sep>,
|
||||
/// The number of splits remaining
|
||||
count: usize,
|
||||
count: uint,
|
||||
invert: bool,
|
||||
}
|
||||
|
||||
@ -636,7 +641,7 @@ impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> {
|
||||
/// within a larger string using naive search
|
||||
#[derive(Clone)]
|
||||
struct NaiveSearcher {
|
||||
position: usize
|
||||
position: uint
|
||||
}
|
||||
|
||||
impl NaiveSearcher {
|
||||
@ -644,7 +649,7 @@ impl NaiveSearcher {
|
||||
NaiveSearcher { position: 0 }
|
||||
}
|
||||
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(usize, usize)> {
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(uint, uint)> {
|
||||
while self.position + needle.len() <= haystack.len() {
|
||||
if &haystack[self.position .. self.position + needle.len()] == needle {
|
||||
let match_pos = self.position;
|
||||
@ -663,13 +668,13 @@ impl NaiveSearcher {
|
||||
#[derive(Clone)]
|
||||
struct TwoWaySearcher {
|
||||
// constants
|
||||
crit_pos: usize,
|
||||
period: usize,
|
||||
crit_pos: uint,
|
||||
period: uint,
|
||||
byteset: u64,
|
||||
|
||||
// variables
|
||||
position: usize,
|
||||
memory: usize
|
||||
position: uint,
|
||||
memory: uint
|
||||
}
|
||||
|
||||
/*
|
||||
@ -756,7 +761,7 @@ impl TwoWaySearcher {
|
||||
|
||||
// This isn't in the original algorithm, as far as I'm aware.
|
||||
let byteset = needle.iter()
|
||||
.fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a);
|
||||
.fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);
|
||||
|
||||
// A particularly readable explanation of what's going on here can be found
|
||||
// in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
|
||||
@ -794,8 +799,7 @@ impl TwoWaySearcher {
|
||||
// How far we can jump when we encounter a mismatch is all based on the fact
|
||||
// that (u, v) is a critical factorization for the needle.
|
||||
#[inline]
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
|
||||
-> Option<(usize, usize)> {
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(uint, uint)> {
|
||||
'search: loop {
|
||||
// Check that we have room to search in
|
||||
if self.position + needle.len() > haystack.len() {
|
||||
@ -805,7 +809,7 @@ impl TwoWaySearcher {
|
||||
// Quickly skip by large portions unrelated to our substring
|
||||
if (self.byteset >>
|
||||
((haystack[self.position + needle.len() - 1] & 0x3f)
|
||||
as usize)) & 1 == 0 {
|
||||
as uint)) & 1 == 0 {
|
||||
self.position += needle.len();
|
||||
if !long_period {
|
||||
self.memory = 0;
|
||||
@ -852,7 +856,7 @@ impl TwoWaySearcher {
|
||||
// Specifically, returns (i, p), where i is the starting index of v in some
|
||||
// critical factorization (u, v) and p = period(v)
|
||||
#[inline]
|
||||
fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
|
||||
fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
|
||||
let mut left = -1; // Corresponds to i in the paper
|
||||
let mut right = 0; // Corresponds to j in the paper
|
||||
let mut offset = 1; // Corresponds to k in the paper
|
||||
@ -897,6 +901,7 @@ impl TwoWaySearcher {
|
||||
/// within a larger string using a dynamically chosen search algorithm
|
||||
#[derive(Clone)]
|
||||
enum Searcher {
|
||||
EmptyNeedle { pos: usize, done: bool },
|
||||
Naive(NaiveSearcher),
|
||||
TwoWay(TwoWaySearcher),
|
||||
TwoWayLong(TwoWaySearcher)
|
||||
@ -904,11 +909,16 @@ enum Searcher {
|
||||
|
||||
impl Searcher {
|
||||
fn new(haystack: &[u8], needle: &[u8]) -> Searcher {
|
||||
if needle.len() == 0 {
|
||||
Searcher::EmptyNeedle {
|
||||
pos: 0,
|
||||
done: false
|
||||
}
|
||||
// FIXME: Tune this.
|
||||
// FIXME(#16715): This unsigned integer addition will probably not
|
||||
// overflow because that would mean that the memory almost solely
|
||||
// consists of the needle. Needs #16715 to be formally fixed.
|
||||
if needle.len() + 20 > haystack.len() {
|
||||
} else if needle.len() + 20 > haystack.len() {
|
||||
Naive(NaiveSearcher::new())
|
||||
} else {
|
||||
let searcher = TwoWaySearcher::new(needle);
|
||||
@ -938,23 +948,37 @@ pub struct MatchIndices<'a> {
|
||||
#[unstable(feature = "core", reason = "type may be removed")]
|
||||
pub struct SplitStr<'a> {
|
||||
it: MatchIndices<'a>,
|
||||
last_end: usize,
|
||||
last_end: uint,
|
||||
finished: bool
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl<'a> Iterator for MatchIndices<'a> {
|
||||
type Item = (usize, usize);
|
||||
type Item = (uint, uint);
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
fn next(&mut self) -> Option<(uint, uint)> {
|
||||
match self.searcher {
|
||||
Naive(ref mut searcher)
|
||||
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
|
||||
TwoWay(ref mut searcher)
|
||||
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
|
||||
TwoWayLong(ref mut searcher)
|
||||
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
|
||||
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
|
||||
Searcher::EmptyNeedle { ref mut pos, ref mut done } => {
|
||||
if !*done {
|
||||
let r = Some((*pos, *pos));
|
||||
if *pos == self.haystack.len() {
|
||||
*done = true;
|
||||
} else {
|
||||
use char::CharExt;
|
||||
*pos += self.haystack.char_at(*pos).len_utf8();
|
||||
}
|
||||
r
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -994,7 +1018,7 @@ Section: Comparing strings
|
||||
fn eq_slice_(a: &str, b: &str) -> bool {
|
||||
// NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
|
||||
#[allow(improper_ctypes)]
|
||||
extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
|
||||
extern { fn memcmp(s1: *const i8, s2: *const i8, n: uint) -> i32; }
|
||||
a.len() == b.len() && unsafe {
|
||||
memcmp(a.as_ptr() as *const i8,
|
||||
b.as_ptr() as *const i8,
|
||||
@ -1051,7 +1075,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
|
||||
// ASCII characters are always valid, so only large
|
||||
// bytes need more examination.
|
||||
if first >= 128 {
|
||||
let w = UTF8_CHAR_WIDTH[first as usize] as usize;
|
||||
let w = UTF8_CHAR_WIDTH[first as uint] as uint;
|
||||
let second = next!();
|
||||
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
|
||||
// first C2 80 last DF BF
|
||||
@ -1126,7 +1150,7 @@ pub struct CharRange {
|
||||
/// Current `char`
|
||||
pub ch: char,
|
||||
/// Index of the first byte of the next `char`
|
||||
pub next: usize,
|
||||
pub next: uint,
|
||||
}
|
||||
|
||||
/// Mask of the value bits of a continuation byte
|
||||
@ -1211,10 +1235,10 @@ mod traits {
|
||||
/// // &s[3 .. 100];
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl ops::Index<ops::Range<usize>> for str {
|
||||
impl ops::Index<ops::Range<uint>> for str {
|
||||
type Output = str;
|
||||
#[inline]
|
||||
fn index(&self, index: &ops::Range<usize>) -> &str {
|
||||
fn index(&self, index: &ops::Range<uint>) -> &str {
|
||||
// is_char_boundary checks that the index is in [0, .len()]
|
||||
if index.start <= index.end &&
|
||||
self.is_char_boundary(index.start) &&
|
||||
@ -1234,10 +1258,10 @@ mod traits {
|
||||
/// Panics when `end` does not point to a valid character, or is
|
||||
/// out of bounds.
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl ops::Index<ops::RangeTo<usize>> for str {
|
||||
impl ops::Index<ops::RangeTo<uint>> for str {
|
||||
type Output = str;
|
||||
#[inline]
|
||||
fn index(&self, index: &ops::RangeTo<usize>) -> &str {
|
||||
fn index(&self, index: &ops::RangeTo<uint>) -> &str {
|
||||
// is_char_boundary checks that the index is in [0, .len()]
|
||||
if self.is_char_boundary(index.end) {
|
||||
unsafe { self.slice_unchecked(0, index.end) }
|
||||
@ -1254,10 +1278,10 @@ mod traits {
|
||||
/// Panics when `begin` does not point to a valid character, or is
|
||||
/// out of bounds.
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl ops::Index<ops::RangeFrom<usize>> for str {
|
||||
impl ops::Index<ops::RangeFrom<uint>> for str {
|
||||
type Output = str;
|
||||
#[inline]
|
||||
fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
|
||||
fn index(&self, index: &ops::RangeFrom<uint>) -> &str {
|
||||
// is_char_boundary checks that the index is in [0, .len()]
|
||||
if self.is_char_boundary(index.start) {
|
||||
unsafe { self.slice_unchecked(index.start, self.len()) }
|
||||
@ -1328,46 +1352,49 @@ pub trait StrExt {
|
||||
// NB there are no docs here are they're all located on the StrExt trait in
|
||||
// libcollections, not here.
|
||||
|
||||
fn contains(&self, pat: &str) -> bool;
|
||||
fn contains_char<P: CharEq>(&self, pat: P) -> bool;
|
||||
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
|
||||
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
|
||||
fn chars<'a>(&'a self) -> Chars<'a>;
|
||||
fn bytes<'a>(&'a self) -> Bytes<'a>;
|
||||
fn char_indices<'a>(&'a self) -> CharIndices<'a>;
|
||||
fn split<'a, P: CharEq>(&'a self, pat: P) -> Split<'a, P>;
|
||||
fn splitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
|
||||
fn splitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> SplitN<'a, P>;
|
||||
fn split_terminator<'a, P: CharEq>(&'a self, pat: P) -> SplitTerminator<'a, P>;
|
||||
fn rsplitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
|
||||
fn rsplitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> RSplitN<'a, P>;
|
||||
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>;
|
||||
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a>;
|
||||
fn lines<'a>(&'a self) -> Lines<'a>;
|
||||
fn lines_any<'a>(&'a self) -> LinesAny<'a>;
|
||||
fn char_len(&self) -> usize;
|
||||
fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
|
||||
unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
|
||||
fn char_len(&self) -> uint;
|
||||
fn slice_chars<'a>(&'a self, begin: uint, end: uint) -> &'a str;
|
||||
unsafe fn slice_unchecked<'a>(&'a self, begin: uint, end: uint) -> &'a str;
|
||||
fn starts_with(&self, pat: &str) -> bool;
|
||||
fn ends_with(&self, pat: &str) -> bool;
|
||||
fn trim_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
|
||||
fn trim_left_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
|
||||
fn trim_right_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
|
||||
fn is_char_boundary(&self, index: usize) -> bool;
|
||||
fn char_range_at(&self, start: usize) -> CharRange;
|
||||
fn char_range_at_reverse(&self, start: usize) -> CharRange;
|
||||
fn char_at(&self, i: usize) -> char;
|
||||
fn char_at_reverse(&self, i: usize) -> char;
|
||||
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
|
||||
where P::Matcher: DoubleEndedMatcher<'a>;
|
||||
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
|
||||
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
|
||||
where P::Matcher: ReverseMatcher<'a>;
|
||||
fn is_char_boundary(&self, index: uint) -> bool;
|
||||
fn char_range_at(&self, start: uint) -> CharRange;
|
||||
fn char_range_at_reverse(&self, start: uint) -> CharRange;
|
||||
fn char_at(&self, i: uint) -> char;
|
||||
fn char_at_reverse(&self, i: uint) -> char;
|
||||
fn as_bytes<'a>(&'a self) -> &'a [u8];
|
||||
fn find<P: CharEq>(&self, pat: P) -> Option<usize>;
|
||||
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize>;
|
||||
fn find_str(&self, pat: &str) -> Option<usize>;
|
||||
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>;
|
||||
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
|
||||
where P::Matcher: ReverseMatcher<'a>;
|
||||
fn find_str(&self, pat: &str) -> Option<uint>;
|
||||
fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
|
||||
fn subslice_offset(&self, inner: &str) -> usize;
|
||||
fn subslice_offset(&self, inner: &str) -> uint;
|
||||
fn as_ptr(&self) -> *const u8;
|
||||
fn len(&self) -> usize;
|
||||
fn len(&self) -> uint;
|
||||
fn is_empty(&self) -> bool;
|
||||
fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
|
||||
fn slice_error_fail(s: &str, begin: uint, end: uint) -> ! {
|
||||
assert!(begin <= end);
|
||||
panic!("index {} and/or {} in `{}` do not lie on character boundary",
|
||||
begin, end, s);
|
||||
@ -1375,13 +1402,13 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
|
||||
|
||||
impl StrExt for str {
|
||||
#[inline]
|
||||
fn contains(&self, needle: &str) -> bool {
|
||||
self.find_str(needle).is_some()
|
||||
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
|
||||
pat.is_contained_in(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains_char<P: CharEq>(&self, pat: P) -> bool {
|
||||
self.find(pat).is_some()
|
||||
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
|
||||
pat.is_contained_in(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -1411,7 +1438,7 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
|
||||
fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
|
||||
SplitN(CharSplitsN {
|
||||
iter: self.split(pat).0,
|
||||
count: count,
|
||||
@ -1428,7 +1455,7 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
|
||||
fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
|
||||
RSplitN(CharSplitsN {
|
||||
iter: self.split(pat).0,
|
||||
count: count,
|
||||
@ -1438,7 +1465,6 @@ impl StrExt for str {
|
||||
|
||||
#[inline]
|
||||
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> {
|
||||
assert!(!sep.is_empty());
|
||||
MatchIndices {
|
||||
haystack: self,
|
||||
needle: sep,
|
||||
@ -1472,9 +1498,9 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_len(&self) -> usize { self.chars().count() }
|
||||
fn char_len(&self) -> uint { self.chars().count() }
|
||||
|
||||
fn slice_chars(&self, begin: usize, end: usize) -> &str {
|
||||
fn slice_chars(&self, begin: uint, end: uint) -> &str {
|
||||
assert!(begin <= end);
|
||||
let mut count = 0;
|
||||
let mut begin_byte = None;
|
||||
@ -1498,9 +1524,9 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
|
||||
unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
|
||||
mem::transmute(Slice {
|
||||
data: self.as_ptr().offset(begin as isize),
|
||||
data: self.as_ptr().offset(begin as int),
|
||||
len: end - begin,
|
||||
})
|
||||
}
|
||||
@ -1518,41 +1544,71 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_matches<P: CharEq>(&self, mut pat: P) -> &str {
|
||||
let cur = match self.find(|c: char| !pat.matches(c)) {
|
||||
None => "",
|
||||
Some(i) => unsafe { self.slice_unchecked(i, self.len()) }
|
||||
};
|
||||
match cur.rfind(|c: char| !pat.matches(c)) {
|
||||
None => "",
|
||||
Some(i) => {
|
||||
let right = cur.char_range_at(i).next;
|
||||
unsafe { cur.slice_unchecked(0, right) }
|
||||
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
|
||||
where P::Matcher: DoubleEndedMatcher<'a> {
|
||||
let mut i = 0;
|
||||
let mut matcher = pat.into_matcher(self);
|
||||
let mut possible_end_match = None;
|
||||
while let Some((a, b)) = Matcher::next(&mut matcher) {
|
||||
if a == i {
|
||||
i = b;
|
||||
} else {
|
||||
possible_end_match = Some((a, b));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_left_matches<P: CharEq>(&self, mut pat: P) -> &str {
|
||||
match self.find(|c: char| !pat.matches(c)) {
|
||||
None => "",
|
||||
Some(first) => unsafe { self.slice_unchecked(first, self.len()) }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_right_matches<P: CharEq>(&self, mut pat: P) -> &str {
|
||||
match self.rfind(|c: char| !pat.matches(c)) {
|
||||
None => "",
|
||||
Some(last) => {
|
||||
let next = self.char_range_at(last).next;
|
||||
unsafe { self.slice_unchecked(0, next) }
|
||||
let mut j = self.len();
|
||||
while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher)
|
||||
.or_else(|| possible_end_match.take()) {
|
||||
if b == j {
|
||||
j = a;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
// Matcher is known to return valid indices
|
||||
self.slice_unchecked(i, j)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_char_boundary(&self, index: usize) -> bool {
|
||||
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str {
|
||||
let mut i = 0;
|
||||
let mut matcher = pat.into_matcher(self);
|
||||
while let Some((a, b)) = Matcher::next(&mut matcher) {
|
||||
if a == i {
|
||||
i = b;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
// Matcher is known to return valid indices
|
||||
self.slice_unchecked(i, self.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str
|
||||
where P::Matcher: ReverseMatcher<'a> {
|
||||
let mut i = self.len();
|
||||
let mut matcher = pat.into_matcher(self);
|
||||
while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher) {
|
||||
if b == i {
|
||||
i = a;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
// Matcher is known to return valid indices
|
||||
self.slice_unchecked(0, i)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_char_boundary(&self, index: uint) -> bool {
|
||||
if index == self.len() { return true; }
|
||||
match self.as_bytes().get(index) {
|
||||
None => false,
|
||||
@ -1561,13 +1617,13 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_range_at(&self, i: usize) -> CharRange {
|
||||
fn char_range_at(&self, i: uint) -> CharRange {
|
||||
let (c, n) = char_range_at_raw(self.as_bytes(), i);
|
||||
CharRange { ch: unsafe { mem::transmute(c) }, next: n }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_range_at_reverse(&self, start: usize) -> CharRange {
|
||||
fn char_range_at_reverse(&self, start: uint) -> CharRange {
|
||||
let mut prev = start;
|
||||
|
||||
prev = prev.saturating_sub(1);
|
||||
@ -1576,14 +1632,14 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
|
||||
fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
|
||||
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
|
||||
// while there is a previous byte == 10......
|
||||
while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
|
||||
i -= 1;
|
||||
}
|
||||
|
||||
let mut val = s.as_bytes()[i] as u32;
|
||||
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
|
||||
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
||||
assert!((w != 0));
|
||||
|
||||
val = utf8_first_byte!(val, w);
|
||||
@ -1598,12 +1654,12 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_at(&self, i: usize) -> char {
|
||||
fn char_at(&self, i: uint) -> char {
|
||||
self.char_range_at(i).ch
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_at_reverse(&self, i: usize) -> char {
|
||||
fn char_at_reverse(&self, i: uint) -> char {
|
||||
self.char_range_at_reverse(i).ch
|
||||
}
|
||||
|
||||
@ -1612,29 +1668,16 @@ impl StrExt for str {
|
||||
unsafe { mem::transmute(self) }
|
||||
}
|
||||
|
||||
fn find<P: CharEq>(&self, mut pat: P) -> Option<usize> {
|
||||
if pat.only_ascii() {
|
||||
self.bytes().position(|b| pat.matches(b as char))
|
||||
} else {
|
||||
for (index, c) in self.char_indices() {
|
||||
if pat.matches(c) { return Some(index); }
|
||||
}
|
||||
None
|
||||
}
|
||||
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint> {
|
||||
Matcher::next(&mut pat.into_matcher(self)).map(|(i, _)| i)
|
||||
}
|
||||
|
||||
fn rfind<P: CharEq>(&self, mut pat: P) -> Option<usize> {
|
||||
if pat.only_ascii() {
|
||||
self.bytes().rposition(|b| pat.matches(b as char))
|
||||
} else {
|
||||
for (index, c) in self.char_indices().rev() {
|
||||
if pat.matches(c) { return Some(index); }
|
||||
}
|
||||
None
|
||||
}
|
||||
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
|
||||
where P::Matcher: ReverseMatcher<'a> {
|
||||
ReverseMatcher::next_back(&mut pat.into_matcher(self)).map(|(i, _)| i)
|
||||
}
|
||||
|
||||
fn find_str(&self, needle: &str) -> Option<usize> {
|
||||
fn find_str(&self, needle: &str) -> Option<uint> {
|
||||
if needle.is_empty() {
|
||||
Some(0)
|
||||
} else {
|
||||
@ -1655,10 +1698,10 @@ impl StrExt for str {
|
||||
}
|
||||
}
|
||||
|
||||
fn subslice_offset(&self, inner: &str) -> usize {
|
||||
let a_start = self.as_ptr() as usize;
|
||||
fn subslice_offset(&self, inner: &str) -> uint {
|
||||
let a_start = self.as_ptr() as uint;
|
||||
let a_end = a_start + self.len();
|
||||
let b_start = inner.as_ptr() as usize;
|
||||
let b_start = inner.as_ptr() as uint;
|
||||
let b_end = b_start + inner.len();
|
||||
|
||||
assert!(a_start <= b_start);
|
||||
@ -1672,7 +1715,7 @@ impl StrExt for str {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn len(&self) -> usize { self.repr().len }
|
||||
fn len(&self) -> uint { self.repr().len }
|
||||
|
||||
#[inline]
|
||||
fn is_empty(&self) -> bool { self.len() == 0 }
|
||||
@ -1685,15 +1728,15 @@ impl StrExt for str {
|
||||
/// index of the next code point.
|
||||
#[inline]
|
||||
#[unstable(feature = "core")]
|
||||
pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
|
||||
pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
|
||||
if bytes[i] < 128u8 {
|
||||
return (bytes[i] as u32, i + 1);
|
||||
}
|
||||
|
||||
// Multibyte case is a fn to allow char_range_at to inline cleanly
|
||||
fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
|
||||
fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
|
||||
let mut val = bytes[i] as u32;
|
||||
let w = UTF8_CHAR_WIDTH[val as usize] as usize;
|
||||
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
||||
assert!((w != 0));
|
||||
|
||||
val = utf8_first_byte!(val, w);
|
||||
@ -1720,7 +1763,7 @@ impl<'a> Iterator for Lines<'a> {
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
|
||||
fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
@ -1736,7 +1779,7 @@ impl<'a> Iterator for LinesAny<'a> {
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
|
||||
fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
|
113
src/libcore/str/pattern.rs
Normal file
113
src/libcore/str/pattern.rs
Normal file
@ -0,0 +1,113 @@
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![allow(missing_docs)]
|
||||
|
||||
use prelude::*;
|
||||
use super::CharEq;
|
||||
|
||||
// Pattern
|
||||
|
||||
pub trait Pattern<'a>: Sized {
|
||||
type Matcher: Matcher<'a>;
|
||||
fn into_matcher(self, haystack: &'a str) -> Self::Matcher;
|
||||
|
||||
#[inline]
|
||||
fn is_contained_in(self, haystack: &'a str) -> bool {
|
||||
Matcher::next(&mut self.into_matcher(haystack)).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
// Matcher
|
||||
|
||||
pub unsafe trait Matcher<'a> {
|
||||
fn haystack(&self) -> &'a str;
|
||||
fn next(&mut self) -> Option<(usize, usize)>;
|
||||
}
|
||||
|
||||
pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
|
||||
fn next_back(&mut self) -> Option<(usize, usize)>;
|
||||
}
|
||||
|
||||
pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
|
||||
|
||||
// Impl for CharEq
|
||||
|
||||
struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
|
||||
|
||||
impl<'a, C: CharEq> Pattern<'a> for C {
|
||||
type Matcher = CharEqMatcher<'a, C>;
|
||||
|
||||
#[inline]
|
||||
fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
|
||||
CharEqMatcher(self, haystack, haystack.char_indices())
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
|
||||
#[inline]
|
||||
fn haystack(&self) -> &'a str {
|
||||
self.1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
while let Some((i, c)) = self.2.next() {
|
||||
if self.0.matches(c) {
|
||||
return Some((i, i + c.len_utf8()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<(usize, usize)> {
|
||||
while let Some((i, c)) = self.2.next_back() {
|
||||
if self.0.matches(c) {
|
||||
return Some((i, i + c.len_utf8()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
|
||||
|
||||
// Impl for &str
|
||||
|
||||
struct StrMatcher<'a>(super::MatchIndices<'a>);
|
||||
|
||||
impl<'a> Pattern<'a> for &'a str {
|
||||
type Matcher = StrMatcher<'a>;
|
||||
|
||||
#[inline]
|
||||
fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a> {
|
||||
let mi = super::MatchIndices {
|
||||
haystack: haystack,
|
||||
needle: self,
|
||||
searcher: super::Searcher::new(haystack.as_bytes(), self.as_bytes())
|
||||
};
|
||||
StrMatcher(mi)
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a> Matcher<'a> for StrMatcher<'a> {
|
||||
#[inline]
|
||||
fn haystack(&self) -> &'a str {
|
||||
self.0.haystack
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
@ -8,6 +8,13 @@
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#[test]
|
||||
fn test_empty_match_indices() {
|
||||
let data = "aä中!";
|
||||
let vec: Vec<_> = data.match_indices("").collect();
|
||||
assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bool_from_str() {
|
||||
assert_eq!("true".parse().ok(), Some(true));
|
||||
|
Loading…
x
Reference in New Issue
Block a user