Refactored code into Searcher traits with naive implementations
Made the family of Split iterators use the Pattern API.
Renamed the Matcher traits to Searcher.
This commit is contained in:
parent
13ea9062a9
commit
f9ef8cd555
@ -23,7 +23,7 @@
|
||||
#![feature(env)]
|
||||
#![feature(core)]
|
||||
|
||||
#![deny(warnings)]
|
||||
// #![deny(warnings)]
|
||||
|
||||
extern crate test;
|
||||
extern crate getopts;
|
||||
|
@ -706,7 +706,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
|
||||
/// ```
|
||||
#[unstable(feature = "collections",
|
||||
reason = "might have its iterator type changed")]
|
||||
fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
|
||||
fn match_indices<'a, 'b>(&'a self, pat: &'b str) -> MatchIndices<'a, &'b str> {
|
||||
core_str::StrExt::match_indices(&self[..], pat)
|
||||
}
|
||||
|
||||
@ -723,7 +723,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
|
||||
/// ```
|
||||
#[unstable(feature = "collections",
|
||||
reason = "might get removed in the future in favor of a more generic split()")]
|
||||
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
|
||||
fn split_str<'a, 'b>(&'a self, pat: &'b str) -> SplitStr<'a, &'b str> {
|
||||
core_str::StrExt::split_str(&self[..], pat)
|
||||
}
|
||||
|
||||
|
@ -22,13 +22,13 @@ use option::Option;
|
||||
use slice::SliceExt;
|
||||
|
||||
// UTF-8 ranges and tags for encoding characters
|
||||
static TAG_CONT: u8 = 0b1000_0000u8;
|
||||
static TAG_TWO_B: u8 = 0b1100_0000u8;
|
||||
static TAG_THREE_B: u8 = 0b1110_0000u8;
|
||||
static TAG_FOUR_B: u8 = 0b1111_0000u8;
|
||||
static MAX_ONE_B: u32 = 0x80u32;
|
||||
static MAX_TWO_B: u32 = 0x800u32;
|
||||
static MAX_THREE_B: u32 = 0x10000u32;
|
||||
const TAG_CONT: u8 = 0b1000_0000u8;
|
||||
const TAG_TWO_B: u8 = 0b1100_0000u8;
|
||||
const TAG_THREE_B: u8 = 0b1110_0000u8;
|
||||
const TAG_FOUR_B: u8 = 0b1111_0000u8;
|
||||
const MAX_ONE_B: u32 = 0x80u32;
|
||||
const MAX_TWO_B: u32 = 0x800u32;
|
||||
const MAX_THREE_B: u32 = 0x10000u32;
|
||||
|
||||
/*
|
||||
Lu Uppercase_Letter an uppercase letter
|
||||
@ -398,11 +398,14 @@ impl CharExt for char {
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
fn len_utf8(self) -> usize {
|
||||
let code = self as u32;
|
||||
match () {
|
||||
_ if code < MAX_ONE_B => 1,
|
||||
_ if code < MAX_TWO_B => 2,
|
||||
_ if code < MAX_THREE_B => 3,
|
||||
_ => 4,
|
||||
if code < MAX_ONE_B {
|
||||
1
|
||||
} else if code < MAX_TWO_B {
|
||||
2
|
||||
} else if code < MAX_THREE_B {
|
||||
3
|
||||
} else {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -657,6 +657,8 @@ macro_rules! iterator {
|
||||
fn next(&mut self) -> Option<$elem> {
|
||||
// could be implemented with slices, but this avoids bounds checks
|
||||
unsafe {
|
||||
::intrinsics::assume(!self.ptr.is_null());
|
||||
::intrinsics::assume(!self.end.is_null());
|
||||
if self.ptr == self.end {
|
||||
None
|
||||
} else {
|
||||
@ -693,6 +695,8 @@ macro_rules! iterator {
|
||||
fn next_back(&mut self) -> Option<$elem> {
|
||||
// could be implemented with slices, but this avoids bounds checks
|
||||
unsafe {
|
||||
::intrinsics::assume(!self.ptr.is_null());
|
||||
::intrinsics::assume(!self.end.is_null());
|
||||
if self.end == self.ptr {
|
||||
None
|
||||
} else {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,99 +16,280 @@ use super::CharEq;
|
||||
// Pattern
|
||||
|
||||
pub trait Pattern<'a>: Sized {
|
||||
type Matcher: Matcher<'a>;
|
||||
fn into_matcher(self, haystack: &'a str) -> Self::Matcher;
|
||||
type Searcher: Searcher<'a>;
|
||||
fn into_matcher(self, haystack: &'a str) -> Self::Searcher;
|
||||
|
||||
#[inline]
|
||||
fn is_contained_in(self, haystack: &'a str) -> bool {
|
||||
Matcher::next(&mut self.into_matcher(haystack)).is_some()
|
||||
self.into_matcher(haystack).next_match().is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn match_starts_at(self, haystack: &'a str, idx: usize) -> bool {
|
||||
let mut matcher = self.into_matcher(haystack);
|
||||
loop {
|
||||
match matcher.next() {
|
||||
SearchStep::Match(i, _) if i == idx => return true,
|
||||
SearchStep::Match(i, _)
|
||||
| SearchStep::Reject(i, _) if i >= idx => break,
|
||||
SearchStep::Done => break,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn match_ends_at(self, haystack: &'a str, idx: usize) -> bool
|
||||
where Self::Searcher: ReverseSearcher<'a> {
|
||||
let mut matcher = self.into_matcher(haystack);
|
||||
loop {
|
||||
match matcher.next_back() {
|
||||
SearchStep::Match(_, j) if idx == j => return true,
|
||||
SearchStep::Match(_, j)
|
||||
| SearchStep::Reject(_, j) if idx >= j => break,
|
||||
SearchStep::Done => break,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
// Matcher
|
||||
// Searcher
|
||||
|
||||
pub unsafe trait Matcher<'a> {
|
||||
pub enum SearchStep {
|
||||
Match(usize, usize),
|
||||
Reject(usize, usize),
|
||||
Done
|
||||
}
|
||||
|
||||
pub unsafe trait Searcher<'a> {
|
||||
fn haystack(&self) -> &'a str;
|
||||
fn next(&mut self) -> Option<(usize, usize)>;
|
||||
fn next(&mut self) -> SearchStep;
|
||||
#[inline]
|
||||
fn next_match(&mut self) -> Option<(usize, usize)> {
|
||||
loop {
|
||||
match self.next() {
|
||||
SearchStep::Match(a, b) => return Some((a, b)),
|
||||
SearchStep::Done => return None,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn next_reject(&mut self) -> Option<(usize, usize)>{
|
||||
loop {
|
||||
match self.next() {
|
||||
SearchStep::Reject(a, b) => return Some((a, b)),
|
||||
SearchStep::Done => return None,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
|
||||
fn next_back(&mut self) -> Option<(usize, usize)>;
|
||||
pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
|
||||
fn next_back(&mut self) -> SearchStep;
|
||||
#[inline]
|
||||
fn next_match_back(&mut self) -> Option<(usize, usize)>{
|
||||
loop {
|
||||
match self.next_back() {
|
||||
SearchStep::Match(a, b) => return Some((a, b)),
|
||||
SearchStep::Done => return None,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn next_reject_back(&mut self) -> Option<(usize, usize)>{
|
||||
loop {
|
||||
match self.next_back() {
|
||||
SearchStep::Reject(a, b) => return Some((a, b)),
|
||||
SearchStep::Done => return None,
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
|
||||
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
|
||||
|
||||
// Impl for CharEq
|
||||
|
||||
struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
|
||||
pub struct CharEqSearcher<'a, C> {
|
||||
char_eq: C,
|
||||
haystack: &'a str,
|
||||
char_indices: super::CharIndices<'a>,
|
||||
#[allow(dead_code)]
|
||||
ascii_only: bool,
|
||||
}
|
||||
|
||||
impl<'a, C: CharEq> Pattern<'a> for C {
|
||||
type Matcher = CharEqMatcher<'a, C>;
|
||||
type Searcher = CharEqSearcher<'a, C>;
|
||||
|
||||
#[inline]
|
||||
fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
|
||||
CharEqMatcher(self, haystack, haystack.char_indices())
|
||||
fn into_matcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
|
||||
CharEqSearcher {
|
||||
ascii_only: self.only_ascii(),
|
||||
haystack: haystack,
|
||||
char_eq: self,
|
||||
char_indices: haystack.char_indices(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
|
||||
unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
|
||||
#[inline]
|
||||
fn haystack(&self) -> &'a str {
|
||||
self.1
|
||||
self.haystack
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
while let Some((i, c)) = self.2.next() {
|
||||
if self.0.matches(c) {
|
||||
return Some((i, i + c.len_utf8()));
|
||||
fn next(&mut self) -> SearchStep {
|
||||
let s = &mut self.char_indices;
|
||||
// Compare lengths of the internal byte slice iterator
|
||||
// to find length of current char
|
||||
let (pre_len, _) = s.iter.iter.size_hint();
|
||||
if let Some((i, c)) = s.next() {
|
||||
let (len, _) = s.iter.iter.size_hint();
|
||||
let char_len = pre_len - len;
|
||||
if self.char_eq.matches(c) {
|
||||
return SearchStep::Match(i, i + char_len);
|
||||
} else {
|
||||
return SearchStep::Reject(i, i + char_len);
|
||||
}
|
||||
}
|
||||
None
|
||||
SearchStep::Done
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
|
||||
unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<(usize, usize)> {
|
||||
while let Some((i, c)) = self.2.next_back() {
|
||||
if self.0.matches(c) {
|
||||
return Some((i, i + c.len_utf8()));
|
||||
fn next_back(&mut self) -> SearchStep {
|
||||
let s = &mut self.char_indices;
|
||||
// Compare lengths of the internal byte slice iterator
|
||||
// to find length of current char
|
||||
let (pre_len, _) = s.iter.iter.size_hint();
|
||||
if let Some((i, c)) = s.next_back() {
|
||||
let (len, _) = s.iter.iter.size_hint();
|
||||
let char_len = pre_len - len;
|
||||
if self.char_eq.matches(c) {
|
||||
return SearchStep::Match(i, i + char_len);
|
||||
} else {
|
||||
return SearchStep::Reject(i, i + char_len);
|
||||
}
|
||||
}
|
||||
None
|
||||
SearchStep::Done
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
|
||||
impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
|
||||
|
||||
// Impl for &str
|
||||
|
||||
// TODO: Optimize the naive implementation here
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StrMatcher<'a, 'b>(super::OldMatchIndices<'a, 'b>);
|
||||
pub struct StrSearcher<'a, 'b> {
|
||||
haystack: &'a str,
|
||||
needle: &'b str,
|
||||
start: usize,
|
||||
end: usize,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Pattern<'a> for &'b str {
|
||||
type Matcher = StrMatcher<'a, 'b>;
|
||||
type Searcher = StrSearcher<'a, 'b>;
|
||||
|
||||
#[inline]
|
||||
fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a, 'b> {
|
||||
let mi = super::OldMatchIndices {
|
||||
fn into_matcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
|
||||
StrSearcher {
|
||||
haystack: haystack,
|
||||
needle: self,
|
||||
searcher: super::Searcher::new(haystack.as_bytes(), self.as_bytes())
|
||||
};
|
||||
StrMatcher(mi)
|
||||
start: 0,
|
||||
end: haystack.len(),
|
||||
done: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, 'b> Matcher<'a> for StrMatcher<'a, 'b> {
|
||||
unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
|
||||
#[inline]
|
||||
fn haystack(&self) -> &'a str {
|
||||
self.0.haystack
|
||||
self.haystack
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<(usize, usize)> {
|
||||
self.0.next()
|
||||
fn next(&mut self) -> SearchStep {
|
||||
str_search_step(self,
|
||||
|m: &mut StrSearcher| {
|
||||
// Forward step for empty needle
|
||||
let current_start = m.start;
|
||||
if !m.done {
|
||||
m.start = m.haystack.char_range_at(current_start).next;
|
||||
}
|
||||
SearchStep::Match(current_start, current_start)
|
||||
},
|
||||
|m: &mut StrSearcher| {
|
||||
// Forward step for nonempty needle
|
||||
let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
|
||||
let current_start = m.start;
|
||||
if possible_match == m.needle {
|
||||
m.start += m.needle.len();
|
||||
SearchStep::Match(current_start, m.start)
|
||||
} else {
|
||||
m.start += possible_match.chars().next().unwrap().len_utf8();
|
||||
SearchStep::Reject(current_start, m.start)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> SearchStep {
|
||||
str_search_step(self,
|
||||
|m: &mut StrSearcher| {
|
||||
// Backward step for empty needle
|
||||
let current_end = m.end;
|
||||
if !m.done {
|
||||
m.end = m.haystack.char_range_at_reverse(current_end).next;
|
||||
}
|
||||
SearchStep::Match(current_end, current_end)
|
||||
},
|
||||
|m: &mut StrSearcher| {
|
||||
// Backward step for nonempty needle
|
||||
let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
|
||||
let current_end = m.end;
|
||||
if possible_match == m.needle {
|
||||
m.end -= m.needle.len();
|
||||
SearchStep::Match(m.end, current_end)
|
||||
} else {
|
||||
m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
|
||||
SearchStep::Reject(m.end, current_end)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn str_search_step<F, G>(mut m: &mut StrSearcher, f: F, g: G) -> SearchStep
|
||||
where F: FnOnce(&mut StrSearcher) -> SearchStep,
|
||||
G: FnOnce(&mut StrSearcher) -> SearchStep
|
||||
{
|
||||
if m.done {
|
||||
SearchStep::Done
|
||||
} else if m.needle.len() == 0 && m.start <= m.end {
|
||||
// Case for needle == ""
|
||||
if m.start == m.end {
|
||||
m.done = true;
|
||||
}
|
||||
f(&mut m)
|
||||
} else if m.start + m.needle.len() <= m.end {
|
||||
// Case for needle != ""
|
||||
g(&mut m)
|
||||
} else {
|
||||
m.done = true;
|
||||
SearchStep::Done
|
||||
}
|
||||
}
|
||||
|
@ -207,15 +207,15 @@ malesuada sollicitudin quam eu fermentum!");
|
||||
|
||||
make_test!(trim_ascii_char, s, {
|
||||
use std::ascii::AsciiExt;
|
||||
s.trim_matches(|&mut: c: char| c.is_ascii())
|
||||
s.trim_matches(|c: char| c.is_ascii())
|
||||
});
|
||||
make_test!(trim_left_ascii_char, s, {
|
||||
use std::ascii::AsciiExt;
|
||||
s.trim_left_matches(|&mut: c: char| c.is_ascii())
|
||||
s.trim_left_matches(|c: char| c.is_ascii())
|
||||
});
|
||||
make_test!(trim_right_ascii_char, s, {
|
||||
use std::ascii::AsciiExt;
|
||||
s.trim_right_matches(|&mut: c: char| c.is_ascii())
|
||||
s.trim_right_matches(|c: char| c.is_ascii())
|
||||
});
|
||||
|
||||
make_test!(find_underscore_char, s, s.find('_'));
|
||||
|
Loading…
x
Reference in New Issue
Block a user