496 lines
16 KiB
Rust
496 lines
16 KiB
Rust
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution and at
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
#![allow(deprecated) /* for CharEq */ ]
|
|
|
|
use prelude::*;
|
|
use super::CharEq;
|
|
|
|
// Pattern
|
|
|
|
/// A string pattern.
|
|
///
|
|
/// A `Pattern<'a>` expresses that the implementing type
|
|
/// can be used as a string pattern for searching in a `&'a str`.
|
|
///
|
|
/// For example, both `'a'` and `"aa"` are patterns that
|
|
/// would match at index `1` in the string `"baaaab"`.
|
|
///
|
|
/// The trait itself acts as a builder for an associated
|
|
/// `Searcher` type, which does the actual work of finding
|
|
/// occurences of the pattern in a string.
|
|
pub trait Pattern<'a>: Sized {
|
|
/// Associated searcher for this pattern
|
|
type Searcher: Searcher<'a>;
|
|
|
|
/// Construct the associated searcher from
|
|
/// `self` and the `haystack` to search in.
|
|
fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
|
|
|
|
/// Check whether the pattern matches anywhere in the haystack
|
|
#[inline]
|
|
fn is_contained_in(self, haystack: &'a str) -> bool {
|
|
self.into_searcher(haystack).next_match().is_some()
|
|
}
|
|
|
|
/// Check whether the pattern matches at the front of the haystack
|
|
#[inline]
|
|
fn is_prefix_of(self, haystack: &'a str) -> bool {
|
|
match self.into_searcher(haystack).next() {
|
|
SearchStep::Match(0, _) => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Check whether the pattern matches at the back of the haystack
|
|
#[inline]
|
|
fn is_suffix_of(self, haystack: &'a str) -> bool
|
|
where Self::Searcher: ReverseSearcher<'a>
|
|
{
|
|
match self.into_searcher(haystack).next_back() {
|
|
SearchStep::Match(_, j) if haystack.len() == j => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Searcher
|
|
|
|
/// The outcome of a single call to `Searcher::next()` or
/// `ReverseSearcher::next_back()`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SearchStep {
    /// A match of the pattern has been found at `haystack[a..b]`.
    Match(usize, usize),
    /// `haystack[a..b]` has been rejected as a possible match of the
    /// pattern.
    ///
    /// Note that there might be more than one `Reject` between two
    /// `Match`es; there is no requirement for them to be combined into one.
    Reject(usize, usize),
    /// Every byte of the haystack has been visited, which ends the
    /// iteration.
    Done,
}
|
|
|
|
/// A searcher for a string pattern.
|
|
///
|
|
/// This trait provides methods for searching for non-overlapping
|
|
/// matches of a pattern starting from the front (left) of a string.
|
|
///
|
|
/// It will be implemented by associated `Searcher`
|
|
/// types of the `Pattern` trait.
|
|
///
|
|
/// The trait is marked unsafe because the indices returned by the
|
|
/// `next()` methods are required to lie on valid utf8 boundaries in
|
|
/// the haystack. This enables consumers of this trait to
|
|
/// slice the haystack without additional runtime checks.
|
|
pub unsafe trait Searcher<'a> {
|
|
/// Getter for the underlaying string to be searched in
|
|
///
|
|
/// Will always return the same `&str`
|
|
fn haystack(&self) -> &'a str;
|
|
|
|
/// Performs the next search step starting from the front.
|
|
///
|
|
/// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
|
|
/// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
|
|
/// pattern, even partially.
|
|
/// - Returns `Done` if every byte of the haystack has been visited
|
|
///
|
|
/// The stream of `Match` and `Reject` values up to a `Done`
|
|
/// will contain index ranges that are adjacent, non-overlapping,
|
|
/// covering the whole haystack, and laying on utf8 boundaries.
|
|
///
|
|
/// A `Match` result needs to contain the whole matched pattern,
|
|
/// however `Reject` results may be split up into arbitrary
|
|
/// many adjacent fragments. Both ranges may have zero length.
|
|
///
|
|
/// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
|
|
/// might produce the stream
|
|
/// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
|
|
fn next(&mut self) -> SearchStep;
|
|
|
|
/// Find the next `Match` result. See `next()`
|
|
#[inline]
|
|
fn next_match(&mut self) -> Option<(usize, usize)> {
|
|
loop {
|
|
match self.next() {
|
|
SearchStep::Match(a, b) => return Some((a, b)),
|
|
SearchStep::Done => return None,
|
|
_ => continue,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Find the next `Reject` result. See `next()`
|
|
#[inline]
|
|
fn next_reject(&mut self) -> Option<(usize, usize)> {
|
|
loop {
|
|
match self.next() {
|
|
SearchStep::Reject(a, b) => return Some((a, b)),
|
|
SearchStep::Done => return None,
|
|
_ => continue,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A reverse searcher for a string pattern.
|
|
///
|
|
/// This trait provides methods for searching for non-overlapping
|
|
/// matches of a pattern starting from the back (right) of a string.
|
|
///
|
|
/// It will be implemented by associated `Searcher`
|
|
/// types of the `Pattern` trait if the pattern supports searching
|
|
/// for it from the back.
|
|
///
|
|
/// The index ranges returned by this trait are not required
|
|
/// to exactly match those of the forward search in reverse.
|
|
///
|
|
/// For the reason why this trait is marked unsafe, see them
|
|
/// parent trait `Searcher`.
|
|
pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
|
|
/// Performs the next search step starting from the back.
|
|
///
|
|
/// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
|
|
/// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
|
|
/// pattern, even partially.
|
|
/// - Returns `Done` if every byte of the haystack has been visited
|
|
///
|
|
/// The stream of `Match` and `Reject` values up to a `Done`
|
|
/// will contain index ranges that are adjacent, non-overlapping,
|
|
/// covering the whole haystack, and laying on utf8 boundaries.
|
|
///
|
|
/// A `Match` result needs to contain the whole matched pattern,
|
|
/// however `Reject` results may be split up into arbitrary
|
|
/// many adjacent fragments. Both ranges may have zero length.
|
|
///
|
|
/// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
|
|
/// might produce the stream
|
|
/// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
|
|
fn next_back(&mut self) -> SearchStep;
|
|
|
|
/// Find the next `Match` result. See `next_back()`
|
|
#[inline]
|
|
fn next_match_back(&mut self) -> Option<(usize, usize)>{
|
|
loop {
|
|
match self.next_back() {
|
|
SearchStep::Match(a, b) => return Some((a, b)),
|
|
SearchStep::Done => return None,
|
|
_ => continue,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Find the next `Reject` result. See `next_back()`
|
|
#[inline]
|
|
fn next_reject_back(&mut self) -> Option<(usize, usize)>{
|
|
loop {
|
|
match self.next_back() {
|
|
SearchStep::Reject(a, b) => return Some((a, b)),
|
|
SearchStep::Done => return None,
|
|
_ => continue,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A marker trait to express that a `ReverseSearcher`
|
|
/// can be used for a `DoubleEndedIterator` implementation.
|
|
///
|
|
/// For this, the impl of `Searcher` and `ReverseSearcher` need
|
|
/// to follow these conditions:
|
|
///
|
|
/// - All results of `next()` need to be identical
|
|
/// to the results of `next_back()` in reverse order.
|
|
/// - `next()` and `next_back()` need to behave as
|
|
/// the two ends of a range of values, that is they
|
|
/// can not "walk past each other".
|
|
///
|
|
/// # Example
|
|
///
|
|
/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
|
|
/// `char` only requires looking at one at a time, which behaves the same
|
|
/// from both ends.
|
|
///
|
|
/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
|
|
/// the pattern `"aa"` in the haystack `"aaa"` matches as either
|
|
/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
|
|
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
|
|
|
|
// Impl for a CharEq wrapper
|
|
|
|
/// Newtype around a `CharEq` value; the `Pattern` impl in this file is
/// written against this wrapper rather than against `CharEq` types directly.
struct CharEqPattern<C: CharEq>(C);
|
|
|
|
struct CharEqSearcher<'a, C: CharEq> {
|
|
char_eq: C,
|
|
haystack: &'a str,
|
|
char_indices: super::CharIndices<'a>,
|
|
#[allow(dead_code)]
|
|
ascii_only: bool,
|
|
}
|
|
|
|
impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
|
|
type Searcher = CharEqSearcher<'a, C>;
|
|
|
|
#[inline]
|
|
fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
|
|
CharEqSearcher {
|
|
ascii_only: self.0.only_ascii(),
|
|
haystack: haystack,
|
|
char_eq: self.0,
|
|
char_indices: haystack.char_indices(),
|
|
}
|
|
}
|
|
}
|
|
|
|
unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
|
|
#[inline]
|
|
fn haystack(&self) -> &'a str {
|
|
self.haystack
|
|
}
|
|
|
|
#[inline]
|
|
fn next(&mut self) -> SearchStep {
|
|
let s = &mut self.char_indices;
|
|
// Compare lengths of the internal byte slice iterator
|
|
// to find length of current char
|
|
let (pre_len, _) = s.iter.iter.size_hint();
|
|
if let Some((i, c)) = s.next() {
|
|
let (len, _) = s.iter.iter.size_hint();
|
|
let char_len = pre_len - len;
|
|
if self.char_eq.matches(c) {
|
|
return SearchStep::Match(i, i + char_len);
|
|
} else {
|
|
return SearchStep::Reject(i, i + char_len);
|
|
}
|
|
}
|
|
SearchStep::Done
|
|
}
|
|
}
|
|
|
|
unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
|
|
#[inline]
|
|
fn next_back(&mut self) -> SearchStep {
|
|
let s = &mut self.char_indices;
|
|
// Compare lengths of the internal byte slice iterator
|
|
// to find length of current char
|
|
let (pre_len, _) = s.iter.iter.size_hint();
|
|
if let Some((i, c)) = s.next_back() {
|
|
let (len, _) = s.iter.iter.size_hint();
|
|
let char_len = pre_len - len;
|
|
if self.char_eq.matches(c) {
|
|
return SearchStep::Match(i, i + char_len);
|
|
} else {
|
|
return SearchStep::Reject(i, i + char_len);
|
|
}
|
|
}
|
|
SearchStep::Done
|
|
}
|
|
}
|
|
|
|
/// Marker impl with no methods: opts `CharEqSearcher` into the
/// `DoubleEndedSearcher` contract.
impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
|
|
|
|
// Impl for &str
|
|
|
|
// TODO: Optimize the naive implementation here
|
|
|
|
/// Searcher state for a `&str` pattern.
///
/// `start` and `end` are byte offsets into `haystack` delimiting the part
/// that has not been visited yet: forward steps advance `start`, backward
/// steps move `end` down.
#[derive(Clone)]
struct StrSearcher<'a, 'b> {
    /// The string being searched.
    haystack: &'a str,
    /// The substring being searched for.
    needle: &'b str,
    /// Front cursor; initialised to `0`.
    start: usize,
    /// Back cursor; initialised to `haystack.len()`.
    end: usize,
    /// Set once the search has finished; every later step yields `Done`.
    done: bool,
}
|
|
|
|
/// Non-allocating substring search.
|
|
///
|
|
/// Will handle the pattern `""` as returning empty matches at each utf8
|
|
/// boundary.
|
|
impl<'a, 'b> Pattern<'a> for &'b str {
|
|
type Searcher = StrSearcher<'a, 'b>;
|
|
|
|
#[inline]
|
|
fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
|
|
StrSearcher {
|
|
haystack: haystack,
|
|
needle: self,
|
|
start: 0,
|
|
end: haystack.len(),
|
|
done: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
|
|
#[inline]
|
|
fn haystack(&self) -> &'a str {
|
|
self.haystack
|
|
}
|
|
|
|
#[inline]
|
|
fn next(&mut self) -> SearchStep {
|
|
str_search_step(self,
|
|
|m: &mut StrSearcher| {
|
|
// Forward step for empty needle
|
|
let current_start = m.start;
|
|
if !m.done {
|
|
m.start = m.haystack.char_range_at(current_start).next;
|
|
}
|
|
SearchStep::Match(current_start, current_start)
|
|
},
|
|
|m: &mut StrSearcher| {
|
|
// Forward step for nonempty needle
|
|
let current_start = m.start;
|
|
// Compare byte window because this might break utf8 boundaries
|
|
let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
|
|
if possible_match == m.needle.as_bytes() {
|
|
m.start += m.needle.len();
|
|
SearchStep::Match(current_start, m.start)
|
|
} else {
|
|
// Skip a char
|
|
let haystack_suffix = &m.haystack[m.start..];
|
|
m.start += haystack_suffix.chars().next().unwrap().len_utf8();
|
|
SearchStep::Reject(current_start, m.start)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
|
|
#[inline]
|
|
fn next_back(&mut self) -> SearchStep {
|
|
str_search_step(self,
|
|
|m: &mut StrSearcher| {
|
|
// Backward step for empty needle
|
|
let current_end = m.end;
|
|
if !m.done {
|
|
m.end = m.haystack.char_range_at_reverse(current_end).next;
|
|
}
|
|
SearchStep::Match(current_end, current_end)
|
|
},
|
|
|m: &mut StrSearcher| {
|
|
// Backward step for nonempty needle
|
|
let current_end = m.end;
|
|
// Compare byte window because this might break utf8 boundaries
|
|
let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
|
|
if possible_match == m.needle.as_bytes() {
|
|
m.end -= m.needle.len();
|
|
SearchStep::Match(m.end, current_end)
|
|
} else {
|
|
// Skip a char
|
|
let haystack_prefix = &m.haystack[..m.end];
|
|
m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
|
|
SearchStep::Reject(m.end, current_end)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// Helper function for encapsulating the common control flow
|
|
// of doing a search step from the front or doing a search step from the back
|
|
fn str_search_step<F, G>(mut m: &mut StrSearcher,
|
|
empty_needle_step: F,
|
|
nonempty_needle_step: G) -> SearchStep
|
|
where F: FnOnce(&mut StrSearcher) -> SearchStep,
|
|
G: FnOnce(&mut StrSearcher) -> SearchStep
|
|
{
|
|
if m.done {
|
|
SearchStep::Done
|
|
} else if m.needle.len() == 0 && m.start <= m.end {
|
|
// Case for needle == ""
|
|
if m.start == m.end {
|
|
m.done = true;
|
|
}
|
|
empty_needle_step(&mut m)
|
|
} else if m.start + m.needle.len() <= m.end {
|
|
// Case for needle != ""
|
|
nonempty_needle_step(&mut m)
|
|
} else if m.start < m.end {
|
|
// Remaining slice shorter than needle, reject it
|
|
m.done = true;
|
|
SearchStep::Reject(m.start, m.end)
|
|
} else {
|
|
m.done = true;
|
|
SearchStep::Done
|
|
}
|
|
}
|
|
|
|
// Generates the `Pattern` method bodies for an impl that simply forwards
// every call to another pattern.
//
// - `$t` is the searcher type returned by `into_searcher`.
// - `$s` is the identifier that `self` is bound to before `$e` is
//   evaluated.
// - `$e` is an expression (normally mentioning `$s`) that produces the
//   pattern the call is forwarded to.
//
// The surrounding impl has to declare a lifetime named `'a` and, until
// the first FIXME below is resolved, its own `type Searcher`.
macro_rules! associated_items {
    ($t:ty, $s:ident, $e:expr) => {
        // FIXME: #22463
        //type Searcher = $t;

        #[inline]
        fn into_searcher(self, haystack: &'a str) -> $t {
            let $s = self;
            $e.into_searcher(haystack)
        }

        #[inline]
        fn is_contained_in(self, haystack: &'a str) -> bool {
            let $s = self;
            $e.is_contained_in(haystack)
        }

        #[inline]
        fn is_prefix_of(self, haystack: &'a str) -> bool {
            let $s = self;
            $e.is_prefix_of(haystack)
        }

        // FIXME: #21750
        /*#[inline]
        fn is_suffix_of(self, haystack: &'a str) -> bool
            where $t: ReverseSearcher<'a>
        {
            let $s = self;
            $e.is_suffix_of(haystack)
        }*/
    }
}
|
|
|
|
// CharEq delegation impls
|
|
|
|
/// Searches for chars that are equal to a given char
|
|
impl<'a> Pattern<'a> for char {
|
|
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
|
|
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
|
|
s, CharEqPattern(s));
|
|
}
|
|
|
|
/// Searches for chars that are equal to any of the chars in the array
|
|
impl<'a, 'b> Pattern<'a> for &'b [char] {
|
|
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
|
|
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
|
|
s, CharEqPattern(s));
|
|
}
|
|
|
|
/// Searches for chars that match the given predicate
|
|
impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
|
|
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
|
|
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
|
|
s, CharEqPattern(s));
|
|
}
|
|
|
|
// Deref-forward impl
|
|
|
|
use ops::Deref;
|
|
|
|
/// Delegates to the next deref coercion of `Self` that implements `Pattern`
|
|
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
|
|
where &'b P: Pattern<'a>
|
|
{
|
|
type Searcher = <&'b P as Pattern<'a>>::Searcher;
|
|
associated_items!(<&'b P as Pattern<'a>>::Searcher,
|
|
s, (&**s));
|
|
}
|