Fix tidy and rebase fallout
Added a few bug fixes and additional test cases
This commit is contained in:
parent
c1de0a0f9e
commit
a641996796
@ -2893,22 +2893,6 @@ mod bench {
|
||||
b.iter(|| assert_eq!(s.split('V').count(), 3));
|
||||
}
|
||||
|
||||
// Benchmark: split a mixed-script, non-ASCII string on 'V' through a custom
// `CharEq` implementation and assert that exactly 3 pieces are produced.
#[bench]
|
||||
fn split_unicode_not_ascii(b: &mut Bencher) {
|
||||
// Local newtype around one `char`, given its own `CharEq` impl below.
struct NotAscii(char);
|
||||
impl CharEq for NotAscii {
|
||||
// Matches exactly the wrapped char.
fn matches(&mut self, c: char) -> bool {
|
||||
let NotAscii(cc) = *self;
|
||||
cc == c
|
||||
}
|
||||
// Always answers `false`, even though the wrapped char ('V' below) is ASCII.
// NOTE(review): presumably this keeps `split` off any ASCII-only fast path — confirm.
fn only_ascii(&self) -> bool { false }
|
||||
}
|
||||
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
|
||||
|
||||
// The haystack contains 'V' twice, hence the expected 3 pieces.
b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
|
||||
}
|
||||
|
||||
|
||||
#[bench]
|
||||
fn split_ascii(b: &mut Bencher) {
|
||||
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
||||
@ -2917,23 +2901,6 @@ mod bench {
|
||||
b.iter(|| assert_eq!(s.split(' ').count(), len));
|
||||
}
|
||||
|
||||
// Benchmark: split an ASCII sentence on ' ' through a custom `CharEq`
// implementation and assert the piece count equals that of a plain
// `split(' ')` on the same string.
#[bench]
|
||||
fn split_not_ascii(b: &mut Bencher) {
|
||||
// Local newtype around one `char`, given its own `CharEq` impl below.
struct NotAscii(char);
|
||||
impl CharEq for NotAscii {
|
||||
#[inline]
|
||||
// Matches exactly the wrapped char.
fn matches(&mut self, c: char) -> bool {
|
||||
let NotAscii(cc) = *self;
|
||||
cc == c
|
||||
}
|
||||
// Always answers `false`, even though the wrapped char (' ' below) is ASCII.
// NOTE(review): presumably this keeps `split` off any ASCII-only fast path — confirm.
fn only_ascii(&self) -> bool { false }
|
||||
}
|
||||
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
||||
// Reference count, computed once outside the timed closure with a plain `char` pattern.
let len = s.split(' ').count();
|
||||
|
||||
b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_extern_fn(b: &mut Bencher) {
|
||||
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
||||
|
@ -156,7 +156,6 @@ impl FromStr for bool {
|
||||
|
||||
/// An error returned when parsing a `bool` from a string fails.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub struct ParseBoolError { _priv: () }
|
||||
|
||||
@ -235,7 +234,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
|
||||
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
|
||||
let s = s as *const u8;
|
||||
let mut len = 0;
|
||||
while *s.offset(len as int) != 0 {
|
||||
while *s.offset(len as isize) != 0 {
|
||||
len += 1;
|
||||
}
|
||||
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
|
||||
@ -258,7 +257,7 @@ impl CharEq for char {
|
||||
fn matches(&mut self, c: char) -> bool { *self == c }
|
||||
|
||||
#[inline]
|
||||
fn only_ascii(&self) -> bool { (*self as usize) < 128 }
|
||||
fn only_ascii(&self) -> bool { (*self as u32) < 128 }
|
||||
}
|
||||
|
||||
impl<F> CharEq for F where F: FnMut(char) -> bool {
|
||||
@ -764,7 +763,8 @@ impl TwoWaySearcher {
|
||||
// How far we can jump when we encounter a mismatch is all based on the fact
|
||||
// that (u, v) is a critical factorization for the needle.
|
||||
#[inline]
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(usize, usize)> {
|
||||
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
|
||||
-> Option<(usize, usize)> {
|
||||
'search: loop {
|
||||
// Check that we have room to search in
|
||||
if self.position + needle.len() > haystack.len() {
|
||||
@ -955,6 +955,7 @@ Section: Comparing strings
|
||||
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
|
||||
#[inline]
|
||||
fn eq_slice_(a: &str, b: &str) -> bool {
|
||||
// NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
|
||||
#[allow(improper_ctypes)]
|
||||
extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
|
||||
a.len() == b.len() && unsafe {
|
||||
@ -1489,7 +1490,7 @@ impl StrExt for str {
|
||||
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
|
||||
where P::Searcher: DoubleEndedSearcher<'a> {
|
||||
let mut i = 0;
|
||||
let mut j = self.len();
|
||||
let mut j = 0;
|
||||
let mut matcher = pat.into_searcher(self);
|
||||
if let Some((a, b)) = matcher.next_reject() {
|
||||
i = a;
|
||||
@ -1507,7 +1508,7 @@ impl StrExt for str {
|
||||
|
||||
#[inline]
|
||||
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
|
||||
let mut i = 0;
|
||||
let mut i = self.len();
|
||||
let mut matcher = pat.into_searcher(self);
|
||||
if let Some((a, _)) = matcher.next_reject() {
|
||||
i = a;
|
||||
@ -1521,7 +1522,7 @@ impl StrExt for str {
|
||||
#[inline]
|
||||
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
|
||||
where P::Searcher: ReverseSearcher<'a> {
|
||||
let mut j = self.len();
|
||||
let mut j = 0;
|
||||
let mut matcher = pat.into_searcher(self);
|
||||
if let Some((_, b)) = matcher.next_reject_back() {
|
||||
j = b;
|
||||
|
@ -58,6 +58,7 @@ pub trait Pattern<'a>: Sized {
|
||||
|
||||
// Searcher
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||
pub enum SearchStep {
|
||||
Match(usize, usize),
|
||||
Reject(usize, usize),
|
||||
@ -190,7 +191,7 @@ impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
|
||||
|
||||
// Impl for &str
|
||||
|
||||
// TODO: Optimize the naive implementation here
|
||||
// Todo: Optimize the naive implementation here
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StrSearcher<'a, 'b> {
|
||||
@ -235,13 +236,16 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
|
||||
},
|
||||
|m: &mut StrSearcher| {
|
||||
// Forward step for nonempty needle
|
||||
let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
|
||||
// Compare if bytes are equal
|
||||
let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
|
||||
let current_start = m.start;
|
||||
if possible_match == m.needle {
|
||||
if possible_match == m.needle.as_bytes() {
|
||||
m.start += m.needle.len();
|
||||
SearchStep::Match(current_start, m.start)
|
||||
} else {
|
||||
m.start += possible_match.chars().next().unwrap().len_utf8();
|
||||
// Skip a char
|
||||
let haystack_suffix = &m.haystack[m.start..];
|
||||
m.start += haystack_suffix.chars().next().unwrap().len_utf8();
|
||||
SearchStep::Reject(current_start, m.start)
|
||||
}
|
||||
})
|
||||
@ -262,13 +266,16 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
|
||||
},
|
||||
|m: &mut StrSearcher| {
|
||||
// Backward step for nonempty needle
|
||||
let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
|
||||
// Compare if bytes are equal
|
||||
let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
|
||||
let current_end = m.end;
|
||||
if possible_match == m.needle {
|
||||
if possible_match == m.needle.as_bytes() {
|
||||
m.end -= m.needle.len();
|
||||
SearchStep::Match(m.end, current_end)
|
||||
} else {
|
||||
m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
|
||||
// Skip a char
|
||||
let haystack_prefix = &m.haystack[..m.end];
|
||||
m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
|
||||
SearchStep::Reject(m.end, current_end)
|
||||
}
|
||||
})
|
||||
@ -290,6 +297,9 @@ where F: FnOnce(&mut StrSearcher) -> SearchStep,
|
||||
} else if m.start + m.needle.len() <= m.end {
|
||||
// Case for needle != ""
|
||||
g(&mut m)
|
||||
} else if m.start < m.end {
|
||||
m.done = true;
|
||||
SearchStep::Reject(m.start, m.end)
|
||||
} else {
|
||||
m.done = true;
|
||||
SearchStep::Done
|
||||
@ -352,7 +362,8 @@ impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
|
||||
|
||||
use ops::Deref;
|
||||
|
||||
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T where &'b P: Pattern<'a> {
|
||||
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
|
||||
where &'b P: Pattern<'a> {
|
||||
type Searcher = <&'b P as Pattern<'a>>::Searcher;
|
||||
associated_items!(<&'b P as Pattern<'a>>::Searcher,
|
||||
s, (&**s));
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
@ -139,8 +139,150 @@ fn test_utf16_code_units() {
|
||||
vec![0xE9, 0xD83D, 0xDCA9])
|
||||
}
|
||||
|
||||
// Checks that a haystack beginning with multi-byte box-drawing characters is
// reported as NOT starting with the two-byte ASCII needle "# ".
// NOTE(review): presumably a regression test for comparing a needle-length
// prefix that ends inside a multi-byte char — confirm against the searcher code.
#[test]
|
||||
fn starts_with_in_unicode() {
|
||||
assert!(!"├── Cargo.toml".starts_with("# "));
|
||||
}
|
||||
|
||||
// rm x86_64-unknown-linux-gnu/stage1/test/coretesttest-x86_64-unknown-linux-gnu; env PLEASE_BENCH=1 make check-stage1-coretest TESTNAME=str::bench
|
||||
// Exercises `starts_with` with string needles that are shorter than, equal in
// prefix to, or longer than the haystack, with and without the multi-byte
// char 'ä', followed by a group of empty-needle cases.
#[test]
|
||||
fn starts_short_long() {
|
||||
// Needle longer than the (possibly empty) haystack must not match.
assert!(!"".starts_with("##"));
|
||||
assert!(!"##".starts_with("####"));
|
||||
assert!("####".starts_with("##"));
|
||||
assert!(!"##ä".starts_with("####"));
|
||||
assert!("####ä".starts_with("##"));
|
||||
assert!(!"##".starts_with("####ä"));
|
||||
assert!("##ä##".starts_with("##ä"));
|
||||
|
||||
// The empty needle is a prefix of every haystack, including the empty one.
assert!("".starts_with(""));
|
||||
assert!("ä".starts_with(""));
|
||||
assert!("#ä".starts_with(""));
|
||||
assert!("##ä".starts_with(""));
|
||||
assert!("ä###".starts_with(""));
|
||||
assert!("#ä##".starts_with(""));
|
||||
assert!("##ä#".starts_with(""));
|
||||
}
|
||||
|
||||
// Checks `contains_char` on the short haystack "* \t": one char that is
// present (' '), one ASCII char that is absent ('?'), and one absent char
// well outside the ASCII range (U+1F4A9).
#[test]
|
||||
fn contains_weird_cases() {
|
||||
assert!("* \t".contains_char(' '));
|
||||
assert!(!"* \t".contains_char('?'));
|
||||
assert!(!"* \t".contains_char('\u{1F4A9}'));
|
||||
}
|
||||
|
||||
// Checks `trim_left_matches`, `trim_right_matches` and `trim_matches` with a
// whitespace-matching closure, first on a string with a non-whitespace core
// ("a") and then on a string made up entirely of whitespace.
#[test]
|
||||
fn trim_ws() {
|
||||
// Mixed input: only the requested side(s) lose their whitespace.
assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()),
|
||||
"a \t ");
|
||||
assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()),
|
||||
" \t a");
|
||||
assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()),
|
||||
"a");
|
||||
// All-whitespace input: every variant must yield the empty string.
// NOTE(review): this looks like the case where the searcher never reports a
// reject, so the result depends on the initial slice bounds — confirm.
assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()),
|
||||
"");
|
||||
assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()),
|
||||
"");
|
||||
assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()),
|
||||
"");
|
||||
}
|
||||
|
||||
// Table-driven tests for the `Searcher` / `ReverseSearcher` step sequences
// produced by `&str` and `char` patterns. Each `make_test!` invocation lists
// the expected `SearchStep`s in forward order; the same list is checked for
// both forward and backward iteration.
mod pattern {
|
||||
use std::str::Pattern;
|
||||
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
|
||||
use std::str::SearchStep::{self, Match, Reject, Done};
|
||||
|
||||
// Generates a module `$name` with two tests, `fwd` and `bwd`, that run
// pattern `$p` over haystack `$h` and compare the observed steps with the
// bracketed list. Every listed step must be followed by a comma
// (the matcher is `$($e:expr,)*`).
macro_rules! make_test {
|
||||
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
|
||||
mod $name {
|
||||
use std::str::Pattern;
|
||||
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
|
||||
use std::str::SearchStep::{self, Match, Reject, Done};
|
||||
use super::{cmp_search_to_vec};
|
||||
#[test]
|
||||
fn fwd() {
|
||||
// `false` selects forward iteration (`next`).
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
|
||||
}
|
||||
#[test]
|
||||
fn bwd() {
|
||||
// `true` selects backward iteration (`next_back`).
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a searcher for `pat` over `haystack`, collects every `Match` /
// `Reject` step until `Done`, and asserts the collected steps equal `right`.
// With `rev == true` the steps are pulled with `next_back` and the collected
// vector is reversed before comparison, so `right` is always given in
// forward order.
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
|
||||
right: Vec<SearchStep>)
|
||||
where P::Searcher: ReverseSearcher<'a>
|
||||
{
|
||||
let mut searcher = pat.into_searcher(haystack);
|
||||
let mut v = vec![];
|
||||
loop {
|
||||
match if !rev {searcher.next()} else {searcher.next_back()} {
|
||||
Match(a, b) => v.push(Match(a, b)),
|
||||
Reject(a, b) => v.push(Reject(a, b)),
|
||||
Done => break,
|
||||
}
|
||||
}
|
||||
if rev {
|
||||
v.reverse();
|
||||
}
|
||||
assert_eq!(v, right);
|
||||
}
|
||||
|
||||
// `&str` needle "bb" in an ASCII haystack: two matches separated by
// single-byte rejects.
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
|
||||
Reject(0, 1),
|
||||
Match (1, 3),
|
||||
Reject(3, 4),
|
||||
Match (4, 6),
|
||||
Reject(6, 7),
|
||||
]);
|
||||
// Empty needle in an ASCII haystack: an empty match at every byte offset
// from 0 through the haystack length (7).
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
|
||||
Match(0, 0),
|
||||
Match(1, 1),
|
||||
Match(2, 2),
|
||||
Match(3, 3),
|
||||
Match(4, 4),
|
||||
Match(5, 5),
|
||||
Match(6, 6),
|
||||
Match(7, 7),
|
||||
]);
|
||||
// NOTE(review): `mulibyte` in this and two later test names looks like a
// typo for `multibyte` — confirm and rename.
// One-byte needle " " against three 3-byte chars: each reject spans one
// whole char.
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
|
||||
Reject(0, 3),
|
||||
Reject(3, 6),
|
||||
Reject(6, 9),
|
||||
]);
|
||||
// Empty needle against the same haystack: empty matches at offsets
// 0, 3, 6 and 9 only.
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
|
||||
Match(0, 0),
|
||||
Match(3, 3),
|
||||
Match(6, 6),
|
||||
Match(9, 9),
|
||||
]);
|
||||
// Empty needle in an empty haystack: a single empty match at 0.
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
|
||||
Match(0, 0),
|
||||
]);
|
||||
// Non-empty needle in an empty haystack: no steps at all before `Done`.
make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
|
||||
]);
|
||||
// `char` needle 'b' in an ASCII haystack: one step per byte.
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
|
||||
Reject(0, 1),
|
||||
Match (1, 2),
|
||||
Match (2, 3),
|
||||
Reject(3, 4),
|
||||
Match (4, 5),
|
||||
Match (5, 6),
|
||||
Reject(6, 7),
|
||||
]);
|
||||
// `char` needle ' ' against three 3-byte chars: one reject per char.
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
|
||||
Reject(0, 3),
|
||||
Reject(3, 6),
|
||||
Reject(6, 9),
|
||||
]);
|
||||
// `char` needle U+1F4A9 against the 3-byte haystack "* \t": one
// single-byte reject per haystack char.
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
|
||||
Reject(0, 1),
|
||||
Reject(1, 2),
|
||||
Reject(2, 3),
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
mod bench {
|
||||
macro_rules! make_test_inner {
|
||||
|
Loading…
x
Reference in New Issue
Block a user