2014-01-30 12:29:35 -06:00
|
|
|
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
|
2012-12-10 17:44:02 -06:00
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2013-09-17 21:42:07 -05:00
|
|
|
/*!
|
|
|
|
|
2013-12-24 10:08:28 -06:00
|
|
|
Unicode string manipulation (`str` type)
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
# Basic Usage
|
|
|
|
|
|
|
|
Rust's string type is one of the core primitive types of the language. While
|
|
|
|
represented by the name `str`, the name `str` is not actually a valid type in
|
2013-10-04 23:24:29 -05:00
|
|
|
Rust. Each string must also be decorated with its ownership. This means that
|
2014-02-20 09:56:22 -06:00
|
|
|
there are two common kinds of strings in Rust:
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
* `~str` - This is an owned string. This type obeys all of the normal semantics
|
|
|
|
of the `~T` types, meaning that it has one, and only one, owner. This
|
|
|
|
type cannot be implicitly copied, and is moved out of when passed to
|
|
|
|
other functions.
|
|
|
|
|
2014-01-31 18:50:45 -06:00
|
|
|
* `&str` - This is the borrowed string type. This type of string can only be
|
|
|
|
created from the other kind of string. As the name "borrowed"
|
|
|
|
implies, this type of string is owned elsewhere, and this string
|
|
|
|
cannot be moved out of.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
As an example, here are a few different kinds of strings.
|
|
|
|
|
2013-09-23 19:20:36 -05:00
|
|
|
```rust
|
2013-12-31 00:51:11 -06:00
|
|
|
fn main() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let owned_string = "I am an owned string".to_owned();
|
2013-12-31 00:51:11 -06:00
|
|
|
let borrowed_string1 = "This string is borrowed with the 'static lifetime";
|
|
|
|
let borrowed_string2: &str = owned_string; // owned strings can be borrowed
|
|
|
|
}
|
2013-09-23 19:20:36 -05:00
|
|
|
```
|
2013-09-17 21:42:07 -05:00
|
|
|
|
2014-02-20 09:56:22 -06:00
|
|
|
From the example above, you can see that Rust has 2 different kinds of string
|
2014-01-31 18:50:45 -06:00
|
|
|
literals. The owned literals correspond to the owned string types, but the
|
|
|
|
"borrowed literal" is actually more akin to C's concept of a static string.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
2014-01-31 18:50:45 -06:00
|
|
|
When a string is declared without a `~` sigil, then the string is allocated
|
|
|
|
statically in the rodata of the executable/library. The string then has the
|
|
|
|
type `&'static str` meaning that the string is valid for the `'static`
|
2013-09-17 21:42:07 -05:00
|
|
|
lifetime, otherwise known as the lifetime of the entire program. As can be
|
|
|
|
inferred from the type, these static strings are not mutable.
|
|
|
|
|
|
|
|
# Mutability
|
|
|
|
|
2014-02-20 09:56:22 -06:00
|
|
|
Many languages have immutable strings by default, and Rust has a particular
|
2013-09-17 21:42:07 -05:00
|
|
|
flavor on this idea. As with the rest of Rust's types, strings are immutable by
|
|
|
|
default. If a string is declared as `mut`, however, it may be mutated. This
|
|
|
|
works the same way as the rest of Rust's type system in the sense that if
|
|
|
|
there's a mutable reference to a string, there may only be one mutable reference
|
|
|
|
to that string. With these guarantees, strings can easily transition between
|
|
|
|
being mutable/immutable with the same benefits of having mutable strings in
|
|
|
|
other languages.
|
|
|
|
|
|
|
|
# Representation
|
|
|
|
|
|
|
|
Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
|
|
|
|
stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
|
2013-09-24 07:26:10 -05:00
|
|
|
encoded UTF-8 sequences. Additionally, strings are not null-terminated
|
|
|
|
and can contain null codepoints.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
The actual representation of strings has a direct mapping to vectors:
|
|
|
|
|
|
|
|
* `~str` is the same as `~[u8]`
|
|
|
|
* `&str` is the same as `&[u8]`
|
|
|
|
|
|
|
|
*/
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2012-12-23 16:41:37 -06:00
|
|
|
use cast;
|
2013-09-03 18:24:12 -05:00
|
|
|
use cast::transmute;
|
2012-12-23 16:41:37 -06:00
|
|
|
use char;
|
2013-06-09 22:09:51 -05:00
|
|
|
use char::Char;
|
2014-03-05 00:19:14 -06:00
|
|
|
use clone::Clone;
|
2014-02-07 18:36:59 -06:00
|
|
|
use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
|
2014-04-17 17:28:14 -05:00
|
|
|
use container::Container;
|
2014-02-07 18:36:59 -06:00
|
|
|
use fmt;
|
2014-02-25 10:03:41 -06:00
|
|
|
use io::Writer;
|
2013-09-10 17:53:21 -05:00
|
|
|
use iter::{Iterator, FromIterator, Extendable, range};
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::{Filter, AdditiveIterator, Map};
|
2014-01-23 13:41:57 -06:00
|
|
|
use iter::{Rev, DoubleEndedIterator, ExactSize};
|
2012-12-23 16:41:37 -06:00
|
|
|
use libc;
|
2014-01-31 07:03:20 -06:00
|
|
|
use num::Saturating;
|
2013-01-08 21:37:25 -06:00
|
|
|
use option::{None, Option, Some};
|
2012-12-23 16:41:37 -06:00
|
|
|
use ptr;
|
2013-10-02 08:37:59 -05:00
|
|
|
use from_str::FromStr;
|
2014-03-08 17:11:52 -06:00
|
|
|
use slice;
|
2014-04-17 17:28:14 -05:00
|
|
|
use slice::{OwnedVector, ImmutableVector, MutableVector};
|
2014-03-06 12:22:21 -06:00
|
|
|
use slice::{Vector};
|
2014-03-20 02:35:51 -05:00
|
|
|
use vec::Vec;
|
2013-08-10 08:38:00 -05:00
|
|
|
use default::Default;
|
2014-02-16 02:04:33 -06:00
|
|
|
use raw::Repr;
|
2014-04-02 18:54:22 -05:00
|
|
|
use strbuf::StrBuf;
|
2012-06-04 19:26:17 -05:00
|
|
|
|
2011-12-13 18:25:51 -06:00
|
|
|
/*
|
2012-01-24 03:29:45 -06:00
|
|
|
Section: Creating a string
|
2011-12-13 18:25:51 -06:00
|
|
|
*/
|
|
|
|
|
2013-08-25 19:07:29 -05:00
|
|
|
/// Consumes a vector of bytes to create a new utf-8 string.
|
|
|
|
/// Returns None if the vector contains invalid UTF-8.
|
2013-12-23 10:45:01 -06:00
|
|
|
pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
|
2013-08-25 19:07:29 -05:00
|
|
|
if is_utf8(vv) {
|
2013-09-05 07:17:24 -05:00
|
|
|
Some(unsafe { raw::from_utf8_owned(vv) })
|
2013-08-25 19:07:29 -05:00
|
|
|
} else {
|
|
|
|
None
|
2013-06-28 16:05:10 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Converts a vector to a string slice without performing any allocations.
|
|
|
|
///
|
|
|
|
/// Once the slice has been validated as utf-8, it is transmuted in-place and
|
|
|
|
/// returned as a '&str' instead of a '&[u8]'
|
|
|
|
///
|
2013-08-25 19:07:29 -05:00
|
|
|
/// Returns None if the slice is not utf-8.
|
2013-12-23 10:30:49 -06:00
|
|
|
pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
|
2013-08-25 19:07:29 -05:00
|
|
|
if is_utf8(v) {
|
2013-12-01 07:33:04 -06:00
|
|
|
Some(unsafe { raw::from_utf8(v) })
|
2013-08-25 19:07:29 -05:00
|
|
|
} else { None }
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2013-10-02 08:37:59 -05:00
|
|
|
impl FromStr for ~str {
    /// Parsing a string out of a string always succeeds: the slice is
    /// copied into a new owned string and wrapped in `Some`.
    #[inline]
    fn from_str(s: &str) -> Option<~str> {
        let owned = s.to_owned();
        Some(owned)
    }
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Convert a byte to a UTF-8 string
|
|
|
|
///
|
|
|
|
/// # Failure
|
|
|
|
///
|
|
|
|
/// Fails if invalid UTF-8
|
2013-08-04 15:22:56 -05:00
|
|
|
pub fn from_byte(b: u8) -> ~str {
|
|
|
|
assert!(b < 128u8);
|
|
|
|
unsafe { ::cast::transmute(~[b]) }
|
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Convert a char to a string
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn from_char(ch: char) -> ~str {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut buf = StrBuf::new();
|
2013-06-10 02:42:24 -05:00
|
|
|
buf.push_char(ch);
|
2014-04-02 18:54:22 -05:00
|
|
|
buf.into_owned()
|
2011-12-13 18:25:51 -06:00
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Convert a vector of chars to a string
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn from_chars(chs: &[char]) -> ~str {
|
2014-01-31 07:03:20 -06:00
|
|
|
chs.iter().map(|c| *c).collect()
|
2011-12-13 18:25:51 -06:00
|
|
|
}
|
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Methods for vectors of strings
|
2013-06-02 22:19:37 -05:00
|
|
|
pub trait StrVector {
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Concatenate a vector of strings.
|
2013-08-09 03:25:24 -05:00
|
|
|
fn concat(&self) -> ~str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Concatenate a vector of strings, placing a given separator between each.
|
2013-08-09 03:25:24 -05:00
|
|
|
fn connect(&self, sep: &str) -> ~str;
|
2013-06-02 22:19:37 -05:00
|
|
|
}
|
2013-05-02 04:24:41 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a, S: Str> StrVector for &'a [S] {
|
2013-08-09 03:25:24 -05:00
|
|
|
fn concat(&self) -> ~str {
|
2014-04-15 20:17:48 -05:00
|
|
|
if self.is_empty() { return "".to_owned(); }
|
2013-06-02 22:19:37 -05:00
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
// `len` calculation may overflow but push_str but will check boundaries
|
2013-08-09 22:09:47 -05:00
|
|
|
let len = self.iter().map(|s| s.as_slice().len()).sum();
|
2013-05-02 04:24:41 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::with_capacity(len);
|
2013-06-02 22:19:37 -05:00
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
for s in self.iter() {
|
|
|
|
result.push_str(s.as_slice())
|
2013-05-02 04:24:41 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
result.into_owned()
|
2013-06-02 22:19:37 -05:00
|
|
|
}
|
|
|
|
|
2013-08-09 03:25:24 -05:00
|
|
|
fn connect(&self, sep: &str) -> ~str {
|
2014-04-15 20:17:48 -05:00
|
|
|
if self.is_empty() { return "".to_owned(); }
|
2013-08-04 15:22:56 -05:00
|
|
|
|
|
|
|
// concat is faster
|
|
|
|
if sep.is_empty() { return self.concat(); }
|
|
|
|
|
|
|
|
// this is wrong without the guarantee that `self` is non-empty
|
2013-09-10 17:53:21 -05:00
|
|
|
// `len` calculation may overflow but push_str but will check boundaries
|
2013-08-04 15:22:56 -05:00
|
|
|
let len = sep.len() * (self.len() - 1)
|
2013-08-09 22:09:47 -05:00
|
|
|
+ self.iter().map(|s| s.as_slice().len()).sum();
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::with_capacity(len);
|
2013-08-04 15:22:56 -05:00
|
|
|
let mut first = true;
|
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
for s in self.iter() {
|
|
|
|
if first {
|
|
|
|
first = false;
|
|
|
|
} else {
|
|
|
|
result.push_str(sep);
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2013-09-10 17:53:21 -05:00
|
|
|
result.push_str(s.as_slice());
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
result.into_owned()
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2013-02-07 21:33:12 -06:00
|
|
|
}
|
|
|
|
|
2014-02-18 23:36:51 -06:00
|
|
|
impl<'a, S: Str> StrVector for Vec<S> {
|
|
|
|
#[inline]
|
|
|
|
fn concat(&self) -> ~str {
|
|
|
|
self.as_slice().concat()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn connect(&self, sep: &str) -> ~str {
|
|
|
|
self.as_slice().connect(sep)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
/// Something that can be used to compare against a character
|
|
|
|
pub trait CharEq {
|
|
|
|
/// Determine if the splitter should split at the given character
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, char) -> bool;
|
2013-06-09 22:09:51 -05:00
|
|
|
/// Indicate if this is only concerned about ASCII characters,
|
|
|
|
/// which can allow for a faster implementation.
|
|
|
|
fn only_ascii(&self) -> bool;
|
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
impl CharEq for char {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, c: char) -> bool { *self == c }
|
2013-06-09 22:09:51 -05:00
|
|
|
|
2014-04-21 22:58:34 -05:00
|
|
|
#[inline]
|
2013-06-09 22:09:51 -05:00
|
|
|
fn only_ascii(&self) -> bool { (*self as uint) < 128 }
|
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2014-04-07 15:30:48 -05:00
|
|
|
impl<'a> CharEq for |char|: 'a -> bool {
    /// The closure itself decides whether `c` matches.
    #[inline]
    fn matches(&mut self, c: char) -> bool {
        (*self)(c)
    }

    /// Always reports `false` for closures.
    #[inline]
    fn only_ascii(&self) -> bool {
        false
    }
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
impl CharEq for extern "Rust" fn(char) -> bool {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, c: char) -> bool { (*self)(c) }
|
2013-06-09 22:09:51 -05:00
|
|
|
|
2014-04-21 22:58:34 -05:00
|
|
|
#[inline]
|
2013-06-09 22:09:51 -05:00
|
|
|
fn only_ascii(&self) -> bool { false }
|
|
|
|
}
|
|
|
|
|
2014-04-22 00:21:37 -05:00
|
|
|
impl<'a> CharEq for &'a [char] {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, c: char) -> bool {
|
|
|
|
self.iter().any(|&mut m| m.matches(c))
|
2013-06-11 10:32:49 -05:00
|
|
|
}
|
|
|
|
|
2014-04-21 22:58:34 -05:00
|
|
|
#[inline]
|
2013-06-11 10:32:49 -05:00
|
|
|
fn only_ascii(&self) -> bool {
|
|
|
|
self.iter().all(|m| m.only_ascii())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/*
|
|
|
|
Section: Iterators
|
|
|
|
*/
|
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
/// External iterator for a string's characters.
|
2013-11-03 17:01:00 -06:00
|
|
|
/// Use with the `std::iter` module.
|
2013-07-27 16:38:38 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct Chars<'a> {
|
2013-08-19 04:18:30 -05:00
|
|
|
/// The slice remaining to be iterated
|
2014-03-27 17:09:47 -05:00
|
|
|
string: &'a str,
|
2013-07-27 16:38:38 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> Iterator<char> for Chars<'a> {
|
2013-07-27 16:38:38 -05:00
|
|
|
#[inline]
|
2013-08-18 06:57:34 -05:00
|
|
|
fn next(&mut self) -> Option<char> {
|
2013-08-19 04:18:30 -05:00
|
|
|
// Decode the next codepoint, then update
|
|
|
|
// the slice to be just the remaining part
|
2013-08-18 06:57:34 -05:00
|
|
|
if self.string.len() != 0 {
|
|
|
|
let CharRange {ch, next} = self.string.char_range_at(0);
|
|
|
|
unsafe {
|
|
|
|
self.string = raw::slice_unchecked(self.string, next, self.string.len());
|
|
|
|
}
|
|
|
|
Some(ch)
|
2013-07-27 16:38:38 -05:00
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
2013-08-18 06:57:34 -05:00
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
(self.string.len().saturating_add(3)/4, Some(self.string.len()))
|
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> DoubleEndedIterator<char> for Chars<'a> {
|
2013-07-27 16:38:38 -05:00
|
|
|
#[inline]
|
2013-08-18 06:57:34 -05:00
|
|
|
fn next_back(&mut self) -> Option<char> {
|
|
|
|
if self.string.len() != 0 {
|
|
|
|
let CharRange {ch, next} = self.string.char_range_at_reverse(self.string.len());
|
|
|
|
unsafe {
|
|
|
|
self.string = raw::slice_unchecked(self.string, 0, next);
|
|
|
|
}
|
|
|
|
Some(ch)
|
2013-07-27 16:38:38 -05:00
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
/// External iterator for a string's characters and their byte offsets.
|
2013-11-03 17:01:00 -06:00
|
|
|
/// Use with the `std::iter` module.
|
2013-08-18 06:57:34 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct CharOffsets<'a> {
|
2013-08-19 04:18:30 -05:00
|
|
|
/// The original string to be iterated
|
2014-03-27 17:09:47 -05:00
|
|
|
string: &'a str,
|
|
|
|
iter: Chars<'a>,
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
    /// Yields the next character together with its byte offset in the
    /// original string.
    #[inline]
    fn next(&mut self) -> Option<(uint, char)> {
        // The byte offset is the pointer distance between the start of the
        // original string and the start of the iterator's remaining part.
        let origin = self.string.as_ptr() as uint;
        let current = self.iter.string.as_ptr() as uint;
        let offset = current - origin;
        match self.iter.next() {
            Some(ch) => Some((offset, ch)),
            None => None,
        }
    }

    /// Same bounds as the underlying character iterator.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) {
        self.iter.size_hint()
    }
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
    /// Yields the last remaining character together with its byte offset in
    /// the original string.
    #[inline]
    fn next_back(&mut self) -> Option<(uint, char)> {
        match self.iter.next_back() {
            None => None,
            Some(ch) => {
                // After the character has been removed, the end of the
                // remaining slice is where that character started.
                let remaining = self.iter.string;
                let offset = remaining.len() +
                    remaining.as_ptr() as uint - self.string.as_ptr() as uint;
                Some((offset, ch))
            }
        }
    }
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Deprecated name for a reversed `Chars` iterator; use `Rev<Chars<'a>>`.
#[deprecated = "replaced by Rev<Chars<'a>>"]
|
2014-01-23 13:41:57 -06:00
|
|
|
pub type RevChars<'a> = Rev<Chars<'a>>;
|
2013-08-18 06:57:34 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Deprecated name for a reversed `CharOffsets` iterator; use
/// `Rev<CharOffsets<'a>>`.
#[deprecated = "replaced by Rev<CharOffsets<'a>>"]
|
2014-01-23 13:41:57 -06:00
|
|
|
pub type RevCharOffsets<'a> = Rev<CharOffsets<'a>>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
|
|
|
/// External iterator for a string's bytes.
|
2013-11-03 17:01:00 -06:00
|
|
|
/// Use with the `std::iter` module.
|
2014-01-14 21:32:24 -06:00
|
|
|
pub type Bytes<'a> =
|
2014-03-08 17:11:52 -06:00
|
|
|
Map<'a, &'a u8, u8, slice::Items<'a, u8>>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Deprecated name for a reversed `Bytes` iterator; use `Rev<Bytes<'a>>`.
#[deprecated = "replaced by Rev<Bytes<'a>>"]
|
2014-01-23 13:41:57 -06:00
|
|
|
pub type RevBytes<'a> = Rev<Bytes<'a>>;
|
2013-06-09 22:09:51 -05:00
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
/// An iterator over the substrings of a string, separated by `sep`.
|
2013-07-18 10:38:17 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct CharSplits<'a, Sep> {
|
2013-08-25 01:54:47 -05:00
|
|
|
/// The slice remaining to be iterated
|
2014-03-27 17:09:47 -05:00
|
|
|
string: &'a str,
|
|
|
|
sep: Sep,
|
2013-06-09 08:10:50 -05:00
|
|
|
/// Whether an empty string at the end is allowed
|
2014-03-27 17:09:47 -05:00
|
|
|
allow_trailing_empty: bool,
|
|
|
|
only_ascii: bool,
|
|
|
|
finished: bool,
|
2013-06-09 08:10:50 -05:00
|
|
|
}
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Deprecated name for a reversed `CharSplits` iterator; use
/// `Rev<CharSplits<'a, Sep>>`.
#[deprecated = "replaced by Rev<CharSplits<'a, Sep>>"]
|
2014-01-23 13:41:57 -06:00
|
|
|
pub type RevCharSplits<'a, Sep> = Rev<CharSplits<'a, Sep>>;
|
2013-08-25 01:54:47 -05:00
|
|
|
|
|
|
|
/// An iterator over the substrings of a string, separated by `sep`,
|
|
|
|
/// splitting at most `count` times.
|
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct CharSplitsN<'a, Sep> {
|
2014-03-27 17:09:47 -05:00
|
|
|
iter: CharSplits<'a, Sep>,
|
2013-08-25 01:54:47 -05:00
|
|
|
/// The number of splits remaining
|
2014-03-27 17:09:47 -05:00
|
|
|
count: uint,
|
|
|
|
invert: bool,
|
2013-08-25 01:54:47 -05:00
|
|
|
}
|
|
|
|
|
2014-01-30 12:29:35 -06:00
|
|
|
/// An iterator over the words of a string, separated by a sequence of whitespace
|
2014-01-14 21:32:24 -06:00
|
|
|
pub type Words<'a> =
|
|
|
|
Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-06-13 10:39:06 -05:00
|
|
|
/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
|
2014-01-14 21:32:24 -06:00
|
|
|
pub type AnyLines<'a> =
|
|
|
|
Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a, Sep> CharSplits<'a, Sep> {
|
2013-08-25 01:54:47 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn get_end(&mut self) -> Option<&'a str> {
|
2013-08-25 01:54:47 -05:00
|
|
|
if !self.finished && (self.allow_trailing_empty || self.string.len() > 0) {
|
|
|
|
self.finished = true;
|
|
|
|
Some(self.string)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplits<'a, Sep> {
|
2013-06-09 21:46:35 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn next(&mut self) -> Option<&'a str> {
|
2013-06-09 08:10:50 -05:00
|
|
|
if self.finished { return None }
|
2011-12-13 18:25:51 -06:00
|
|
|
|
2013-08-26 04:48:49 -05:00
|
|
|
let mut next_split = None;
|
|
|
|
if self.only_ascii {
|
2013-11-23 04:18:51 -06:00
|
|
|
for (idx, byte) in self.string.bytes().enumerate() {
|
2013-08-26 04:48:49 -05:00
|
|
|
if self.sep.matches(byte as char) && byte < 128u8 {
|
|
|
|
next_split = Some((idx, idx + 1));
|
|
|
|
break;
|
2013-08-25 01:54:47 -05:00
|
|
|
}
|
|
|
|
}
|
2013-08-26 04:48:49 -05:00
|
|
|
} else {
|
2013-11-23 04:18:51 -06:00
|
|
|
for (idx, ch) in self.string.char_indices() {
|
2013-08-26 04:48:49 -05:00
|
|
|
if self.sep.matches(ch) {
|
|
|
|
next_split = Some((idx, self.string.char_range_at(idx).next));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
match next_split {
|
|
|
|
Some((a, b)) => unsafe {
|
|
|
|
let elt = raw::slice_unchecked(self.string, 0, a);
|
|
|
|
self.string = raw::slice_unchecked(self.string, b, self.string.len());
|
|
|
|
Some(elt)
|
|
|
|
},
|
|
|
|
None => self.get_end(),
|
2013-08-25 01:54:47 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-08-19 08:34:48 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a, Sep: CharEq> DoubleEndedIterator<&'a str>
|
2014-01-14 21:32:24 -06:00
|
|
|
for CharSplits<'a, Sep> {
|
2013-08-25 01:54:47 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn next_back(&mut self) -> Option<&'a str> {
|
2013-08-25 01:54:47 -05:00
|
|
|
if self.finished { return None }
|
|
|
|
|
|
|
|
if !self.allow_trailing_empty {
|
|
|
|
self.allow_trailing_empty = true;
|
|
|
|
match self.next_back() {
|
|
|
|
Some(elt) if !elt.is_empty() => return Some(elt),
|
|
|
|
_ => if self.finished { return None }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let len = self.string.len();
|
2013-08-26 04:48:49 -05:00
|
|
|
let mut next_split = None;
|
|
|
|
|
|
|
|
if self.only_ascii {
|
2014-01-23 13:41:57 -06:00
|
|
|
for (idx, byte) in self.string.bytes().enumerate().rev() {
|
2013-08-26 04:48:49 -05:00
|
|
|
if self.sep.matches(byte as char) && byte < 128u8 {
|
|
|
|
next_split = Some((idx, idx + 1));
|
|
|
|
break;
|
2013-08-25 01:54:47 -05:00
|
|
|
}
|
2012-02-23 06:41:10 -06:00
|
|
|
}
|
2013-08-26 04:48:49 -05:00
|
|
|
} else {
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
for (idx, ch) in self.string.char_indices().rev() {
|
2013-08-26 04:48:49 -05:00
|
|
|
if self.sep.matches(ch) {
|
|
|
|
next_split = Some((idx, self.string.char_range_at(idx).next));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
match next_split {
|
|
|
|
Some((a, b)) => unsafe {
|
|
|
|
let elt = raw::slice_unchecked(self.string, b, len);
|
|
|
|
self.string = raw::slice_unchecked(self.string, 0, a);
|
|
|
|
Some(elt)
|
|
|
|
},
|
|
|
|
None => { self.finished = true; Some(self.string) }
|
2012-02-01 22:31:01 -06:00
|
|
|
}
|
2013-08-25 01:54:47 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
// Count-limited wrapper around the inner splitter: while `count` is
// non-zero each call takes one piece from the front or the back (depending
// on `invert`); afterwards the result of `get_end` is returned.
impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplitsN<'a, Sep> {
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        // Budget used up: defer to the inner iterator's `get_end`.
        if self.count == 0 {
            return self.iter.get_end();
        }

        self.count -= 1;
        if self.invert {
            self.iter.next_back()
        } else {
            self.iter.next()
        }
    }
}
|
|
|
|
|
2013-08-16 00:41:28 -05:00
|
|
|
/// An iterator over the start and end indices of the matches of a
|
2013-06-09 21:46:35 -05:00
|
|
|
/// substring within a larger string
|
2013-07-18 10:38:17 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct MatchIndices<'a> {
|
2014-03-27 17:09:47 -05:00
|
|
|
haystack: &'a str,
|
|
|
|
needle: &'a str,
|
|
|
|
position: uint,
|
2013-06-09 21:46:35 -05:00
|
|
|
}
|
2013-05-02 17:33:18 -05:00
|
|
|
|
2013-06-09 21:46:35 -05:00
|
|
|
/// An iterator over the substrings of a string separated by a given
|
|
|
|
/// search string
|
2013-07-18 10:38:17 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-14 21:32:24 -06:00
|
|
|
pub struct StrSplits<'a> {
|
2014-03-27 17:09:47 -05:00
|
|
|
it: MatchIndices<'a>,
|
|
|
|
last_end: uint,
|
|
|
|
finished: bool
|
2013-06-09 21:46:35 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> Iterator<(uint, uint)> for MatchIndices<'a> {
    /// Returns the `(start, end)` byte indices of the next occurrence of
    /// `needle` in `haystack`, or `None` once the haystack is exhausted.
    #[inline]
    fn next(&mut self) -> Option<(uint, uint)> {
        // See Issue #1932 for why this is a naive search
        let haystack_len = self.haystack.len();
        let needle_len = self.needle.len();
        // Byte index at which the current candidate match began.
        let mut candidate_start = 0;
        // Number of needle bytes matched so far for the current candidate.
        let mut matched = 0;

        while self.position < haystack_len {
            // NOTE(review): indexing `needle` assumes it is non-empty;
            // presumably callers reject an empty needle -- confirm.
            if self.haystack[self.position] != self.needle[matched] {
                // failed match, backtrack to just after the candidate start
                if matched > 0 {
                    matched = 0;
                    self.position = candidate_start;
                }
                self.position += 1;
                continue;
            }

            if matched == 0 {
                candidate_start = self.position;
            }
            matched += 1;
            self.position += 1;

            if matched == needle_len {
                // found a match!
                return Some((candidate_start, self.position));
            }
        }

        None
    }
}
|
2012-02-23 07:59:27 -06:00
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> Iterator<&'a str> for StrSplits<'a> {
    /// Yields the text between the previous separator and the next one;
    /// once no separator remains, yields the trailing remainder exactly
    /// once and then `None`.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished { return None; }

        let haystack = self.it.haystack;
        // Work out the bounds of the piece first, then slice once.
        let (piece_start, piece_end) = match self.it.next() {
            None => {
                self.finished = true;
                (self.last_end, haystack.len())
            }
            Some((from, to)) => {
                let start = self.last_end;
                self.last_end = to;
                (start, from)
            }
        };
        Some(haystack.slice(piece_start, piece_end))
    }
}
|
|
|
|
|
2013-08-10 20:36:38 -05:00
|
|
|
// Helper functions used for Unicode normalization
|
|
|
|
fn canonical_sort(comb: &mut [(char, u8)]) {
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::range;
|
2014-02-16 03:36:43 -06:00
|
|
|
use tuple::Tuple2;
|
2013-08-10 20:36:38 -05:00
|
|
|
|
|
|
|
let len = comb.len();
|
|
|
|
for i in range(0, len) {
|
|
|
|
let mut swapped = false;
|
|
|
|
for j in range(1, len-i) {
|
2014-02-15 15:15:03 -06:00
|
|
|
let class_a = *comb[j-1].ref1();
|
|
|
|
let class_b = *comb[j].ref1();
|
|
|
|
if class_a != 0 && class_b != 0 && class_a > class_b {
|
2013-08-10 20:36:38 -05:00
|
|
|
comb.swap(j-1, j);
|
|
|
|
swapped = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !swapped { break; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[deriving(Clone)]
|
|
|
|
enum NormalizationForm {
|
|
|
|
NFD,
|
|
|
|
NFKD
|
|
|
|
}
|
|
|
|
|
|
|
|
/// External iterator for a string's normalization's characters.
|
2013-11-03 17:01:00 -06:00
|
|
|
/// Use with the `std::iter` module.
|
2013-08-10 20:36:38 -05:00
|
|
|
#[deriving(Clone)]
|
2014-01-25 20:25:02 -06:00
|
|
|
pub struct Normalizations<'a> {
|
2014-03-27 17:09:47 -05:00
|
|
|
kind: NormalizationForm,
|
|
|
|
iter: Chars<'a>,
|
2014-04-17 17:28:14 -05:00
|
|
|
buffer: Vec<(char, u8)>,
|
2014-03-27 17:09:47 -05:00
|
|
|
sorted: bool
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
impl<'a> Iterator<char> for Normalizations<'a> {
|
2013-08-10 20:36:38 -05:00
|
|
|
#[inline]
|
|
|
|
fn next(&mut self) -> Option<char> {
|
|
|
|
use unicode::decompose::canonical_combining_class;
|
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
match self.buffer.as_slice().head() {
|
2013-08-10 20:36:38 -05:00
|
|
|
Some(&(c, 0)) => {
|
|
|
|
self.sorted = false;
|
|
|
|
self.buffer.shift();
|
|
|
|
return Some(c);
|
|
|
|
}
|
|
|
|
Some(&(c, _)) if self.sorted => {
|
|
|
|
self.buffer.shift();
|
|
|
|
return Some(c);
|
|
|
|
}
|
|
|
|
_ => self.sorted = false
|
|
|
|
}
|
|
|
|
|
|
|
|
let decomposer = match self.kind {
|
|
|
|
NFD => char::decompose_canonical,
|
|
|
|
NFKD => char::decompose_compatible
|
|
|
|
};
|
|
|
|
|
2013-08-29 10:11:11 -05:00
|
|
|
if !self.sorted {
|
|
|
|
for ch in self.iter {
|
2014-02-07 16:00:45 -06:00
|
|
|
let buffer = &mut self.buffer;
|
|
|
|
let sorted = &mut self.sorted;
|
2013-11-20 16:17:12 -06:00
|
|
|
decomposer(ch, |d| {
|
2013-08-29 10:11:11 -05:00
|
|
|
let class = canonical_combining_class(d);
|
2014-02-07 16:00:45 -06:00
|
|
|
if class == 0 && !*sorted {
|
2014-04-17 17:28:14 -05:00
|
|
|
canonical_sort(buffer.as_mut_slice());
|
2014-02-07 16:00:45 -06:00
|
|
|
*sorted = true;
|
2013-08-29 10:11:11 -05:00
|
|
|
}
|
2014-02-07 16:00:45 -06:00
|
|
|
buffer.push((d, class));
|
2013-11-20 16:17:12 -06:00
|
|
|
});
|
2014-02-07 16:00:45 -06:00
|
|
|
if *sorted { break }
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !self.sorted {
|
2014-04-17 17:28:14 -05:00
|
|
|
canonical_sort(self.buffer.as_mut_slice());
|
2013-08-10 20:36:38 -05:00
|
|
|
self.sorted = true;
|
|
|
|
}
|
|
|
|
|
2013-12-23 09:40:42 -06:00
|
|
|
match self.buffer.shift() {
|
2013-08-10 20:36:38 -05:00
|
|
|
Some((c, 0)) => {
|
|
|
|
self.sorted = false;
|
|
|
|
Some(c)
|
|
|
|
}
|
|
|
|
Some((c, _)) => Some(c),
|
|
|
|
None => None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-29 10:11:11 -05:00
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
let (lower, _) = self.iter.size_hint();
|
|
|
|
(lower, None)
|
|
|
|
}
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Replace all occurrences of one string with another
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * s - The string containing substrings to replace
|
|
|
|
/// * from - The string to replace
|
|
|
|
/// * to - The replacement string
|
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
2014-04-20 23:49:39 -05:00
|
|
|
/// The original string with all occurrences of `from` replaced with `to`
|
2013-06-06 20:54:14 -05:00
|
|
|
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::new();
|
2013-06-06 20:54:14 -05:00
|
|
|
let mut last_end = 0;
|
2013-11-23 04:18:51 -06:00
|
|
|
for (start, end) in s.match_indices(from) {
|
2013-06-06 20:54:14 -05:00
|
|
|
result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
|
|
|
|
result.push_str(to);
|
|
|
|
last_end = end;
|
|
|
|
}
|
|
|
|
result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
|
2014-04-02 18:54:22 -05:00
|
|
|
result.into_owned()
|
2013-06-06 20:54:14 -05:00
|
|
|
}
|
|
|
|
|
2011-12-13 18:25:51 -06:00
|
|
|
/*
|
2012-01-24 03:29:45 -06:00
|
|
|
Section: Comparing strings
|
2011-12-13 18:25:51 -06:00
|
|
|
*/
|
|
|
|
|
2013-12-17 09:37:30 -06:00
|
|
|
// share the implementation of the lang-item vs. non-lang-item
|
|
|
|
// eq_slice.
|
|
|
|
#[inline]
|
|
|
|
fn eq_slice_(a: &str, b: &str) -> bool {
|
|
|
|
a.len() == b.len() && unsafe {
|
|
|
|
libc::memcmp(a.as_ptr() as *libc::c_void,
|
|
|
|
b.as_ptr() as *libc::c_void,
|
|
|
|
a.len() as libc::size_t) == 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-02 17:42:56 -05:00
|
|
|
/// Bytewise slice equality
|
2013-08-12 16:25:14 -05:00
|
|
|
#[cfg(not(test))]
|
2013-08-04 15:22:56 -05:00
|
|
|
#[lang="str_eq"]
|
|
|
|
#[inline]
|
|
|
|
pub fn eq_slice(a: &str, b: &str) -> bool {
|
2013-12-17 09:37:30 -06:00
|
|
|
eq_slice_(a, b)
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Bytewise equality of two string slices (test-build variant, not
/// registered as a lang item)
#[cfg(test)]
#[inline]
pub fn eq_slice(a: &str, b: &str) -> bool {
    // Delegates to the shared helper.
    eq_slice_(a, b)
}
|
|
|
|
|
2012-08-02 17:42:56 -05:00
|
|
|
/// Bytewise string equality
|
2013-05-08 06:11:23 -05:00
|
|
|
#[cfg(not(test))]
|
2012-09-10 19:22:20 -05:00
|
|
|
#[lang="uniq_str_eq"]
|
2013-05-02 02:49:11 -05:00
|
|
|
#[inline]
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn eq(a: &~str, b: &~str) -> bool {
|
2012-09-10 19:22:20 -05:00
|
|
|
eq_slice(*a, *b)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2013-05-02 02:49:11 -05:00
|
|
|
#[inline]
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn eq(a: &~str, b: &~str) -> bool {
|
2012-08-02 17:42:56 -05:00
|
|
|
eq_slice(*a, *b)
|
|
|
|
}
|
|
|
|
|
2012-01-24 03:29:45 -06:00
|
|
|
/*
|
|
|
|
Section: Misc
|
|
|
|
*/
|
|
|
|
|
2014-02-15 23:11:47 -06:00
|
|
|
/// Walk through `iter` checking that it's a valid UTF-8 sequence,
|
|
|
|
/// returning `true` in that case, or, if it is invalid, `false` with
|
|
|
|
/// `iter` reset such that it is pointing at the first byte in the
|
|
|
|
/// invalid sequence.
|
|
|
|
#[inline(always)]
|
2014-03-08 17:11:52 -06:00
|
|
|
fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
|
2014-02-15 23:11:47 -06:00
|
|
|
loop {
|
|
|
|
// save the current thing we're pointing at.
|
|
|
|
let old = *iter;
|
|
|
|
|
|
|
|
// restore the iterator we had at the start of this codepoint.
|
|
|
|
macro_rules! err ( () => { {*iter = old; return false} });
|
|
|
|
macro_rules! next ( () => {
|
|
|
|
match iter.next() {
|
|
|
|
Some(a) => *a,
|
|
|
|
// we needed data, but there was none: error!
|
|
|
|
None => err!()
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
let first = match iter.next() {
|
|
|
|
Some(&b) => b,
|
|
|
|
// we're at the end of the iterator and a codepoint
|
|
|
|
// boundary at the same time, so this string is valid.
|
|
|
|
None => return true
|
|
|
|
};
|
|
|
|
|
|
|
|
// ASCII characters are always valid, so only large
|
|
|
|
// bytes need more examination.
|
|
|
|
if first >= 128 {
|
|
|
|
let w = utf8_char_width(first);
|
|
|
|
let second = next!();
|
|
|
|
// 2-byte encoding is for codepoints \u0080 to \u07ff
|
|
|
|
// first C2 80 last DF BF
|
|
|
|
// 3-byte encoding is for codepoints \u0800 to \uffff
|
|
|
|
// first E0 A0 80 last EF BF BF
|
|
|
|
// excluding surrogates codepoints \ud800 to \udfff
|
|
|
|
// ED A0 80 to ED BF BF
|
|
|
|
// 4-byte encoding is for codepoints \u10000 to \u10ffff
|
|
|
|
// first F0 90 80 80 last F4 8F BF BF
|
|
|
|
//
|
|
|
|
// Use the UTF-8 syntax from the RFC
|
|
|
|
//
|
|
|
|
// https://tools.ietf.org/html/rfc3629
|
|
|
|
// UTF8-1 = %x00-7F
|
|
|
|
// UTF8-2 = %xC2-DF UTF8-tail
|
|
|
|
// UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
|
|
|
|
// %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
|
|
|
|
// UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
|
|
|
|
// %xF4 %x80-8F 2( UTF8-tail )
|
|
|
|
match w {
|
|
|
|
2 => if second & 192 != TAG_CONT_U8 {err!()},
|
|
|
|
3 => {
|
|
|
|
match (first, second, next!() & 192) {
|
|
|
|
(0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) |
|
|
|
|
(0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) |
|
|
|
|
(0xED , 0x80 .. 0x9F, TAG_CONT_U8) |
|
|
|
|
(0xEE .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => {}
|
|
|
|
_ => err!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
4 => {
|
|
|
|
match (first, second, next!() & 192, next!() & 192) {
|
|
|
|
(0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
|
|
|
|
(0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
|
|
|
|
(0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
|
|
|
|
_ => err!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => err!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Determines if a vector of bytes contains valid UTF-8.
|
2013-06-23 22:44:11 -05:00
|
|
|
pub fn is_utf8(v: &[u8]) -> bool {
|
2014-02-15 23:11:47 -06:00
|
|
|
run_utf8_validation_iterator(&mut v.iter())
|
2014-02-07 16:58:37 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
fn first_non_utf8_index(v: &[u8]) -> Option<uint> {
|
2014-02-16 00:12:47 -06:00
|
|
|
let mut it = v.iter();
|
2013-07-10 16:06:16 -05:00
|
|
|
|
2014-02-16 00:12:47 -06:00
|
|
|
let ok = run_utf8_validation_iterator(&mut it);
|
|
|
|
if ok {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
// work out how many valid bytes we've consumed
|
|
|
|
// (run_utf8_validation_iterator resets the iterator to just
|
|
|
|
// after the last good byte), which we can do because the
|
|
|
|
// vector iterator size_hint is exact.
|
|
|
|
let (remaining, _) = it.size_hint();
|
|
|
|
Some(v.len() - remaining)
|
2012-01-24 03:29:45 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Determines if a vector of `u16` contains valid UTF-16
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn is_utf16(v: &[u16]) -> bool {
|
2014-02-16 06:52:14 -06:00
|
|
|
let mut it = v.iter();
|
|
|
|
macro_rules! next ( ($ret:expr) => {
|
|
|
|
match it.next() { Some(u) => *u, None => return $ret }
|
|
|
|
}
|
|
|
|
)
|
|
|
|
loop {
|
|
|
|
let u = next!(true);
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-02-16 06:52:14 -06:00
|
|
|
match char::from_u32(u as u32) {
|
|
|
|
Some(_) => {}
|
|
|
|
None => {
|
|
|
|
let u2 = next!(false);
|
|
|
|
if u < 0xD7FF || u > 0xDBFF ||
|
|
|
|
u2 < 0xDC00 || u2 > 0xDFFF { return false; }
|
|
|
|
}
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-16 07:09:45 -06:00
|
|
|
/// An iterator that decodes UTF-16 encoded codepoints from a vector
|
|
|
|
/// of `u16`s.
|
2014-02-16 07:52:58 -06:00
|
|
|
#[deriving(Clone)]
|
|
|
|
pub struct UTF16Items<'a> {
|
2014-03-27 17:09:47 -05:00
|
|
|
iter: slice::Items<'a, u16>
|
2014-02-16 07:09:45 -06:00
|
|
|
}
|
2014-02-16 07:52:58 -06:00
|
|
|
/// The possibilities for values decoded from a `u16` stream.
|
2014-02-28 03:23:06 -06:00
|
|
|
#[deriving(Eq, TotalEq, Clone, Show)]
|
2014-02-16 07:52:58 -06:00
|
|
|
pub enum UTF16Item {
|
|
|
|
/// A valid codepoint.
|
|
|
|
ScalarValue(char),
|
|
|
|
/// An invalid surrogate without its pair.
|
|
|
|
LoneSurrogate(u16)
|
|
|
|
}
|
|
|
|
|
|
|
|
impl UTF16Item {
|
|
|
|
/// Convert `self` to a `char`, taking `LoneSurrogate`s to the
|
|
|
|
/// replacement character (U+FFFD).
|
|
|
|
#[inline]
|
|
|
|
pub fn to_char_lossy(&self) -> char {
|
|
|
|
match *self {
|
|
|
|
ScalarValue(c) => c,
|
|
|
|
LoneSurrogate(_) => '\uFFFD'
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Iterator<UTF16Item> for UTF16Items<'a> {
|
|
|
|
fn next(&mut self) -> Option<UTF16Item> {
|
2014-02-16 07:09:45 -06:00
|
|
|
let u = match self.iter.next() {
|
|
|
|
Some(u) => *u,
|
|
|
|
None => return None
|
|
|
|
};
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
if u < 0xD800 || 0xDFFF < u {
|
|
|
|
// not a surrogate
|
|
|
|
Some(ScalarValue(unsafe {cast::transmute(u as u32)}))
|
|
|
|
} else if u >= 0xDC00 {
|
|
|
|
// a trailing surrogate
|
|
|
|
Some(LoneSurrogate(u))
|
2012-03-02 17:47:14 -06:00
|
|
|
} else {
|
2014-02-16 07:52:58 -06:00
|
|
|
// preserve state for rewinding.
|
|
|
|
let old = self.iter;
|
|
|
|
|
|
|
|
let u2 = match self.iter.next() {
|
|
|
|
Some(u2) => *u2,
|
|
|
|
// eof
|
|
|
|
None => return Some(LoneSurrogate(u))
|
|
|
|
};
|
|
|
|
if u2 < 0xDC00 || u2 > 0xDFFF {
|
|
|
|
// not a trailing surrogate so we're not a valid
|
|
|
|
// surrogate pair, so rewind to redecode u2 next time.
|
|
|
|
self.iter = old;
|
|
|
|
return Some(LoneSurrogate(u))
|
2014-02-16 07:09:45 -06:00
|
|
|
}
|
2014-02-16 07:52:58 -06:00
|
|
|
|
|
|
|
// all ok, so lets decode it.
|
2014-02-16 07:57:16 -06:00
|
|
|
let c = ((u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
|
2014-02-16 07:52:58 -06:00
|
|
|
Some(ScalarValue(unsafe {cast::transmute(c)}))
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
2014-02-16 07:09:45 -06:00
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
#[inline]
|
2014-02-16 07:09:45 -06:00
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
let (low, high) = self.iter.size_hint();
|
2014-02-16 07:52:58 -06:00
|
|
|
// we could be entirely valid surrogates (2 elements per
|
|
|
|
// char), or entirely non-surrogates (1 element per char)
|
2014-02-16 07:09:45 -06:00
|
|
|
(low / 2, high)
|
|
|
|
}
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
/// Create an iterator over the UTF-16 encoded codepoints in `v`,
|
|
|
|
/// returning invalid surrogates as `LoneSurrogate`s.
|
2013-05-28 16:35:52 -05:00
|
|
|
///
|
2014-02-16 07:52:58 -06:00
|
|
|
/// # Example
|
2013-05-28 16:35:52 -05:00
|
|
|
///
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
/// use std::str::{ScalarValue, LoneSurrogate};
|
2014-02-16 07:09:45 -06:00
|
|
|
///
|
2014-02-16 07:52:58 -06:00
|
|
|
/// // 𝄞mus<invalid>ic<invalid>
|
|
|
|
/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0xDD1E, 0x0069, 0x0063,
|
|
|
|
/// 0xD834];
|
|
|
|
///
|
2014-03-22 18:18:37 -05:00
|
|
|
/// assert_eq!(str::utf16_items(v).collect::<~[_]>(),
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ~[ScalarValue('𝄞'),
|
|
|
|
/// ScalarValue('m'), ScalarValue('u'), ScalarValue('s'),
|
|
|
|
/// LoneSurrogate(0xDD1E),
|
|
|
|
/// ScalarValue('i'), ScalarValue('c'),
|
|
|
|
/// LoneSurrogate(0xD834)]);
|
|
|
|
/// ```
|
|
|
|
pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> {
|
|
|
|
UTF16Items { iter : v.iter() }
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2014-02-18 05:25:32 -06:00
|
|
|
/// Return a slice of `v` ending at (and not including) the first NUL
|
|
|
|
/// (0).
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
///
|
|
|
|
/// // "abcd"
|
|
|
|
/// let mut v = ['a' as u16, 'b' as u16, 'c' as u16, 'd' as u16];
|
|
|
|
/// // no NULs so no change
|
|
|
|
/// assert_eq!(str::truncate_utf16_at_nul(v), v.as_slice());
|
|
|
|
///
|
|
|
|
/// // "ab\0d"
|
|
|
|
/// v[2] = 0;
|
|
|
|
/// assert_eq!(str::truncate_utf16_at_nul(v),
|
|
|
|
/// &['a' as u16, 'b' as u16]);
|
|
|
|
/// ```
|
|
|
|
pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
|
|
|
|
match v.iter().position(|c| *c == 0) {
|
|
|
|
// don't include the 0
|
|
|
|
Some(i) => v.slice_to(i),
|
|
|
|
None => v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a string, returning `None`
|
|
|
|
/// if `v` contains any invalid data.
|
2014-02-16 07:52:58 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
///
|
|
|
|
/// // 𝄞music
|
2014-02-16 16:57:56 -06:00
|
|
|
/// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0x0069, 0x0063];
|
2014-04-15 20:17:48 -05:00
|
|
|
/// assert_eq!(str::from_utf16(v), Some("𝄞music".to_owned()));
|
2014-02-16 16:57:56 -06:00
|
|
|
///
|
|
|
|
/// // 𝄞mu<invalid>ic
|
|
|
|
/// v[4] = 0xD800;
|
|
|
|
/// assert_eq!(str::from_utf16(v), None);
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ```
|
2014-02-16 16:57:56 -06:00
|
|
|
pub fn from_utf16(v: &[u16]) -> Option<~str> {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut s = StrBuf::with_capacity(v.len() / 2);
|
2014-02-16 16:57:56 -06:00
|
|
|
for c in utf16_items(v) {
|
|
|
|
match c {
|
|
|
|
ScalarValue(c) => s.push_char(c),
|
|
|
|
LoneSurrogate(_) => return None
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
Some(s.into_owned())
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a string, replacing
|
|
|
|
/// invalid data with the replacement character (U+FFFD).
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
///
|
|
|
|
/// // 𝄞mus<invalid>ic<invalid>
|
|
|
|
/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0xDD1E, 0x0069, 0x0063,
|
|
|
|
/// 0xD834];
|
|
|
|
///
|
|
|
|
/// assert_eq!(str::from_utf16_lossy(v),
|
2014-04-15 20:17:48 -05:00
|
|
|
/// "𝄞mus\uFFFDic\uFFFD".to_owned());
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ```
|
|
|
|
pub fn from_utf16_lossy(v: &[u16]) -> ~str {
|
|
|
|
utf16_items(v).map(|c| c.to_char_lossy()).collect()
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2013-07-10 13:32:59 -05:00
|
|
|
// https://tools.ietf.org/html/rfc3629
|
2013-08-07 01:03:31 -05:00
|
|
|
static UTF8_CHAR_WIDTH: [u8, ..256] = [
|
2013-07-10 13:32:59 -05:00
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
|
2013-07-30 10:17:21 -05:00
|
|
|
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
2013-07-10 13:32:59 -05:00
|
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
|
|
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
|
|
|
|
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
|
|
|
|
];
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Given a first byte, determine how many bytes are in this UTF-8 character
|
2014-02-22 16:11:36 -06:00
|
|
|
#[inline]
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn utf8_char_width(b: u8) -> uint {
|
2014-04-01 22:39:26 -05:00
|
|
|
return UTF8_CHAR_WIDTH[b as uint] as uint;
|
2012-01-24 03:29:45 -06:00
|
|
|
}
|
|
|
|
|
2013-12-14 23:26:09 -06:00
|
|
|
/// Struct that contains a `char` and the index of the first byte of
|
|
|
|
/// the next `char` in a string. This can be used as a data structure
|
|
|
|
/// for iterating over the UTF-8 bytes of a string.
|
2012-11-26 22:05:19 -06:00
|
|
|
pub struct CharRange {
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Current `char`
|
2014-03-27 17:09:47 -05:00
|
|
|
pub ch: char,
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Index of the first byte of the next `char`
|
2014-03-27 17:09:47 -05:00
|
|
|
pub next: uint,
|
2012-11-26 22:05:19 -06:00
|
|
|
}
|
|
|
|
|
2013-08-02 11:34:00 -05:00
|
|
|
// Return the initial codepoint accumulator for the first byte.
|
|
|
|
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
|
|
|
// for width 3, and 3 bits for width 4
|
|
|
|
macro_rules! utf8_first_byte(
|
2014-02-06 01:56:27 -06:00
|
|
|
($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
|
2013-08-02 11:34:00 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// return the value of $ch updated with continuation byte $byte
|
|
|
|
macro_rules! utf8_acc_cont_byte(
|
2014-02-06 01:56:27 -06:00
|
|
|
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
|
2013-08-02 11:34:00 -05:00
|
|
|
)
|
|
|
|
|
2013-08-07 01:03:31 -05:00
|
|
|
// Value of the top two bits (mask 192) of a UTF-8 continuation byte.
static TAG_CONT_U8: u8 = 128u8;
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
/// Converts a vector of bytes to a new utf-8 string.
|
|
|
|
/// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
|
|
|
|
/// let output = std::str::from_utf8_lossy(input);
|
2014-02-07 16:58:37 -06:00
|
|
|
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
|
2014-02-06 01:56:27 -06:00
|
|
|
/// ```
|
2014-02-07 16:58:37 -06:00
|
|
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
|
|
|
|
let firstbad = match first_non_utf8_index(v) {
|
|
|
|
None => return Slice(unsafe { cast::transmute(v) }),
|
|
|
|
Some(i) => i
|
|
|
|
};
|
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
|
2014-02-07 16:58:37 -06:00
|
|
|
let mut i = firstbad;
|
2014-02-06 01:56:27 -06:00
|
|
|
let total = v.len();
|
|
|
|
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
|
|
|
|
unsafe { *xs.unsafe_ref(i) }
|
|
|
|
}
|
|
|
|
fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
|
|
|
|
if i >= total {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
unsafe_get(xs, i)
|
|
|
|
}
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
let mut res = StrBuf::with_capacity(total);
|
2014-02-06 01:56:27 -06:00
|
|
|
|
2014-02-07 16:58:37 -06:00
|
|
|
if i > 0 {
|
2014-04-02 18:54:22 -05:00
|
|
|
unsafe {
|
|
|
|
res.push_bytes(v.slice_to(i))
|
|
|
|
};
|
2014-02-07 16:58:37 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
|
|
|
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
|
|
|
// them one by one.
|
|
|
|
let mut subseqidx = firstbad;
|
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
while i < total {
|
|
|
|
let i_ = i;
|
|
|
|
let byte = unsafe_get(v, i);
|
|
|
|
i += 1;
|
|
|
|
|
2014-02-07 16:58:37 -06:00
|
|
|
macro_rules! error(() => ({
|
2014-02-06 01:56:27 -06:00
|
|
|
unsafe {
|
2014-02-07 16:58:37 -06:00
|
|
|
if subseqidx != i_ {
|
2014-04-02 18:54:22 -05:00
|
|
|
res.push_bytes(v.slice(subseqidx, i_));
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
subseqidx = i;
|
2014-04-02 18:54:22 -05:00
|
|
|
res.push_bytes(REPLACEMENT);
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
}))
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
if byte < 128u8 {
|
2014-02-07 16:58:37 -06:00
|
|
|
// subseqidx handles this
|
2014-02-06 01:56:27 -06:00
|
|
|
} else {
|
|
|
|
let w = utf8_char_width(byte);
|
|
|
|
|
|
|
|
match w {
|
|
|
|
2 => {
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
3 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
|
|
|
(0xE0 , 0xA0 .. 0xBF) => (),
|
|
|
|
(0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
|
|
|
|
(0xED , 0x80 .. 0x9F) => (),
|
|
|
|
(0xEE .. 0xEF, 0x80 .. 0xBF) => (),
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
4 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
|
|
|
(0xF0 , 0x90 .. 0xBF) => (),
|
|
|
|
(0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
|
|
|
|
(0xF4 , 0x80 .. 0x8F) => (),
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
if subseqidx < total {
|
2014-04-02 18:54:22 -05:00
|
|
|
unsafe {
|
|
|
|
res.push_bytes(v.slice(subseqidx, total))
|
|
|
|
};
|
2014-02-07 16:58:37 -06:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
Owned(res.into_owned())
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
|
|
|
|
2014-02-07 18:36:59 -06:00
|
|
|
/*
|
|
|
|
Section: MaybeOwned
|
|
|
|
*/
|
|
|
|
|
|
|
|
/// A MaybeOwned is a string that can hold either a ~str or a &str.
|
|
|
|
/// This can be useful as an optimization when an allocation is sometimes
|
|
|
|
/// needed but not always.
|
|
|
|
pub enum MaybeOwned<'a> {
|
|
|
|
/// A borrowed string
|
|
|
|
Slice(&'a str),
|
|
|
|
/// An owned string
|
|
|
|
Owned(~str)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// SendStr is a specialization of `MaybeOwned` to be sendable
|
|
|
|
pub type SendStr = MaybeOwned<'static>;
|
|
|
|
|
|
|
|
impl<'a> MaybeOwned<'a> {
|
|
|
|
/// Returns `true` if this `MaybeOwned` wraps an owned string
|
|
|
|
#[inline]
|
|
|
|
pub fn is_owned(&self) -> bool {
|
|
|
|
match *self {
|
|
|
|
Slice(_) => false,
|
|
|
|
Owned(_) => true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` if this `MaybeOwned` wraps a borrowed string
|
|
|
|
#[inline]
|
|
|
|
pub fn is_slice(&self) -> bool {
|
|
|
|
match *self {
|
|
|
|
Slice(_) => true,
|
|
|
|
Owned(_) => false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Trait for moving into a `MaybeOwned`
|
|
|
|
pub trait IntoMaybeOwned<'a> {
|
|
|
|
/// Moves self into a `MaybeOwned`
|
|
|
|
fn into_maybe_owned(self) -> MaybeOwned<'a>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// An owned string becomes the `Owned` variant.
impl<'a> IntoMaybeOwned<'a> for ~str {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> {
        Owned(self)
    }
}
|
|
|
|
|
|
|
|
impl<'a> IntoMaybeOwned<'a> for &'a str {
|
|
|
|
#[inline]
|
|
|
|
fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
|
|
|
|
}
|
|
|
|
|
|
|
|
// Identity conversion: a `MaybeOwned` is returned unchanged.
impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> {
        self
    }
}
|
|
|
|
|
|
|
|
impl<'a> Eq for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &MaybeOwned) -> bool {
|
2014-03-23 06:54:42 -05:00
|
|
|
self.as_slice() == other.as_slice()
|
2014-02-07 18:36:59 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-23 06:54:42 -05:00
|
|
|
// Marker impl with no methods of its own.
impl<'a> TotalEq for MaybeOwned<'a> {}
|
2014-02-07 18:36:59 -06:00
|
|
|
|
|
|
|
impl<'a> Ord for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn lt(&self, other: &MaybeOwned) -> bool {
|
|
|
|
self.as_slice().lt(&other.as_slice())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> TotalOrd for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn cmp(&self, other: &MaybeOwned) -> Ordering {
|
|
|
|
self.as_slice().cmp(&other.as_slice())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// A `MaybeOwned` is equivalent to any `Str` with the same contents.
impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
    #[inline]
    fn equiv(&self, other: &S) -> bool {
        let lhs = self.as_slice();
        let rhs = other.as_slice();
        lhs == rhs
    }
}
|
|
|
|
|
|
|
|
impl<'a> Str for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn as_slice<'b>(&'b self) -> &'b str {
|
|
|
|
match *self {
|
|
|
|
Slice(s) => s,
|
|
|
|
Owned(ref s) => s.as_slice()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn into_owned(self) -> ~str {
|
|
|
|
match self {
|
|
|
|
Slice(s) => s.to_owned(),
|
|
|
|
Owned(s) => s
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Container for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn len(&self) -> uint { self.as_slice().len() }
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Clone for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn clone(&self) -> MaybeOwned<'a> {
|
|
|
|
match *self {
|
|
|
|
Slice(s) => Slice(s),
|
|
|
|
Owned(ref s) => Owned(s.to_owned())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Default for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn default() -> MaybeOwned<'a> { Slice("") }
|
|
|
|
}
|
|
|
|
|
2014-02-25 10:03:41 -06:00
|
|
|
impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
|
2014-02-07 18:36:59 -06:00
|
|
|
#[inline]
|
2014-02-25 10:03:41 -06:00
|
|
|
fn hash(&self, hasher: &mut H) {
|
|
|
|
match *self {
|
|
|
|
Slice(s) => s.hash(hasher),
|
|
|
|
Owned(ref s) => s.hash(hasher),
|
|
|
|
}
|
2014-02-07 18:36:59 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> fmt::Show for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
match *self {
|
|
|
|
Slice(ref s) => s.fmt(f),
|
|
|
|
Owned(ref s) => s.fmt(f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Unsafe operations
|
2012-09-28 17:41:10 -05:00
|
|
|
pub mod raw {
|
2012-12-23 16:41:37 -06:00
|
|
|
use cast;
|
2014-01-06 18:48:51 -06:00
|
|
|
use container::Container;
|
2014-04-17 17:28:14 -05:00
|
|
|
use iter::Iterator;
|
2012-12-23 16:41:37 -06:00
|
|
|
use libc;
|
2014-01-06 18:48:51 -06:00
|
|
|
use ptr::RawPtr;
|
2014-04-17 17:28:14 -05:00
|
|
|
use ptr;
|
2014-02-16 02:04:33 -06:00
|
|
|
use raw::Slice;
|
2014-04-17 17:28:14 -05:00
|
|
|
use slice::{MutableVector, ImmutableVector, OwnedVector, Vector};
|
|
|
|
use str::{is_utf8, StrSlice};
|
|
|
|
use vec::Vec;
|
2012-01-30 21:52:38 -06:00
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Create a Rust string from a *u8 buffer of the given length
|
2013-08-04 15:22:56 -05:00
|
|
|
pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
|
2014-04-17 17:28:14 -05:00
|
|
|
let mut v = Vec::with_capacity(len);
|
2013-12-24 17:53:05 -06:00
|
|
|
ptr::copy_memory(v.as_mut_ptr(), buf, len);
|
2013-12-15 06:05:30 -06:00
|
|
|
v.set_len(len);
|
2013-08-04 15:22:56 -05:00
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
assert!(is_utf8(v.as_slice()));
|
|
|
|
::cast::transmute(v.move_iter().collect::<~[u8]>())
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2013-08-10 12:42:53 -05:00
|
|
|
#[lang="strdup_uniq"]
|
|
|
|
#[cfg(not(test))]
|
|
|
|
#[inline]
|
2014-03-16 18:50:22 -05:00
|
|
|
unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
|
2013-08-10 12:42:53 -05:00
|
|
|
from_buf_len(ptr, len)
|
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Create a Rust string from a null-terminated C string
|
2013-08-04 16:08:20 -05:00
|
|
|
pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
|
|
|
|
let mut curr = buf;
|
|
|
|
let mut i = 0;
|
|
|
|
while *curr != 0 {
|
|
|
|
i += 1;
|
2014-02-10 15:50:42 -06:00
|
|
|
curr = buf.offset(i);
|
2013-08-04 16:08:20 -05:00
|
|
|
}
|
|
|
|
from_buf_len(buf as *u8, i as uint)
|
2012-03-19 17:25:26 -05:00
|
|
|
}
|
|
|
|
|
2013-11-28 06:52:11 -06:00
|
|
|
/// Converts a slice of bytes to a string slice without checking
|
|
|
|
/// that the string contains valid UTF-8.
|
2013-12-01 07:33:04 -06:00
|
|
|
pub unsafe fn from_utf8<'a>(v: &'a [u8]) -> &'a str {
|
2013-11-28 06:52:11 -06:00
|
|
|
cast::transmute(v)
|
2012-07-24 14:35:34 -05:00
|
|
|
}
|
2012-01-30 21:52:38 -06:00
|
|
|
|
2013-06-28 16:05:10 -05:00
|
|
|
/// Converts an owned vector of bytes to a new owned string. This assumes
|
|
|
|
/// that the utf-8-ness of the vector has already been validated
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
2013-09-05 07:17:24 -05:00
|
|
|
pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
|
2013-08-04 15:22:56 -05:00
|
|
|
cast::transmute(v)
|
|
|
|
}
|
|
|
|
|
2012-07-24 14:35:34 -05:00
|
|
|
/// Converts a byte to a string.
|
2013-11-28 06:52:11 -06:00
|
|
|
pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(~[u]) }
|
2012-02-01 05:25:04 -06:00
|
|
|
|
2013-06-10 16:27:18 -05:00
|
|
|
/// Form a slice from a C string. Unsafe because the caller must ensure the
|
|
|
|
/// C string has the static lifetime, or else the return value may be
|
|
|
|
/// invalidated later.
|
2013-08-04 15:22:56 -05:00
|
|
|
pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
|
|
|
|
let s = s as *u8;
|
|
|
|
let mut curr = s;
|
|
|
|
let mut len = 0u;
|
|
|
|
while *curr != 0u8 {
|
|
|
|
len += 1u;
|
2014-02-10 15:50:42 -06:00
|
|
|
curr = s.offset(len as int);
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
let v = Slice { data: s, len: len };
|
|
|
|
assert!(is_utf8(::cast::transmute(v)));
|
|
|
|
::cast::transmute(v)
|
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Takes a bytewise (not UTF-8) slice from a string.
|
|
|
|
///
|
|
|
|
/// Returns the substring from [`begin`..`end`).
|
|
|
|
///
|
|
|
|
/// # Failure
|
|
|
|
///
|
|
|
|
/// If begin is greater than end.
|
|
|
|
/// If end is greater than the length of the string.
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
2013-08-10 09:13:47 -05:00
|
|
|
pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
|
2013-08-18 06:57:34 -05:00
|
|
|
assert!(begin <= end);
|
|
|
|
assert!(end <= s.len());
|
|
|
|
slice_unchecked(s, begin, end)
|
|
|
|
}
|
2013-08-04 15:22:56 -05:00
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
/// Takes a bytewise (not UTF-8) slice from a string.
|
|
|
|
///
|
|
|
|
/// Returns the substring from [`begin`..`end`).
|
|
|
|
///
|
|
|
|
/// Caller must check slice boundaries!
|
|
|
|
#[inline]
|
|
|
|
pub unsafe fn slice_unchecked<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
|
2013-12-17 09:37:30 -06:00
|
|
|
cast::transmute(Slice {
|
|
|
|
data: s.as_ptr().offset(begin as int),
|
|
|
|
len: end - begin,
|
|
|
|
})
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
/// Access the str in its vector representation.
|
|
|
|
/// The caller must preserve the valid UTF-8 property when modifying.
|
|
|
|
#[inline]
|
|
|
|
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
|
|
|
|
cast::transmute(s)
|
|
|
|
}
|
|
|
|
|
2013-08-04 15:22:56 -05:00
|
|
|
/// Sets the length of a string
|
|
|
|
///
|
|
|
|
/// This will explicitly set the size of the string, without actually
|
2014-04-20 23:49:39 -05:00
|
|
|
/// modifying its buffers, so it is up to the caller to ensure that
|
2013-08-04 15:22:56 -05:00
|
|
|
/// the string is actually the specified size.
|
2012-04-12 23:46:41 -05:00
|
|
|
#[test]
|
2012-06-24 22:18:18 -05:00
|
|
|
fn test_from_buf_len() {
|
|
|
|
unsafe {
|
2012-06-29 18:26:56 -05:00
|
|
|
let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
|
2013-12-15 06:35:12 -06:00
|
|
|
let b = a.as_ptr();
|
2012-06-24 22:18:18 -05:00
|
|
|
let c = from_buf_len(b, 3u);
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(c, "AAA".to_owned());
|
2012-06-24 22:18:18 -05:00
|
|
|
}
|
2012-04-12 23:46:41 -05:00
|
|
|
}
|
2012-01-30 21:52:38 -06:00
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/*
|
|
|
|
Section: Trait implementations
|
|
|
|
*/
|
|
|
|
|
2013-05-08 06:11:23 -05:00
|
|
|
#[cfg(not(test))]
|
2013-10-12 21:02:46 -05:00
|
|
|
#[allow(missing_doc)]
|
2012-09-28 17:41:10 -05:00
|
|
|
pub mod traits {
|
2014-01-06 18:48:51 -06:00
|
|
|
use container::Container;
|
2013-06-13 22:37:47 -05:00
|
|
|
use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
|
2014-01-06 18:48:51 -06:00
|
|
|
use iter::Iterator;
|
|
|
|
use ops::Add;
|
2013-08-01 02:16:42 -05:00
|
|
|
use option::{Some, None};
|
2014-04-02 18:54:22 -05:00
|
|
|
use str::{Str, StrSlice, eq_slice};
|
|
|
|
use strbuf::StrBuf;
|
2013-06-13 22:37:47 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Add<&'a str,~str> for &'a str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn add(&self, rhs: & &'a str) -> ~str {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut ret = StrBuf::from_owned_str(self.to_owned());
|
2013-06-15 08:17:53 -05:00
|
|
|
ret.push_str(*rhs);
|
2014-04-02 18:54:22 -05:00
|
|
|
ret.into_owned()
|
2012-11-28 15:51:50 -06:00
|
|
|
}
|
|
|
|
}
|
2013-06-13 22:37:47 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> TotalOrd for &'a str {
|
2013-06-13 22:37:47 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn cmp(&self, other: & &'a str) -> Ordering {
|
2013-11-23 04:18:51 -06:00
|
|
|
for (s_b, o_b) in self.bytes().zip(other.bytes()) {
|
2013-06-13 22:37:47 -05:00
|
|
|
match s_b.cmp(&o_b) {
|
|
|
|
Greater => return Greater,
|
|
|
|
Less => return Less,
|
|
|
|
Equal => ()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
self.len().cmp(&other.len())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl TotalOrd for ~str {
|
|
|
|
#[inline]
|
|
|
|
fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
|
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Eq for &'a str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn eq(&self, other: & &'a str) -> bool {
|
2013-06-13 22:37:47 -05:00
|
|
|
eq_slice((*self), (*other))
|
|
|
|
}
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn ne(&self, other: & &'a str) -> bool { !(*self).eq(other) }
|
2013-06-13 22:37:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Eq for ~str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-06-13 22:37:47 -05:00
|
|
|
fn eq(&self, other: &~str) -> bool {
|
|
|
|
eq_slice((*self), (*other))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-23 06:54:42 -05:00
|
|
|
impl<'a> TotalEq for &'a str {}
|
2013-06-13 22:37:47 -05:00
|
|
|
|
2014-03-23 06:54:42 -05:00
|
|
|
impl TotalEq for ~str {}
|
2013-06-13 22:37:47 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Ord for &'a str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn lt(&self, other: & &'a str) -> bool { self.cmp(other) == Less }
|
2013-06-13 22:37:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Ord for ~str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-06-13 22:37:47 -05:00
|
|
|
fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
|
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a, S: Str> Equiv<S> for &'a str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-06-13 22:37:47 -05:00
|
|
|
fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
|
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a, S: Str> Equiv<S> for ~str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-06-13 22:37:47 -05:00
|
|
|
fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
|
|
|
|
}
|
2012-07-27 16:51:19 -05:00
|
|
|
}
|
|
|
|
|
2012-09-19 20:00:26 -05:00
|
|
|
// Test builds get this empty `traits` module; the module of the same name
// holding the trait impls is gated on `cfg(not(test))`.
#[cfg(test)]
|
2012-09-28 17:41:10 -05:00
|
|
|
pub mod traits {}
|
2012-09-19 20:00:26 -05:00
|
|
|
|
2013-06-11 08:52:24 -05:00
|
|
|
/// Any string that can be represented as a slice
|
|
|
|
pub trait Str {
|
|
|
|
/// Work with `self` as a slice.
|
|
|
|
fn as_slice<'a>(&'a self) -> &'a str;
|
2013-08-01 17:54:58 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
/// Convert `self` into a ~str, not making a copy if possible.
|
2013-08-01 17:54:58 -05:00
|
|
|
fn into_owned(self) -> ~str;
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
/// Convert `self` into a `StrBuf`.
|
|
|
|
#[inline]
|
|
|
|
fn to_strbuf(&self) -> StrBuf {
|
|
|
|
StrBuf::from_str(self.as_slice())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Convert `self` into a `StrBuf`, not making a copy if possible.
|
|
|
|
#[inline]
|
|
|
|
fn into_strbuf(self) -> StrBuf {
|
|
|
|
StrBuf::from_owned_str(self.into_owned())
|
|
|
|
}
|
2013-06-11 08:52:24 -05:00
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Str for &'a str {
|
2013-06-18 16:45:18 -05:00
|
|
|
#[inline]
|
2013-06-11 08:52:24 -05:00
|
|
|
fn as_slice<'a>(&'a self) -> &'a str { *self }
|
2013-08-01 17:54:58 -05:00
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn into_owned(self) -> ~str { self.to_owned() }
|
2013-06-11 08:52:24 -05:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl Str for ~str {
    /// Borrow the owned string as a slice for the lifetime of the borrow of `self`.
    #[inline]
    fn as_slice<'a>(&'a self) -> &'a str {
        let s: &'a str = *self; s
    }

    /// An owned string is returned unchanged.
    #[inline]
    fn into_owned(self) -> ~str { self }
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Container for &'a str {
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
|
|
|
fn len(&self) -> uint {
|
2013-12-17 09:37:30 -06:00
|
|
|
self.repr().len
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2013-06-18 11:39:16 -05:00
|
|
|
}
|
|
|
|
|
2013-07-20 12:28:38 -05:00
|
|
|
impl Container for ~str {
    /// Length of the owned string, taken from its slice view.
    #[inline]
    fn len(&self) -> uint {
        let text = self.as_slice();
        text.len()
    }
}
|
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Methods for string slices
|
2013-12-10 01:16:18 -06:00
|
|
|
pub trait StrSlice<'a> {
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Returns true if one string contains another
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
2013-09-25 18:18:50 -05:00
|
|
|
/// - needle - The string to look for
|
|
|
|
fn contains<'a>(&self, needle: &'a str) -> bool;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
|
|
|
/// Returns true if a string contains a char.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
2013-09-25 18:18:50 -05:00
|
|
|
/// - needle - The char to look for
|
|
|
|
fn contains_char(&self, needle: char) -> bool;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over the characters of `self`. Note, this iterates
|
|
|
|
/// over unicode code-points, not unicode graphemes.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```rust
|
2013-11-23 04:18:51 -06:00
|
|
|
/// let v: ~[char] = "abc åäö".chars().collect();
|
2013-06-10 08:57:41 -05:00
|
|
|
/// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn chars(&self) -> Chars<'a>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Do not use this - it is deprecated.
|
|
|
|
#[deprecated = "replaced by .chars().rev()"]
|
|
|
|
fn chars_rev(&self) -> Rev<Chars<'a>>;
|
2013-04-18 07:50:55 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over the bytes of `self`
|
2014-01-14 21:32:24 -06:00
|
|
|
fn bytes(&self) -> Bytes<'a>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Do not use this - it is deprecated.
|
|
|
|
#[deprecated = "replaced by .bytes().rev()"]
|
|
|
|
fn bytes_rev(&self) -> Rev<Bytes<'a>>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
|
|
|
/// An iterator over the characters of `self` and their byte offsets.
|
2014-01-14 21:32:24 -06:00
|
|
|
fn char_indices(&self) -> CharOffsets<'a>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// Do not use this - it is deprecated.
|
|
|
|
#[deprecated = "replaced by .char_indices().rev()"]
|
|
|
|
fn char_indices_rev(&self) -> Rev<CharOffsets<'a>>;
|
2013-06-08 09:38:58 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over substrings of `self`, separated by characters
|
|
|
|
/// matched by `sep`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```rust
|
2013-11-23 04:18:51 -06:00
|
|
|
/// let v: ~[&str] = "Mary had a little lamb".split(' ').collect();
|
2013-06-10 08:57:41 -05:00
|
|
|
/// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
|
|
|
|
///
|
2013-11-23 04:18:51 -06:00
|
|
|
/// let v: ~[&str] = "abc1def2ghi".split(|c: char| c.is_digit()).collect();
|
2013-06-10 08:57:41 -05:00
|
|
|
/// assert_eq!(v, ~["abc", "def", "ghi"]);
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "lionXXtigerXleopard".split('X').collect();
|
|
|
|
/// assert_eq!(v, ~["lion", "", "tiger", "leopard"]);
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over substrings of `self`, separated by characters
|
|
|
|
/// matched by `sep`, restricted to splitting at most `count`
|
|
|
|
/// times.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let v: ~[&str] = "Mary had a little lambda".splitn(' ', 2).collect();
|
|
|
|
/// assert_eq!(v, ~["Mary", "had", "a little lambda"]);
|
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "abc1def2ghi".splitn(|c: char| c.is_digit(), 1).collect();
|
|
|
|
/// assert_eq!(v, ~["abc", "def2ghi"]);
|
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "lionXXtigerXleopard".splitn('X', 2).collect();
|
|
|
|
/// assert_eq!(v, ~["lion", "", "tigerXleopard"]);
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
|
2013-06-10 08:57:41 -05:00
|
|
|
|
|
|
|
/// An iterator over substrings of `self`, separated by characters
|
2013-08-25 01:54:47 -05:00
|
|
|
/// matched by `sep`.
|
|
|
|
///
|
2013-11-23 04:18:51 -06:00
|
|
|
/// Equivalent to `split`, except that the trailing substring
|
2013-08-25 01:54:47 -05:00
|
|
|
/// is skipped if empty (terminator semantics).
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```rust
|
2013-11-23 04:18:51 -06:00
|
|
|
/// let v: ~[&str] = "A.B.".split_terminator('.').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
/// assert_eq!(v, ~["A", "B"]);
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "A..B..".split_terminator('.').collect();
|
|
|
|
/// assert_eq!(v, ~["A", "", "B", ""]);
|
2013-08-25 01:54:47 -05:00
|
|
|
///
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// let v: ~[&str] = "Mary had a little lamb".split(' ').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
/// assert_eq!(v, ~["lamb", "little", "a", "had", "Mary"]);
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// let v: ~[&str] = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect();
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(v, ~["ghi", "def", "abc"]);
|
|
|
|
///
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
/// let v: ~[&str] = "lionXXtigerXleopard".split('X').rev().collect();
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(v, ~["leopard", "tiger", "", "lion"]);
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
fn split_terminator<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
|
|
|
|
|
|
|
|
/// Do not use this - it is deprecated.
|
|
|
|
#[deprecated = "replaced by .split(sep).rev()"]
|
|
|
|
fn rsplit<Sep: CharEq>(&self, sep: Sep) -> Rev<CharSplits<'a, Sep>>;
|
2013-08-25 01:54:47 -05:00
|
|
|
|
|
|
|
/// An iterator over substrings of `self`, separated by characters
|
|
|
|
/// matched by `sep`, starting from the end of the string.
|
|
|
|
/// Restricted to splitting at most `count` times.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let v: ~[&str] = "Mary had a little lamb".rsplitn(' ', 2).collect();
|
|
|
|
/// assert_eq!(v, ~["lamb", "little", "Mary had a"]);
|
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "abc1def2ghi".rsplitn(|c: char| c.is_digit(), 1).collect();
|
|
|
|
/// assert_eq!(v, ~["ghi", "abc1def"]);
|
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "lionXXtigerXleopard".rsplitn('X', 2).collect();
|
|
|
|
/// assert_eq!(v, ~["leopard", "tiger", "lionX"]);
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
|
2013-08-25 01:54:47 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// An iterator over the start and end indices of the disjoint
|
|
|
|
/// matches of `sep` within `self`.
|
|
|
|
///
|
|
|
|
/// That is, each returned value `(start, end)` satisfies
|
|
|
|
/// `self.slice(start, end) == sep`. For matches of `sep` within
|
|
|
|
/// `self` that overlap, only the indices corresponding to the
|
|
|
|
/// first match are returned.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let v: ~[(uint, uint)] = "abcXXXabcYYYabc".match_indices("abc").collect();
|
|
|
|
/// assert_eq!(v, ~[(0,3), (6,9), (12,15)]);
|
|
|
|
///
|
2013-12-22 15:31:23 -06:00
|
|
|
/// let v: ~[(uint, uint)] = "1abcabc2".match_indices("abc").collect();
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(v, ~[(1,4), (4,7)]);
|
|
|
|
///
|
2013-12-22 15:31:23 -06:00
|
|
|
/// let v: ~[(uint, uint)] = "ababa".match_indices("aba").collect();
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(v, ~[(0, 3)]); // only the first `aba`
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn match_indices(&self, sep: &'a str) -> MatchIndices<'a>;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
|
|
|
/// An iterator over the substrings of `self` separated by `sep`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```rust
|
2013-12-05 05:49:34 -06:00
|
|
|
/// let v: ~[&str] = "abcXXXabcYYYabc".split_str("abc").collect();
|
|
|
|
/// assert_eq!(v, ~["", "XXX", "YYY", ""]);
|
|
|
|
///
|
|
|
|
/// let v: ~[&str] = "1abcabc2".split_str("abc").collect();
|
|
|
|
/// assert_eq!(v, ~["1", "", "2"]);
|
2013-09-23 19:20:36 -05:00
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn split_str(&self, sep: &'a str) -> StrSplits<'a>;
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over the lines of a string (subsequences separated
|
2013-12-05 05:49:34 -06:00
|
|
|
/// by `\n`). This does not include the empty string after a
|
|
|
|
/// trailing `\n`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let four_lines = "foo\nbar\n\nbaz\n";
|
|
|
|
/// let v: ~[&str] = four_lines.lines().collect();
|
|
|
|
/// assert_eq!(v, ~["foo", "bar", "", "baz"]);
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn lines(&self) -> CharSplits<'a, char>;
|
2013-06-13 10:39:06 -05:00
|
|
|
|
|
|
|
/// An iterator over the lines of a string, separated by either
|
2013-12-05 05:49:34 -06:00
|
|
|
/// `\n` or `\r\n`. As with `.lines()`, this does not include an
|
|
|
|
/// empty trailing line.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let four_lines = "foo\r\nbar\n\r\nbaz\n";
|
|
|
|
/// let v: ~[&str] = four_lines.lines_any().collect();
|
|
|
|
/// assert_eq!(v, ~["foo", "bar", "", "baz"]);
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn lines_any(&self) -> AnyLines<'a>;
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// An iterator over the words of a string (subsequences separated
|
2013-12-05 05:49:34 -06:00
|
|
|
/// by any sequence of whitespace). Sequences of whitespace are
|
|
|
|
/// collapsed, so empty "words" are not included.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let some_words = " Mary had\ta little \n\t lamb";
|
|
|
|
/// let v: ~[&str] = some_words.words().collect();
|
|
|
|
/// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
|
|
|
|
/// ```
|
2014-01-14 21:32:24 -06:00
|
|
|
fn words(&self) -> Words<'a>;
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// An Iterator over the string in Unicode Normalization Form D
|
|
|
|
/// (canonical decomposition).
|
2014-01-14 21:32:24 -06:00
|
|
|
fn nfd_chars(&self) -> Normalizations<'a>;
|
2013-08-10 20:36:38 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// An Iterator over the string in Unicode Normalization Form KD
|
|
|
|
/// (compatibility decomposition).
|
2014-01-14 21:32:24 -06:00
|
|
|
fn nfkd_chars(&self) -> Normalizations<'a>;
|
2013-08-10 20:36:38 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns true if the string contains only whitespace.
|
2013-07-27 16:38:38 -05:00
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Whitespace characters are determined by `char::is_whitespace`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// assert!(" \t\n".is_whitespace());
|
|
|
|
/// assert!("".is_whitespace());
|
|
|
|
///
|
2013-12-22 15:31:23 -06:00
|
|
|
/// assert!( !"abc".is_whitespace());
|
2013-12-05 05:49:34 -06:00
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn is_whitespace(&self) -> bool;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-12-14 23:26:09 -06:00
|
|
|
/// Returns true if the string contains only alphanumeric code
|
|
|
|
/// points.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// Alphanumeric characters are determined by `char::is_alphanumeric`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// assert!("Löwe老虎Léopard123".is_alphanumeric());
|
|
|
|
/// assert!("".is_alphanumeric());
|
2013-07-27 16:38:38 -05:00
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert!( !" &*~".is_alphanumeric());
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn is_alphanumeric(&self) -> bool;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns the number of Unicode code points (`char`) that a
|
|
|
|
/// string holds.
|
|
|
|
///
|
|
|
|
/// This does not perform any normalization, and is `O(n)`, since
|
|
|
|
/// UTF-8 is a variable width encoding of code points.
|
|
|
|
///
|
|
|
|
/// *Warning*: The number of code points in a string does not directly
|
|
|
|
/// correspond to the number of visible characters or width of the
|
|
|
|
/// visible text due to composing characters, and double- and
|
|
|
|
/// zero-width ones.
|
|
|
|
///
|
|
|
|
/// See also `.len()` for the byte length.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// // composed forms of `ö` and `é`
|
|
|
|
/// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
|
|
|
|
/// // decomposed forms of `ö` and `é`
|
|
|
|
/// let d = "Lo\u0308we 老虎 Le\u0301opard";
|
|
|
|
///
|
|
|
|
/// assert_eq!(c.char_len(), 15);
|
|
|
|
/// assert_eq!(d.char_len(), 17);
|
|
|
|
///
|
|
|
|
/// assert_eq!(c.len(), 21);
|
|
|
|
/// assert_eq!(d.len(), 23);
|
|
|
|
///
|
|
|
|
/// // the two strings *look* the same
|
2014-01-09 04:06:55 -06:00
|
|
|
/// println!("{}", c);
|
|
|
|
/// println!("{}", d);
|
2013-12-05 05:49:34 -06:00
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn char_len(&self) -> uint;
|
2013-06-11 06:37:22 -05:00
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Returns a slice of the given string from the byte range
|
2013-12-05 05:49:34 -06:00
|
|
|
/// [`begin`..`end`).
|
|
|
|
///
|
|
|
|
/// This operation is `O(1)`.
|
2013-07-27 16:38:38 -05:00
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Fails when `begin` and `end` do not point to valid characters
|
|
|
|
/// or point beyond the last character of the string.
|
|
|
|
///
|
|
|
|
/// See also `slice_to` and `slice_from` for slicing prefixes and
|
|
|
|
/// suffixes of strings, and `slice_chars` for slicing based on
|
|
|
|
/// code point counts.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
/// assert_eq!(s.slice(0, 1), "L");
|
|
|
|
///
|
2013-12-22 15:31:23 -06:00
|
|
|
/// assert_eq!(s.slice(1, 9), "öwe 老");
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// // these will fail:
|
|
|
|
/// // byte 2 lies within `ö`:
|
|
|
|
/// // s.slice(2, 3);
|
|
|
|
///
|
|
|
|
/// // byte 8 lies within `老`
|
|
|
|
/// // s.slice(1, 8);
|
|
|
|
///
|
|
|
|
/// // byte 100 is outside the string
|
|
|
|
/// // s.slice(3, 100);
|
|
|
|
/// ```
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice(&self, begin: uint, end: uint) -> &'a str;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// Returns a slice of the string from `begin` to its end.
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Equivalent to `self.slice(begin, self.len())`.
|
|
|
|
///
|
2013-06-10 08:57:41 -05:00
|
|
|
/// Fails when `begin` does not point to a valid character, or is
|
|
|
|
/// out of bounds.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// See also `slice`, `slice_to` and `slice_chars`.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_from(&self, begin: uint) -> &'a str;
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-10 08:57:41 -05:00
|
|
|
/// Returns a slice of the string from the beginning to byte
|
|
|
|
/// `end`.
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Equivalent to `self.slice(0, end)`.
|
|
|
|
///
|
2013-06-10 08:57:41 -05:00
|
|
|
/// Fails when `end` does not point to a valid character, or is
|
|
|
|
/// out of bounds.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// See also `slice`, `slice_from` and `slice_chars`.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_to(&self, end: uint) -> &'a str;
|
2013-06-11 06:37:22 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns a slice of the string from the character range
|
2013-06-11 06:37:22 -05:00
|
|
|
/// [`begin`..`end`).
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// That is, start at the `begin`-th code point of the string and
|
|
|
|
/// continue to the `end`-th code point. This does not detect or
|
|
|
|
/// handle edge cases such as leaving a combining character as the
|
|
|
|
/// first code point of the string.
|
|
|
|
///
|
2014-03-05 03:41:35 -06:00
|
|
|
/// Due to the design of UTF-8, this operation is `O(end)`.
|
|
|
|
/// See `slice`, `slice_to` and `slice_from` for `O(1)`
|
2013-12-05 05:49:34 -06:00
|
|
|
/// variants that use byte indices rather than code point
|
|
|
|
/// indices.
|
|
|
|
///
|
2013-06-11 06:37:22 -05:00
|
|
|
/// Fails if `begin` > `end` or if either `begin` or `end` is
|
|
|
|
/// beyond the last character of the string.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
/// assert_eq!(s.slice_chars(0, 4), "Löwe");
|
2013-12-19 00:36:44 -06:00
|
|
|
/// assert_eq!(s.slice_chars(5, 7), "老虎");
|
2013-12-05 05:49:34 -06:00
|
|
|
/// ```
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_chars(&self, begin: uint, end: uint) -> &'a str;
|
2013-08-18 15:15:47 -05:00
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Returns true if `needle` is a prefix of the string.
|
|
|
|
fn starts_with(&self, needle: &str) -> bool;
|
|
|
|
|
|
|
|
/// Returns true if `needle` is a suffix of the string.
|
|
|
|
fn ends_with(&self, needle: &str) -> bool;
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Escape each char in `self` with `char::escape_default`.
|
2013-09-25 18:18:50 -05:00
|
|
|
fn escape_default(&self) -> ~str;
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Escape each char in `self` with `char::escape_unicode`.
|
2013-09-25 18:18:50 -05:00
|
|
|
fn escape_unicode(&self) -> ~str;
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns a string with leading and trailing whitespace removed.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim(&self) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns a string with leading whitespace removed.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim_left(&self) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns a string with trailing whitespace removed.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim_right(&self) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Returns a string with characters that match `to_trim` removed.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * to_trim - a character matcher
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
2014-04-22 00:21:37 -05:00
|
|
|
/// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
|
|
|
|
/// assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
|
|
|
|
/// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
|
2013-09-25 18:18:50 -05:00
|
|
|
/// ```
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Returns a string with leading characters that match `to_trim` removed.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * to_trim - a character matcher
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
2014-04-22 00:21:37 -05:00
|
|
|
/// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
|
|
|
|
/// assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
|
|
|
|
/// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
|
2013-09-25 18:18:50 -05:00
|
|
|
/// ```
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_left_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Returns a string with trailing characters that match `to_trim` removed.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * to_trim - a character matcher
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
2014-04-22 00:21:37 -05:00
|
|
|
/// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
|
|
|
|
/// assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
|
|
|
|
/// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
|
2013-09-25 18:18:50 -05:00
|
|
|
/// ```
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_right_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Replace all occurrences of one string with another.
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// * `from` - The string to replace
|
|
|
|
/// * `to` - The replacement string
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
2014-04-20 23:49:39 -05:00
|
|
|
/// The original string with all occurrences of `from` replaced with `to`.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
2014-05-01 00:32:13 -05:00
|
|
|
/// let s = "Do you know the muffin man,
|
|
|
|
/// The muffin man, the muffin man, ...".to_owned();
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// assert_eq!(s.replace("muffin man", "little lamb"),
|
2014-05-01 00:32:13 -05:00
|
|
|
/// "Do you know the little lamb,
|
|
|
|
/// The little lamb, the little lamb, ...".to_owned());
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// // not found, so no change.
|
|
|
|
/// assert_eq!(s.replace("cookie monster", "little lamb"), s);
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn replace(&self, from: &str, to: &str) -> ~str;
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Copy a slice into a new owned str.
|
2013-09-25 18:18:50 -05:00
|
|
|
fn to_owned(&self) -> ~str;
|
|
|
|
|
|
|
|
/// Converts to a vector of `u16` encoded as UTF-16.
|
|
|
|
fn to_utf16(&self) -> ~[u16];
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Check that `index`-th byte lies at the start and/or end of a
|
|
|
|
/// UTF-8 code point sequence.
|
|
|
|
///
|
|
|
|
/// The start and end of the string (when `index == self.len()`)
|
|
|
|
/// are considered to be boundaries.
|
|
|
|
///
|
|
|
|
/// Fails if `index` is greater than `self.len()`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
/// assert!(s.is_char_boundary(0));
|
|
|
|
/// // start of `老`
|
|
|
|
/// assert!(s.is_char_boundary(6));
|
|
|
|
/// assert!(s.is_char_boundary(s.len()));
|
|
|
|
///
|
|
|
|
/// // second byte of `ö`
|
|
|
|
/// assert!(!s.is_char_boundary(2));
|
|
|
|
///
|
|
|
|
/// // third byte of `老`
|
|
|
|
/// assert!(!s.is_char_boundary(8));
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn is_char_boundary(&self, index: uint) -> bool;
|
|
|
|
|
|
|
|
/// Pluck a character out of a string and return the index of the next
|
|
|
|
/// character.
|
|
|
|
///
|
|
|
|
/// This function can be used to iterate over the unicode characters of a
|
|
|
|
/// string.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// This example manually iterates through the characters of a
|
|
|
|
/// string; this should normally be done by `.chars()` or
|
|
|
|
/// `.char_indices`.
|
|
|
|
///
|
2013-09-25 18:18:50 -05:00
|
|
|
/// ```rust
|
2013-12-22 15:31:23 -06:00
|
|
|
/// use std::str::CharRange;
|
|
|
|
///
|
2013-09-25 18:18:50 -05:00
|
|
|
/// let s = "中华Việt Nam";
|
2013-12-05 05:49:34 -06:00
|
|
|
/// let mut i = 0u;
|
2013-09-25 18:18:50 -05:00
|
|
|
/// while i < s.len() {
|
|
|
|
/// let CharRange {ch, next} = s.char_range_at(i);
|
2013-09-25 00:16:43 -05:00
|
|
|
/// println!("{}: {}", i, ch);
|
2013-09-25 18:18:50 -05:00
|
|
|
/// i = next;
|
|
|
|
/// }
|
|
|
|
/// ```
|
|
|
|
///
|
2013-12-05 05:49:34 -06:00
|
|
|
/// ## Output
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
2014-02-15 01:44:22 -06:00
|
|
|
/// ```ignore
|
2013-09-25 18:18:50 -05:00
|
|
|
/// 0: 中
|
|
|
|
/// 3: 华
|
|
|
|
/// 6: V
|
|
|
|
/// 7: i
|
|
|
|
/// 8: ệ
|
|
|
|
/// 11: t
|
|
|
|
/// 12:
|
|
|
|
/// 13: N
|
|
|
|
/// 14: a
|
|
|
|
/// 15: m
|
|
|
|
/// ```
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * self - The string
|
|
|
|
/// * start - The byte offset of the char to extract
|
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
|
|
|
/// A record {ch: char, next: uint} containing the char value and the byte
|
|
|
|
/// index of the next unicode character.
|
|
|
|
///
|
|
|
|
/// # Failure
|
|
|
|
///
|
|
|
|
/// If `start` is greater than or equal to the length of the string.
|
|
|
|
/// If `start` is not the index of the beginning of a valid UTF-8 character.
|
|
|
|
fn char_range_at(&self, start: uint) -> CharRange;
|
|
|
|
|
|
|
|
/// Given a byte position and a str, return the previous char and its position.
|
|
|
|
///
|
|
|
|
/// This function can be used to iterate over a unicode string in reverse.
|
|
|
|
///
|
|
|
|
/// Returns 0 for next index if called on start index 0.
|
|
|
|
fn char_range_at_reverse(&self, start: uint) -> CharRange;
|
|
|
|
|
|
|
|
/// Plucks the character starting at the `i`th byte of a string
|
|
|
|
fn char_at(&self, i: uint) -> char;
|
|
|
|
|
|
|
|
/// Plucks the character ending at the `i`th byte of a string
|
|
|
|
fn char_at_reverse(&self, i: uint) -> char;
|
|
|
|
|
|
|
|
/// Work with the byte buffer of a string as a byte slice.
|
2013-12-10 01:16:18 -06:00
|
|
|
fn as_bytes(&self) -> &'a [u8];
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns the byte index of the first character of `self` that
|
|
|
|
/// matches `search`.
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
|
|
|
/// `Some` containing the byte index of the first matching character
|
|
|
|
/// or `None` if there is no match.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
///
|
|
|
|
/// assert_eq!(s.find('L'), Some(0));
|
|
|
|
/// assert_eq!(s.find('é'), Some(14));
|
|
|
|
///
|
|
|
|
/// // the first space
|
|
|
|
/// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
|
|
|
|
///
|
|
|
|
/// // neither are found
|
|
|
|
/// assert_eq!(s.find(&['1', '2']), None);
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn find<C: CharEq>(&self, search: C) -> Option<uint>;
|
|
|
|
|
2013-12-05 05:49:34 -06:00
|
|
|
/// Returns the byte index of the last character of `self` that
|
|
|
|
/// matches `search`.
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
|
|
|
/// `Some` containing the byte index of the last matching character
|
2013-12-05 05:49:34 -06:00
|
|
|
/// or `None` if there is no match.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
///
|
|
|
|
/// assert_eq!(s.rfind('L'), Some(13));
|
|
|
|
/// assert_eq!(s.rfind('é'), Some(14));
|
|
|
|
///
|
|
|
|
/// // the second space
|
|
|
|
/// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
|
|
|
|
///
|
|
|
|
/// // searches for an occurrence of either `1` or `2`, but neither are found
|
|
|
|
/// assert_eq!(s.rfind(&['1', '2']), None);
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
|
|
|
|
|
|
|
|
/// Returns the byte index of the first matching substring
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * `needle` - The string to search for
|
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
|
|
|
/// `Some` containing the byte index of the first matching substring
|
2013-12-05 05:49:34 -06:00
|
|
|
/// or `None` if there is no match.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
|
|
|
///
|
|
|
|
/// assert_eq!(s.find_str("老虎 L"), Some(6));
|
|
|
|
/// assert_eq!(s.find_str("muffin man"), None);
|
|
|
|
/// ```
|
2013-09-25 18:18:50 -05:00
|
|
|
fn find_str(&self, needle: &str) -> Option<uint>;
|
|
|
|
|
|
|
|
/// Given a string, make a new string with repeated copies of it.
|
|
|
|
fn repeat(&self, nn: uint) -> ~str;
|
|
|
|
|
|
|
|
/// Retrieves the first character from a string slice and returns
|
|
|
|
/// it. This does not allocate a new string; instead, it returns a
|
|
|
|
/// slice that points one character beyond the character that was
|
2014-03-09 16:57:22 -05:00
|
|
|
/// shifted. If the string does not contain any characters,
|
|
|
|
/// a tuple of None and an empty string is returned instead.
|
2013-12-05 05:49:34 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Löwe 老虎 Léopard";
|
2013-12-22 15:31:23 -06:00
|
|
|
/// let (c, s1) = s.slice_shift_char();
|
2014-03-09 16:57:22 -05:00
|
|
|
/// assert_eq!(c, Some('L'));
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(s1, "öwe 老虎 Léopard");
|
|
|
|
///
|
2013-12-22 15:31:23 -06:00
|
|
|
/// let (c, s2) = s1.slice_shift_char();
|
2014-03-09 16:57:22 -05:00
|
|
|
/// assert_eq!(c, Some('ö'));
|
2013-12-05 05:49:34 -06:00
|
|
|
/// assert_eq!(s2, "we 老虎 Léopard");
|
|
|
|
/// ```
|
2014-03-09 16:55:43 -05:00
|
|
|
fn slice_shift_char(&self) -> (Option<char>, &'a str);
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Levenshtein Distance between two strings.
|
|
|
|
fn lev_distance(&self, t: &str) -> uint;
|
|
|
|
|
|
|
|
/// Returns the byte offset of an inner slice relative to an enclosing outer slice.
|
|
|
|
///
|
|
|
|
/// Fails if `inner` is not a direct slice contained within self.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let string = "a\nb\nc";
|
2013-12-05 05:49:34 -06:00
|
|
|
/// let lines: ~[&str] = string.lines().collect();
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
|
|
|
/// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
|
|
|
|
/// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
|
|
|
|
/// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
|
|
|
|
/// ```
|
|
|
|
fn subslice_offset(&self, inner: &str) -> uint;
|
|
|
|
|
2013-12-17 09:37:30 -06:00
|
|
|
/// Return an unsafe pointer to the string's buffer.
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
2013-12-17 09:37:30 -06:00
|
|
|
/// The caller must ensure that the string outlives this pointer,
|
|
|
|
/// and that it is not reallocated (e.g. by pushing to the
|
|
|
|
/// string).
|
|
|
|
fn as_ptr(&self) -> *u8;
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> StrSlice<'a> for &'a str {
|
2013-09-25 18:18:50 -05:00
|
|
|
#[inline]
|
|
|
|
/// True when `needle` occurs somewhere in `self`, i.e. when `find_str` yields `Some`.
fn contains<'a>(&self, needle: &'a str) -> bool {
|
|
|
|
// Only the existence of a match matters; the byte index from `find_str` is dropped.
self.find_str(needle).is_some()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
/// True when the character `needle` occurs in `self`, i.e. when `find` yields `Some`.
fn contains_char(&self, needle: char) -> bool {
|
|
|
|
// A plain `char` is passed to `find` as the matcher; only match/no-match is kept.
self.find(needle).is_some()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
/// Creates a `Chars` iterator over this slice.
fn chars(&self) -> Chars<'a> {
|
|
|
|
// `*self` copies the `&'a str` out from behind `&self`, so the iterator is
// tied to `'a` rather than to the borrow of `self`.
Chars{string: *self}
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
#[deprecated = "replaced by .chars().rev()"]
|
|
|
|
/// Deprecated shim kept for compatibility: the `chars()` iterator, reversed.
fn chars_rev(&self) -> Rev<Chars<'a>> {
|
2014-01-23 13:41:57 -06:00
|
|
|
// Same expression the deprecation message tells callers to write themselves.
self.chars().rev()
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
/// Creates an iterator yielding the bytes of this slice by value.
fn bytes(&self) -> Bytes<'a> {
|
2013-09-25 18:18:50 -05:00
|
|
|
// The slice iterator yields `&u8`; the `|&b| b` closure copies each byte out.
self.as_bytes().iter().map(|&b| b)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
#[deprecated = "replaced by .bytes().rev()"]
|
|
|
|
/// Deprecated shim kept for compatibility: the `bytes()` iterator, reversed.
fn bytes_rev(&self) -> Rev<Bytes<'a>> {
|
2014-01-23 13:41:57 -06:00
|
|
|
// Same expression the deprecation message tells callers to write themselves.
self.bytes().rev()
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
/// Creates a `CharOffsets` iterator over this slice.
fn char_indices(&self) -> CharOffsets<'a> {
|
|
|
|
// Stores both the original slice and a `chars()` iterator over it.
// NOTE(review): presumably the stored slice is what offsets are measured
// against — confirm in the `CharOffsets` iterator impl.
CharOffsets{string: *self, iter: self.chars()}
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
#[deprecated = "replaced by .char_indices().rev()"]
|
|
|
|
fn char_indices_rev(&self) -> Rev<CharOffsets<'a>> {
|
2014-01-23 13:41:57 -06:00
|
|
|
self.char_indices().rev()
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep> {
|
|
|
|
CharSplits {
|
2013-09-25 18:18:50 -05:00
|
|
|
string: *self,
|
|
|
|
only_ascii: sep.only_ascii(),
|
|
|
|
sep: sep,
|
|
|
|
allow_trailing_empty: true,
|
|
|
|
finished: false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint)
|
2014-01-14 21:32:24 -06:00
|
|
|
-> CharSplitsN<'a, Sep> {
|
|
|
|
CharSplitsN {
|
2013-11-23 04:18:51 -06:00
|
|
|
iter: self.split(sep),
|
2013-09-25 18:18:50 -05:00
|
|
|
count: count,
|
|
|
|
invert: false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn split_terminator<Sep: CharEq>(&self, sep: Sep)
|
2014-01-14 21:32:24 -06:00
|
|
|
-> CharSplits<'a, Sep> {
|
|
|
|
CharSplits {
|
2013-09-25 18:18:50 -05:00
|
|
|
allow_trailing_empty: false,
|
2013-11-23 04:18:51 -06:00
|
|
|
..self.split(sep)
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
#[deprecated = "replaced by .split(sep).rev()"]
|
|
|
|
fn rsplit<Sep: CharEq>(&self, sep: Sep) -> Rev<CharSplits<'a, Sep>> {
|
2014-01-23 13:41:57 -06:00
|
|
|
self.split(sep).rev()
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint)
|
2014-01-14 21:32:24 -06:00
|
|
|
-> CharSplitsN<'a, Sep> {
|
|
|
|
CharSplitsN {
|
2013-11-23 04:18:51 -06:00
|
|
|
iter: self.split(sep),
|
2013-09-25 18:18:50 -05:00
|
|
|
count: count,
|
|
|
|
invert: true,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn match_indices(&self, sep: &'a str) -> MatchIndices<'a> {
|
2013-09-25 18:18:50 -05:00
|
|
|
assert!(!sep.is_empty())
|
2014-01-14 21:32:24 -06:00
|
|
|
MatchIndices {
|
2013-09-25 18:18:50 -05:00
|
|
|
haystack: *self,
|
|
|
|
needle: sep,
|
|
|
|
position: 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn split_str(&self, sep: &'a str) -> StrSplits<'a> {
|
|
|
|
StrSplits {
|
2013-11-23 04:18:51 -06:00
|
|
|
it: self.match_indices(sep),
|
2013-09-25 18:18:50 -05:00
|
|
|
last_end: 0,
|
|
|
|
finished: false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn lines(&self) -> CharSplits<'a, char> {
|
2013-11-23 04:18:51 -06:00
|
|
|
self.split_terminator('\n')
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
2014-01-14 21:32:24 -06:00
|
|
|
/// Like `lines()`, but additionally drops one trailing `'\r'` byte from
/// each yielded line, if present.
fn lines_any(&self) -> AnyLines<'a> {
    self.lines().map(|line| {
        let len = line.len();
        let ends_with_cr = len > 0 && line[len - 1] == '\r' as u8;
        if ends_with_cr {
            line.slice(0, len - 1)
        } else {
            line
        }
    })
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn words(&self) -> Words<'a> {
|
2013-11-23 04:18:51 -06:00
|
|
|
self.split(char::is_whitespace).filter(|s| !s.is_empty())
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn nfd_chars(&self) -> Normalizations<'a> {
|
|
|
|
Normalizations {
|
2013-11-23 04:18:51 -06:00
|
|
|
iter: self.chars(),
|
2014-04-17 17:28:14 -05:00
|
|
|
buffer: Vec::new(),
|
2013-09-25 18:18:50 -05:00
|
|
|
sorted: false,
|
|
|
|
kind: NFD
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-01-14 21:32:24 -06:00
|
|
|
fn nfkd_chars(&self) -> Normalizations<'a> {
|
|
|
|
Normalizations {
|
2013-11-23 04:18:51 -06:00
|
|
|
iter: self.chars(),
|
2014-04-17 17:28:14 -05:00
|
|
|
buffer: Vec::new(),
|
2013-09-25 18:18:50 -05:00
|
|
|
sorted: false,
|
|
|
|
kind: NFKD
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
#[inline]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn char_len(&self) -> uint { self.chars().len() }
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice(&self, begin: uint, end: uint) -> &'a str {
|
2013-09-25 18:18:50 -05:00
|
|
|
assert!(self.is_char_boundary(begin) && self.is_char_boundary(end));
|
|
|
|
unsafe { raw::slice_bytes(*self, begin, end) }
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_from(&self, begin: uint) -> &'a str {
|
2013-09-25 18:18:50 -05:00
|
|
|
self.slice(begin, self.len())
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_to(&self, end: uint) -> &'a str {
|
2013-09-25 18:18:50 -05:00
|
|
|
assert!(self.is_char_boundary(end));
|
|
|
|
unsafe { raw::slice_bytes(*self, 0, end) }
|
|
|
|
}
|
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
fn slice_chars(&self, begin: uint, end: uint) -> &'a str {
|
2013-09-25 18:18:50 -05:00
|
|
|
assert!(begin <= end);
|
|
|
|
let mut count = 0;
|
|
|
|
let mut begin_byte = None;
|
|
|
|
let mut end_byte = None;
|
|
|
|
|
|
|
|
// This could be even more efficient by not decoding,
|
|
|
|
// only finding the char boundaries
|
2013-11-23 04:18:51 -06:00
|
|
|
for (idx, _) in self.char_indices() {
|
2013-09-25 18:18:50 -05:00
|
|
|
if count == begin { begin_byte = Some(idx); }
|
|
|
|
if count == end { end_byte = Some(idx); break; }
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
|
2013-08-18 15:15:47 -05:00
|
|
|
if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
|
2013-06-11 06:37:22 -05:00
|
|
|
|
2013-08-18 15:15:47 -05:00
|
|
|
match (begin_byte, end_byte) {
|
2013-10-21 15:08:31 -05:00
|
|
|
(None, _) => fail!("slice_chars: `begin` is beyond end of string"),
|
|
|
|
(_, None) => fail!("slice_chars: `end` is beyond end of string"),
|
2013-08-18 15:15:47 -05:00
|
|
|
(Some(a), Some(b)) => unsafe { raw::slice_bytes(*self, a, b) }
|
|
|
|
}
|
2013-06-11 06:37:22 -05:00
|
|
|
}
|
|
|
|
|
2013-10-17 01:02:46 -05:00
|
|
|
#[inline]
|
2013-03-25 15:21:04 -05:00
|
|
|
fn starts_with<'a>(&self, needle: &'a str) -> bool {
|
2013-10-17 01:02:46 -05:00
|
|
|
let n = needle.len();
|
2013-10-18 00:32:46 -05:00
|
|
|
self.len() >= n && needle.as_bytes() == self.as_bytes().slice_to(n)
|
2013-03-04 21:36:15 -06:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-10-17 01:02:46 -05:00
|
|
|
#[inline]
|
2013-06-10 21:20:47 -05:00
|
|
|
fn ends_with(&self, needle: &str) -> bool {
|
2013-10-17 01:02:46 -05:00
|
|
|
let (m, n) = (self.len(), needle.len());
|
2013-10-18 00:32:46 -05:00
|
|
|
m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
|
2013-06-10 10:03:16 -05:00
|
|
|
}
|
|
|
|
|
2013-06-11 07:13:23 -05:00
|
|
|
/// Returns a new string made by feeding every character through
/// `char::escape_default` and collecting the emitted characters.
fn escape_default(&self) -> ~str {
    // Reserve at least the unescaped length.
    let mut escaped = StrBuf::with_capacity(self.len());
    for ch in self.chars() {
        ch.escape_default(|piece| escaped.push_char(piece));
    }
    escaped.into_owned()
}
|
|
|
|
|
|
|
|
/// Returns a new string made by feeding every character through
/// `char::escape_unicode` and collecting the emitted characters.
fn escape_unicode(&self) -> ~str {
    // Reserve at least the unescaped length.
    let mut escaped = StrBuf::with_capacity(self.len());
    for ch in self.chars() {
        ch.escape_unicode(|piece| escaped.push_char(piece));
    }
    escaped.into_owned()
}
|
2012-07-23 13:51:12 -05:00
|
|
|
|
2012-10-08 10:06:25 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim(&self) -> &'a str {
|
2013-06-10 06:03:16 -05:00
|
|
|
self.trim_left().trim_right()
|
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2012-10-08 10:06:25 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim_left(&self) -> &'a str {
|
2014-04-22 00:21:37 -05:00
|
|
|
self.trim_left_chars(char::is_whitespace)
|
2013-06-10 06:03:16 -05:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2012-10-08 10:06:25 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn trim_right(&self) -> &'a str {
|
2014-04-22 00:21:37 -05:00
|
|
|
self.trim_right_chars(char::is_whitespace)
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
|
|
|
|
let cur = match self.find(|c: char| !to_trim.matches(c)) {
|
|
|
|
None => "",
|
|
|
|
Some(i) => unsafe { raw::slice_bytes(*self, i, self.len()) }
|
|
|
|
};
|
|
|
|
match cur.rfind(|c: char| !to_trim.matches(c)) {
|
|
|
|
None => "",
|
|
|
|
Some(i) => {
|
|
|
|
let right = cur.char_range_at(i).next;
|
|
|
|
unsafe { raw::slice_bytes(cur, 0, right) }
|
|
|
|
}
|
|
|
|
}
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
|
2013-03-21 16:59:33 -05:00
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_left_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
|
2013-06-11 10:32:49 -05:00
|
|
|
match self.find(|c: char| !to_trim.matches(c)) {
|
2013-06-10 06:03:16 -05:00
|
|
|
None => "",
|
|
|
|
Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
|
|
|
|
}
|
2013-03-21 16:59:33 -05:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-03-21 16:59:33 -05:00
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn trim_right_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
|
2013-06-11 10:32:49 -05:00
|
|
|
match self.rfind(|c: char| !to_trim.matches(c)) {
|
2013-06-10 06:03:16 -05:00
|
|
|
None => "",
|
|
|
|
Some(last) => {
|
2013-06-10 06:46:36 -05:00
|
|
|
let next = self.char_range_at(last).next;
|
|
|
|
unsafe { raw::slice_bytes(*self, 0u, next) }
|
2013-06-10 06:03:16 -05:00
|
|
|
}
|
|
|
|
}
|
2013-03-21 16:59:33 -05:00
|
|
|
}
|
|
|
|
|
2013-08-09 03:25:24 -05:00
|
|
|
/// Returns a new string in which every match of `from` (as reported by
/// `match_indices`) is replaced by `to`.
fn replace(&self, from: &str, to: &str) -> ~str {
    let mut out = StrBuf::new();
    // Byte offset up to which the input has already been copied.
    let mut copied_to = 0;
    for (start, end) in self.match_indices(from) {
        let gap = unsafe { raw::slice_bytes(*self, copied_to, start) };
        out.push_str(gap);
        out.push_str(to);
        copied_to = end;
    }
    // Whatever follows the last match is copied verbatim.
    let tail = unsafe { raw::slice_bytes(*self, copied_to, self.len()) };
    out.push_str(tail);
    out.into_owned()
}
|
|
|
|
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
|
|
|
fn to_owned(&self) -> ~str {
|
2013-12-17 09:37:30 -06:00
|
|
|
let len = self.len();
|
|
|
|
unsafe {
|
2014-04-17 17:28:14 -05:00
|
|
|
let mut v = Vec::with_capacity(len);
|
2013-08-04 15:22:56 -05:00
|
|
|
|
2013-12-17 09:37:30 -06:00
|
|
|
ptr::copy_memory(v.as_mut_ptr(), self.as_ptr(), len);
|
|
|
|
v.set_len(len);
|
2014-04-17 17:28:14 -05:00
|
|
|
::cast::transmute(v.move_iter().collect::<~[u8]>())
|
2013-12-17 09:37:30 -06:00
|
|
|
}
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2013-06-13 10:44:15 -05:00
|
|
|
/// Encodes the string as a vector of UTF-16 code units.
fn to_utf16(&self) -> ~[u16] {
    // A character never needs more UTF-16 units than UTF-8 bytes, so the
    // byte length is a safe upper bound for the output size.
    let mut u = Vec::with_capacity(self.len());
    for ch in self.chars() {
        // One character encodes to at most two units.
        let mut buf = [0u16, ..2];
        let n = ch.encode_utf16(buf /* as mut slice! */);
        u.push_all(buf.slice_to(n));
    }
    u.move_iter().collect()
}
|
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
#[inline]
|
2013-06-10 06:46:36 -05:00
|
|
|
fn is_char_boundary(&self, index: uint) -> bool {
|
|
|
|
if index == self.len() { return true; }
|
|
|
|
let b = self[index];
|
|
|
|
return b < 128u8 || b >= 192u8;
|
|
|
|
}
|
|
|
|
|
2013-07-10 13:32:59 -05:00
|
|
|
#[inline]
|
2013-06-10 06:46:36 -05:00
|
|
|
fn char_range_at(&self, i: uint) -> CharRange {
|
2014-01-19 02:21:14 -06:00
|
|
|
if self[i] < 128u8 {
|
2013-07-10 13:32:59 -05:00
|
|
|
return CharRange {ch: self[i] as char, next: i + 1 };
|
2013-06-10 06:46:36 -05:00
|
|
|
}
|
2013-07-10 13:32:59 -05:00
|
|
|
|
|
|
|
// Multibyte case is a fn to allow char_range_at to inline cleanly
|
|
|
|
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
|
2014-02-06 01:56:27 -06:00
|
|
|
let mut val = s[i] as u32;
|
2014-04-01 22:39:26 -05:00
|
|
|
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
2013-07-10 13:32:59 -05:00
|
|
|
assert!((w != 0));
|
|
|
|
|
2013-07-30 11:39:31 -05:00
|
|
|
val = utf8_first_byte!(val, w);
|
|
|
|
val = utf8_acc_cont_byte!(val, s[i + 1]);
|
|
|
|
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
|
|
|
|
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
|
2013-07-10 13:32:59 -05:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
return CharRange {ch: unsafe { transmute(val) }, next: i + w};
|
2013-07-10 13:32:59 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return multibyte_char_range_at(*self, i);
|
2013-06-10 06:46:36 -05:00
|
|
|
}
|
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
#[inline]
|
2013-06-10 06:46:36 -05:00
|
|
|
fn char_range_at_reverse(&self, start: uint) -> CharRange {
|
|
|
|
let mut prev = start;
|
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
prev = prev.saturating_sub(1);
|
|
|
|
if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
|
2013-06-10 06:46:36 -05:00
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
|
2013-09-25 18:18:50 -05:00
|
|
|
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
|
2013-08-18 06:57:34 -05:00
|
|
|
// while there is a previous byte == 10......
|
|
|
|
while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
|
|
|
|
i -= 1u;
|
|
|
|
}
|
2013-06-10 06:46:36 -05:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
let mut val = s[i] as u32;
|
2014-04-01 22:39:26 -05:00
|
|
|
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
2013-08-18 06:57:34 -05:00
|
|
|
assert!((w != 0));
|
2013-06-10 06:46:36 -05:00
|
|
|
|
2013-08-18 06:57:34 -05:00
|
|
|
val = utf8_first_byte!(val, w);
|
|
|
|
val = utf8_acc_cont_byte!(val, s[i + 1]);
|
|
|
|
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
|
|
|
|
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
|
2013-06-10 06:46:36 -05:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
return CharRange {ch: unsafe { transmute(val) }, next: i};
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
2013-06-10 06:46:36 -05:00
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
return multibyte_char_range_at_reverse(*self, prev);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn char_at(&self, i: uint) -> char {
|
|
|
|
self.char_range_at(i).ch
|
2013-06-10 06:46:36 -05:00
|
|
|
}
|
2013-03-14 20:08:20 -05:00
|
|
|
|
2013-03-15 02:32:11 -05:00
|
|
|
#[inline]
|
2013-03-21 23:20:48 -05:00
|
|
|
fn char_at_reverse(&self, i: uint) -> char {
|
2013-06-10 06:46:36 -05:00
|
|
|
self.char_range_at_reverse(i).ch
|
2013-03-15 02:32:11 -05:00
|
|
|
}
|
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
#[inline]
|
2013-12-10 01:16:18 -06:00
|
|
|
fn as_bytes(&self) -> &'a [u8] {
|
2013-08-04 15:22:56 -05:00
|
|
|
unsafe { cast::transmute(*self) }
|
|
|
|
}
|
|
|
|
|
2014-04-22 00:21:37 -05:00
|
|
|
/// Returns the byte offset of the first character accepted by `search`,
/// or `None` when no character matches.
fn find<C: CharEq>(&self, mut search: C) -> Option<uint> {
    // A matcher that reports `only_ascii()` is run over the raw bytes.
    if search.only_ascii() {
        return self.bytes().position(|b| search.matches(b as char));
    }

    for (offset, ch) in self.char_indices() {
        if search.matches(ch) { return Some(offset); }
    }
    None
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2014-04-22 00:21:37 -05:00
|
|
|
/// Returns the byte offset of the last character accepted by `search`,
/// or `None` when no character matches.
fn rfind<C: CharEq>(&self, mut search: C) -> Option<uint> {
    // A matcher that reports `only_ascii()` is run over the raw bytes.
    if search.only_ascii() {
        return self.bytes().rposition(|b| search.matches(b as char));
    }

    for (offset, ch) in self.char_indices().rev() {
        if search.matches(ch) { return Some(offset); }
    }
    None
}
|
2013-06-10 01:23:05 -05:00
|
|
|
|
|
|
|
/// Returns the byte offset where the first match of `needle` starts, or
/// `None` when there is no match. An empty needle matches at offset 0.
fn find_str(&self, needle: &str) -> Option<uint> {
    // `match_indices` rejects an empty needle, so answer that case here.
    if needle.is_empty() {
        return Some(0);
    }

    let mut matches = self.match_indices(needle);
    match matches.next() {
        Some((start, _end)) => Some(start),
        None => None,
    }
}
|
2013-06-10 21:05:42 -05:00
|
|
|
|
2013-08-04 15:22:56 -05:00
|
|
|
/// Returns a new string made of `nn` copies of this string.
fn repeat(&self, nn: uint) -> ~str {
    let piece: &'a str = *self;
    let mut buf = StrBuf::with_capacity(nn * self.len());
    let mut remaining = nn;
    while remaining > 0 {
        buf.push_str(piece);
        remaining -= 1;
    }
    buf.into_owned()
}
|
|
|
|
|
2013-06-10 21:20:47 -05:00
|
|
|
#[inline]
|
2014-03-09 16:55:43 -05:00
|
|
|
fn slice_shift_char(&self) -> (Option<char>, &'a str) {
|
|
|
|
if self.is_empty() {
|
|
|
|
return (None, *self);
|
|
|
|
} else {
|
|
|
|
let CharRange {ch, next} = self.char_range_at(0u);
|
|
|
|
let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
|
|
|
|
return (Some(ch), next_s);
|
|
|
|
}
|
2013-06-10 21:20:47 -05:00
|
|
|
}
|
|
|
|
|
2013-06-13 10:39:06 -05:00
|
|
|
fn lev_distance(&self, t: &str) -> uint {
|
|
|
|
let slen = self.len();
|
|
|
|
let tlen = t.len();
|
|
|
|
|
|
|
|
if slen == 0 { return tlen; }
|
|
|
|
if tlen == 0 { return slen; }
|
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
let mut dcol = Vec::from_fn(tlen + 1, |x| x);
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
for (i, sc) in self.chars().enumerate() {
|
2013-06-13 10:39:06 -05:00
|
|
|
|
|
|
|
let mut current = i;
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(0) = current + 1;
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
for (j, tc) in t.chars().enumerate() {
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
let next = *dcol.get(j + 1);
|
2013-06-13 10:39:06 -05:00
|
|
|
|
|
|
|
if sc == tc {
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(j + 1) = current;
|
2013-06-13 10:39:06 -05:00
|
|
|
} else {
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(j + 1) = ::cmp::min(current, next);
|
|
|
|
*dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
|
|
|
|
*dcol.get(j)) + 1;
|
2013-06-13 10:39:06 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
current = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
return *dcol.get(tlen);
|
2013-06-13 10:39:06 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the byte offset of `inner` within this string, computed from
/// the two start addresses.
///
/// Fails if `inner` starts before this string or ends after it.
fn subslice_offset(&self, inner: &str) -> uint {
    let a_start = self.as_ptr() as uint;
    let b_start = inner.as_ptr() as uint;
    let a_end = a_start + self.len();
    let b_end = b_start + inner.len();

    assert!(a_start <= b_start);
    assert!(b_end <= a_end);
    b_start - a_start
}
|
|
|
|
|
2013-07-10 19:33:11 -05:00
|
|
|
#[inline]
|
2013-12-17 09:37:30 -06:00
|
|
|
fn as_ptr(&self) -> *u8 {
|
|
|
|
self.repr().data
|
2013-07-10 19:33:11 -05:00
|
|
|
}
|
2012-03-16 19:35:38 -05:00
|
|
|
}
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Methods for owned strings
|
2013-01-31 19:12:29 -06:00
|
|
|
pub trait OwnedStr {
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Consumes the string, returning the underlying byte buffer.
|
|
|
|
///
|
|
|
|
/// The buffer does not have a null terminator.
|
2013-08-24 01:37:22 -05:00
|
|
|
fn into_bytes(self) -> ~[u8];
|
2013-06-10 22:10:37 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
/// Pushes the given string onto this string, returning the concatenation of the two strings.
|
|
|
|
fn append(self, rhs: &str) -> ~str;
|
2013-01-31 19:12:29 -06:00
|
|
|
}
|
|
|
|
|
2013-02-26 19:12:00 -06:00
|
|
|
impl OwnedStr for ~str {
    // Reinterprets the owned string as its owned byte vector.
    #[inline]
    fn into_bytes(self) -> ~[u8] {
        let bytes: ~[u8] = unsafe { cast::transmute(self) };
        bytes
    }

    // Moves the string into a `StrBuf`, pushes `rhs`, and converts back.
    #[inline]
    fn append(self, rhs: &str) -> ~str {
        let mut buf = StrBuf::from_owned_str(self);
        buf.push_str(rhs);
        buf.into_owned()
    }
}
|
|
|
|
|
2013-03-15 17:26:59 -05:00
|
|
|
impl Clone for ~str {
    // Cloning an owned string is the same as `to_owned()`.
    #[inline]
    fn clone(&self) -> ~str {
        let copy = self.to_owned();
        copy
    }
}
|
|
|
|
|
std: Move the iterator param on FromIterator and Extendable to the method.
If they are on the trait then it is extremely annoying to use them as
generic parameters to a function, e.g. with the iterator param on the trait
itself, if one was to pass an Extendable<int> to a function that filled it
either from a Range or a Map<VecIterator>, one needs to write something
like:
fn foo<E: Extendable<int, Range<int>> +
Extendable<int, Map<&'self int, int, VecIterator<int>>>
(e: &mut E, ...) { ... }
since using a generic, i.e. `foo<E: Extendable<int, I>, I: Iterator<int>>`
means that `foo` takes 2 type parameters, and the caller has to specify them
(which doesn't work anyway, as they'll mismatch with the iterators used in
`foo` itself).
This patch changes it to:
fn foo<E: Extendable<int>>(e: &mut E, ...) { ... }
2013-08-13 08:08:14 -05:00
|
|
|
impl FromIterator<char> for ~str {
    // Collects the characters into a `StrBuf` whose initial capacity is the
    // iterator's lower size bound, then converts it to an owned string.
    #[inline]
    fn from_iter<T: Iterator<char>>(iterator: T) -> ~str {
        let reserve = match iterator.size_hint() {
            (lower, _) => lower
        };
        let mut collected = StrBuf::with_capacity(reserve);
        collected.extend(iterator);
        collected.into_owned()
    }
}
|
|
|
|
|
2013-06-17 02:05:51 -05:00
|
|
|
// This works because every lifetime is a sub-lifetime of 'static
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a> Default for &'a str {
|
|
|
|
fn default() -> &'a str { "" }
|
2013-06-17 02:05:51 -05:00
|
|
|
}
|
|
|
|
|
2013-08-10 08:38:00 -05:00
|
|
|
// The default owned string is an owned copy of the empty string.
impl Default for ~str {
    fn default() -> ~str {
        let empty = "";
        empty.to_owned()
    }
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2014-01-07 00:33:37 -06:00
|
|
|
use iter::AdditiveIterator;
|
2014-02-22 19:07:11 -06:00
|
|
|
use default::Default;
|
2014-01-07 00:33:37 -06:00
|
|
|
use prelude::*;
|
2013-01-08 21:37:25 -06:00
|
|
|
use str::*;
|
2014-04-02 18:54:22 -05:00
|
|
|
use strbuf::StrBuf;
|
2012-03-23 16:41:02 -05:00
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
// `eq` on owned strings: separately allocated equal contents compare equal,
// different contents do not.
#[test]
fn test_eq() {
    let (empty_a, empty_b) = ("".to_owned(), "".to_owned());
    assert!(eq(&empty_a, &empty_b));

    let (foo_a, foo_b) = ("foo".to_owned(), "foo".to_owned());
    assert!(eq(&foo_a, &foo_b));

    let bar = "bar".to_owned();
    assert!(!eq(&foo_a, &bar));
}
|
|
|
|
|
2012-09-03 12:47:10 -05:00
|
|
|
// `eq_slice` on sub-slices taken from the front and the back of a literal,
// plus one negative case.
#[test]
fn test_eq_slice() {
    let front = "foobar".slice(0, 3);
    let back = "barfoo".slice(3, 6);
    assert!(eq_slice(front, "foo"));
    assert!(eq_slice(back, "foo"));
    assert!(!eq_slice("foo1", "foo2"));
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
// Ordering of string slices via `<=`, plus one inequality check.
#[test]
fn test_le() {
    let empty = "";
    let foo = "foo";
    assert!(empty <= empty);
    assert!(empty <= foo);
    assert!(foo <= foo);
    assert!(foo != "bar");
}
|
|
|
|
|
|
|
|
// `len()` counts bytes and `char_len()` counts characters; the samples
// cover encodings of one to four bytes.
#[test]
fn test_len() {
    let byte_lens = [
        ("", 0u),
        ("hello world", 11u),
        ("\x63", 1u),
        ("\xa2", 2u),
        ("\u03c0", 2u),
        ("\u2620", 3u),
        ("\U0001d11e", 4u),
    ];
    for &(text, expected) in byte_lens.iter() {
        assert_eq!(text.len(), expected);
    }

    let char_lens = [
        ("", 0u),
        ("hello world", 11u),
        ("\x63", 1u),
        ("\xa2", 1u),
        ("\u03c0", 1u),
        ("\u2620", 1u),
        ("\U0001d11e", 1u),
        ("ประเทศไทย中华Việt Nam", 19u),
    ];
    for &(text, expected) in char_lens.iter() {
        assert_eq!(text.char_len(), expected);
    }
}
|
|
|
|
|
|
|
|
// `find` with a char and with a closure; results are byte offsets.
#[test]
fn test_find() {
    let ascii = "hello";
    assert_eq!(ascii.find('l'), Some(2u));
    assert_eq!(ascii.find(|c:char| c == 'o'), Some(4u));
    assert!(ascii.find('x').is_none());
    assert!(ascii.find(|c:char| c == 'x').is_none());

    let mixed = "ประเทศไทย中华Việt Nam";
    assert_eq!(mixed.find('华'), Some(30u));
    assert_eq!(mixed.find(|c: char| c == '华'), Some(30u));
}
|
|
|
|
|
|
|
|
// `rfind` with a char and with a closure; results are byte offsets.
#[test]
fn test_rfind() {
    let ascii = "hello";
    assert_eq!(ascii.rfind('l'), Some(3u));
    assert_eq!(ascii.rfind(|c:char| c == 'o'), Some(4u));
    assert!(ascii.rfind('x').is_none());
    assert!(ascii.rfind(|c:char| c == 'x').is_none());

    let mixed = "ประเทศไทย中华Việt Nam";
    assert_eq!(mixed.rfind('华'), Some(30u));
    assert_eq!(mixed.rfind(|c: char| c == '华'), Some(30u));
}
|
|
|
|
|
2013-07-28 19:40:28 -05:00
|
|
|
#[test]
|
|
|
|
fn test_collect() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let empty = "".to_owned();
|
2013-11-23 04:18:51 -06:00
|
|
|
let s: ~str = empty.chars().collect();
|
2013-09-27 19:02:31 -05:00
|
|
|
assert_eq!(empty, s);
|
2014-04-15 20:17:48 -05:00
|
|
|
let data = "ประเทศไทย中".to_owned();
|
2013-11-23 04:18:51 -06:00
|
|
|
let s: ~str = data.chars().collect();
|
2013-09-27 19:02:31 -05:00
|
|
|
assert_eq!(data, s);
|
2013-07-28 19:40:28 -05:00
|
|
|
}
|
|
|
|
|
2013-08-24 01:37:22 -05:00
|
|
|
// `into_bytes` yields exactly the bytes of the original string.
#[test]
fn test_into_bytes() {
    let owned = "asdf".to_owned();
    let bytes = owned.into_bytes();
    assert_eq!(bytes!("asdf"), bytes.as_slice());
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
// `find_str` reports byte offsets relative to the slice it is called on.
#[test]
fn test_find_str() {
    // byte positions
    assert_eq!("".find_str(""), Some(0u));
    assert!("banana".find_str("apple pie").is_none());

    let ascii = "abcabc";
    assert_eq!(ascii.slice(0u, 6u).find_str("ab"), Some(0u));
    assert_eq!(ascii.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
    assert!(ascii.slice(2u, 4u).find_str("ab").is_none());

    // The 43-byte sample, twice in a row.
    let mut doubled = "ประเทศไทย中华Việt Nam".to_owned();
    doubled = doubled + doubled;
    assert!(doubled.find_str("ไท华").is_none());
    assert_eq!(doubled.slice(0u, 43u).find_str(""), Some(0u));
    assert_eq!(doubled.slice(6u, 43u).find_str(""), Some(6u - 6u));

    let first = doubled.slice(0u, 43u);
    assert_eq!(first.find_str("ประ"), Some( 0u));
    assert_eq!(first.find_str("ทศไ"), Some(12u));
    assert_eq!(first.find_str("ย中"), Some(24u));
    assert_eq!(first.find_str("iệt"), Some(34u));
    assert_eq!(first.find_str("Nam"), Some(40u));

    let second = doubled.slice(43u, 86u);
    assert_eq!(second.find_str("ประ"), Some(43u - 43u));
    assert_eq!(second.find_str("ทศไ"), Some(55u - 43u));
    assert_eq!(second.find_str("ย中"), Some(67u - 43u));
    assert_eq!(second.find_str("iệt"), Some(77u - 43u));
    assert_eq!(second.find_str("Nam"), Some(83u - 43u));
}
|
|
|
|
|
|
|
|
// `slice_chars` takes character indices, not byte offsets.
#[test]
fn test_slice_chars() {
    // Checks that `expected` is found in `haystack` at char index `start`.
    fn check(haystack: &str, expected: &str, start: uint) {
        let end = start + expected.char_len();
        assert_eq!(haystack.slice_chars(start, end), expected);
    }
    check("", "", 0);
    check("hello", "llo", 2);
    check("hello", "el", 1);
    check("αβλ", "β", 1);
    check("αβλ", "", 3);
    assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_concat() {
|
2012-09-21 20:36:32 -05:00
|
|
|
fn t(v: &[~str], s: &str) {
|
2013-06-02 22:19:37 -05:00
|
|
|
assert_eq!(v.concat(), s.to_str());
|
2012-09-21 20:36:32 -05:00
|
|
|
}
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
|
|
|
|
"no".to_owned(), "good".to_owned()], "youknowI'mnogood");
|
2013-05-23 11:39:17 -05:00
|
|
|
let v: &[~str] = [];
|
|
|
|
t(v, "");
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["hi".to_owned()], "hi");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_connect() {
|
2012-09-21 20:36:32 -05:00
|
|
|
fn t(v: &[~str], sep: &str, s: &str) {
|
2013-06-02 22:19:37 -05:00
|
|
|
assert_eq!(v.connect(sep), s.to_str());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
|
|
|
|
"no".to_owned(), "good".to_owned()],
|
2013-05-23 11:39:17 -05:00
|
|
|
" ", "you know I'm no good");
|
2013-05-27 18:04:00 -05:00
|
|
|
let v: &[~str] = [];
|
2013-05-23 11:39:17 -05:00
|
|
|
t(v, " ", "");
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["hi".to_owned()], " ", "hi");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2013-06-02 22:19:37 -05:00
|
|
|
#[test]
fn test_concat_slices() {
    // Same checks as `test_concat`, but the pieces are borrowed slices.
    fn check(pieces: &[&str], joined: &str) {
        assert_eq!(pieces.concat(), joined.to_str());
    }

    let none: &[&str] = [];
    check(none, "");
    check(["hi"], "hi");
    check(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
}
|
|
|
|
|
2013-02-07 21:33:12 -06:00
|
|
|
#[test]
fn test_connect_slices() {
    // Same checks as `test_connect`, but the pieces are borrowed slices.
    fn check(pieces: &[&str], sep: &str, joined: &str) {
        assert_eq!(pieces.connect(sep), joined.to_str());
    }

    check([], " ", "");
    check(["hi"], " ", "hi");
    check(["you", "know", "I'm", "no", "good"],
          " ",
          "you know I'm no good");
}
|
|
|
|
|
2012-10-11 18:54:31 -05:00
|
|
|
#[test]
fn test_repeat() {
    // (unit, number of repetitions, expected result)
    let cases = [
        ("x", 4, "xxxx"),
        ("hi", 4, "hihihihi"),
        ("ไท华", 3, "ไท华ไท华ไท华"),
        // repeating the empty string, or repeating zero times, yields ""
        ("", 4, ""),
        ("hi", 0, ""),
    ];
    for &(unit, times, expected) in cases.iter() {
        assert_eq!(unit.repeat(times), expected.to_owned());
    }
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
2012-06-24 22:18:18 -05:00
|
|
|
fn test_unsafe_slice() {
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
|
|
|
|
assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
|
|
|
|
assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
|
2013-03-21 05:58:03 -05:00
|
|
|
fn a_million_letter_a() -> ~str {
|
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("aaaaaaaaaa");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
2013-03-21 05:58:03 -05:00
|
|
|
fn half_a_million_letter_a() -> ~str {
|
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("aaaaa");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2013-03-21 05:58:03 -05:00
|
|
|
}
|
|
|
|
let letters = a_million_letter_a();
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(half_a_million_letter_a() ==
|
2013-03-21 06:36:21 -05:00
|
|
|
unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_starts_with() {
    // (text, candidate prefix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        // a prefix longer than the text can never match
        ("a", "abc", false),
        ("", "abc", false),
        // text that begins with a multi-byte character
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(text, prefix, expected) in cases.iter() {
        assert_eq!(text.starts_with(prefix), expected);
    }
}
|
|
|
|
|
|
|
|
#[test]
fn test_ends_with() {
    // (text, candidate suffix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        // a suffix longer than the text can never match
        ("a", "abc", false),
        ("", "abc", false),
        // text that ends with a multi-byte character
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(text, suffix, expected) in cases.iter() {
        assert_eq!(text.ends_with(suffix), expected);
    }
}
|
|
|
|
|
|
|
|
#[test]
fn test_is_empty() {
    // Only the zero-length string counts as empty.
    let (empty, non_empty) = ("", "a");
    assert!(empty.is_empty());
    assert!(!non_empty.is_empty());
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace() {
    // Single-character needle, including the empty-haystack case.
    let a = "a";
    assert_eq!("".replace(a, "b"), "".to_owned());
    assert_eq!("a".replace(a, "b"), "b".to_owned());
    assert_eq!("ab".replace(a, "b"), "bb".to_owned());

    // Multi-character needle that occurs more than once.
    let test = "test";
    assert!(" test test ".replace(test, "toast") ==
        " toast toast ".to_owned());

    // Replacing with the empty string deletes every occurrence; the three
    // spaces around and between the two words are all that is left.
    assert_eq!(" test test ".replace(test, ""), "   ".to_owned());
}
|
|
|
|
|
2012-02-12 08:14:49 -06:00
|
|
|
#[test]
fn test_replace_2a() {
    // The needle sits at the very start of a multi-byte haystack.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "ประเ".to_owned();
    let expected = "دولة الكويتทศไทย中华".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2b() {
    // The needle sits in the interior of a multi-byte haystack.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "ะเ".to_owned();
    let expected = "ปรدولة الكويتทศไทย中华".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2c() {
    // The needle sits at the very end of a multi-byte haystack.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "中华".to_owned();
    let expected = "ประเทศไทยدولة الكويت".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2d() {
    // The needle's characters all occur in the haystack, but never adjacent
    // in this order, so the result must equal the input.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let absent = "ไท华".to_owned();
    assert_eq!(haystack.replace(absent, replacement), haystack);
}
|
|
|
|
|
2012-02-22 02:49:05 -06:00
|
|
|
#[test]
|
|
|
|
fn test_slice() {
|
2013-06-09 09:44:58 -05:00
|
|
|
assert_eq!("ab", "abc".slice(0, 2));
|
|
|
|
assert_eq!("bc", "abc".slice(1, 3));
|
|
|
|
assert_eq!("", "abc".slice(1, 1));
|
|
|
|
assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
|
2012-07-14 00:57:48 -05:00
|
|
|
|
2013-03-21 05:58:03 -05:00
|
|
|
let data = "ประเทศไทย中华";
|
2013-06-09 09:44:58 -05:00
|
|
|
assert_eq!("ป", data.slice(0, 3));
|
|
|
|
assert_eq!("ร", data.slice(3, 6));
|
|
|
|
assert_eq!("", data.slice(3, 3));
|
|
|
|
assert_eq!("华", data.slice(30, 33));
|
2012-07-14 00:57:48 -05:00
|
|
|
|
|
|
|
fn a_million_letter_X() -> ~str {
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
2012-09-21 20:36:32 -05:00
|
|
|
while i < 100000 {
|
2014-04-02 18:54:22 -05:00
|
|
|
rs.push_str("华华华华华华华华华华");
|
2012-09-21 20:36:32 -05:00
|
|
|
i += 1;
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
rs.into_owned()
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
2012-07-14 00:57:48 -05:00
|
|
|
fn half_a_million_letter_X() -> ~str {
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("华华华华华");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
2013-03-21 05:58:03 -05:00
|
|
|
let letters = a_million_letter_X();
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(half_a_million_letter_X() ==
|
2013-06-09 09:44:58 -05:00
|
|
|
letters.slice(0u, 3u * 500000u).to_owned());
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_slice_2() {
    // Byte offset at which each character of `text` starts:
    //
    //    0: 中     3: 华     6: V     7: i     8: ệ
    //   11: t    12: ' '   13: N    14: a    15: m
    let text = "中华Việt Nam";

    assert_eq!("中", text.slice(0u, 3u));
    assert_eq!("华", text.slice(3u, 6u));
    assert_eq!("华V", text.slice(3u, 7u));
    assert_eq!("Việt Nam", text.slice(6u, 16u));
    assert_eq!("", text.slice(3u, 3u));

    // plain ASCII for comparison
    assert_eq!("ab", "abc".slice(0u, 2u));
    assert_eq!("bc", "abc".slice(1u, 3u));
    assert_eq!("", "abc".slice(1u, 1u));
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
#[should_fail]
fn test_slice_fail() {
    // The first character occupies bytes 0..3, so a slice ending at byte 2
    // would cut it in half; this test is expected to fail.
    let text = "中华Việt Nam";
    text.slice(0u, 2u);
}
|
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
#[test]
fn test_slice_from() {
    // (starting byte offset, expected tail of "abcd")
    let cases = [(0u, "abcd"), (2u, "cd"), (4u, "")];
    for &(start, tail) in cases.iter() {
        assert_eq!("abcd".slice_from(start), tail);
    }
}
|
|
|
|
#[test]
fn test_slice_to() {
    // (ending byte offset, expected head of "abcd")
    let cases = [(0u, ""), (2u, "ab"), (4u, "abcd")];
    for &(end, head) in cases.iter() {
        assert_eq!("abcd".slice_to(end), head);
    }
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_trim_left_chars() {
    // An empty set of characters strips nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_left_chars(nothing), " *** foo *** ");

    // A slice of characters strips the leading run made of its members and
    // leaves the rest of the string, including the right-hand side, alone.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_left_chars(star_or_space), "foo *** ");
    assert_eq!(" *** *** ".trim_left_chars(star_or_space), "");
    assert_eq!("foo *** ".trim_left_chars(star_or_space), "foo *** ");

    // The matcher may be a single char, a slice of chars, or a predicate.
    assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
    assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
    assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
}
|
|
|
|
|
|
|
|
#[test]
fn test_trim_right_chars() {
    // An empty set of characters strips nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_right_chars(nothing), " *** foo *** ");

    // A slice of characters strips the trailing run made of its members and
    // leaves the rest of the string, including the left-hand side, alone.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_right_chars(star_or_space), " *** foo");
    assert_eq!(" *** *** ".trim_right_chars(star_or_space), "");
    assert_eq!(" *** foo".trim_right_chars(star_or_space), " *** foo");

    // The matcher may be a single char, a slice of chars, or a predicate.
    assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
    assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
    assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
}
|
|
|
|
|
|
|
|
#[test]
fn test_trim_chars() {
    // An empty set of characters strips nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_chars(nothing), " *** foo *** ");

    // A slice of characters strips matching runs from both ends.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_chars(star_or_space), "foo");
    assert_eq!(" *** *** ".trim_chars(star_or_space), "");
    assert_eq!("foo".trim_chars(star_or_space), "foo");

    // The matcher may be a single char, a slice of chars, or a predicate;
    // occurrences in the interior of the string are never removed.
    assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
    assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
    assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
}
|
|
|
|
|
|
|
|
#[test]
|
2012-01-17 19:28:21 -06:00
|
|
|
fn test_trim_left() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim_left(), "");
|
|
|
|
assert_eq!("a".trim_left(), "a");
|
|
|
|
assert_eq!(" ".trim_left(), "");
|
|
|
|
assert_eq!(" blah".trim_left(), "blah");
|
|
|
|
assert_eq!(" \u3000 wut".trim_left(), "wut");
|
|
|
|
assert_eq!("hey ".trim_left(), "hey ");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_trim_right() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim_right(), "");
|
|
|
|
assert_eq!("a".trim_right(), "a");
|
|
|
|
assert_eq!(" ".trim_right(), "");
|
|
|
|
assert_eq!("blah ".trim_right(), "blah");
|
|
|
|
assert_eq!("wut \u3000 ".trim_right(), "wut");
|
|
|
|
assert_eq!(" hey".trim_right(), " hey");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_trim() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim(), "");
|
|
|
|
assert_eq!("a".trim(), "a");
|
|
|
|
assert_eq!(" ".trim(), "");
|
|
|
|
assert_eq!(" blah ".trim(), "blah");
|
|
|
|
assert_eq!("\nwut \u3000 ".trim(), "wut");
|
|
|
|
assert_eq!(" hey dude ".trim(), "hey dude");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_is_whitespace() {
|
2013-06-10 10:07:52 -05:00
|
|
|
assert!("".is_whitespace());
|
|
|
|
assert!(" ".is_whitespace());
|
|
|
|
assert!("\u2009".is_whitespace()); // Thin space
|
|
|
|
assert!(" \n\t ".is_whitespace());
|
|
|
|
assert!(!" _ ".is_whitespace());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2014-03-09 16:56:33 -05:00
|
|
|
#[test]
fn test_slice_shift_char() {
    // The first (multi-byte) character is split off; the rest is returned as a slice.
    let text = "ประเทศไทย中";
    let (head, tail) = text.slice_shift_char();
    assert_eq!(head, Some('ป'));
    assert_eq!(tail, "ระเทศไทย中");
}
|
|
|
|
|
|
|
|
#[test]
fn test_slice_shift_char_2() {
    // Nothing can be split off the empty string.
    let (head, tail) = "".slice_shift_char();
    assert_eq!(head, None);
    assert_eq!(tail, "");
}
|
|
|
|
|
2013-07-30 11:39:31 -05:00
|
|
|
#[test]
fn test_is_utf8() {
    fn rejected(bytes: &[u8]) {
        assert!(!is_utf8(bytes));
    }
    fn accepted(bytes: &[u8]) {
        assert!(is_utf8(bytes));
    }

    // deny overlong encodings
    rejected([0xc0, 0x80]);
    rejected([0xc0, 0xae]);
    rejected([0xe0, 0x80, 0x80]);
    rejected([0xe0, 0x80, 0xaf]);
    rejected([0xe0, 0x81, 0x81]);
    rejected([0xf0, 0x82, 0x82, 0xac]);
    rejected([0xf4, 0x90, 0x80, 0x80]);

    // deny surrogates
    rejected([0xED, 0xA0, 0x80]);
    rejected([0xED, 0xBF, 0xBF]);

    // two-, three- and four-byte sequences that must be accepted
    accepted([0xC2, 0x80]);
    accepted([0xDF, 0xBF]);
    accepted([0xE0, 0xA0, 0x80]);
    accepted([0xED, 0x9F, 0xBF]);
    accepted([0xEE, 0x80, 0x80]);
    accepted([0xEF, 0xBF, 0xBF]);
    accepted([0xF0, 0x90, 0x80, 0x80]);
    accepted([0xF4, 0x8F, 0xBF, 0xBF]);
}
|
|
|
|
|
2014-02-16 06:52:14 -06:00
|
|
|
#[test]
fn test_is_utf16() {
    fn accepted(units: &[u16]) {
        assert!(is_utf16(units));
    }
    fn rejected(units: &[u16]) {
        assert!(!is_utf16(units));
    }

    // non-surrogates
    accepted([0x0000]);
    accepted([0x0001, 0x0002]);
    accepted([0xD7FF]);
    accepted([0xE000]);

    // surrogate pairs (randomly generated with Python 3's
    // .encode('utf-16be'))
    accepted([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45]);
    accepted([0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14]);
    accepted([0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // mixtures (also random)
    accepted([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65]);
    accepted([0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006]);
    accepted([0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // negative tests

    // surrogate + regular unit
    rejected([0xdb45, 0x0000]);
    // surrogate + lead surrogate
    rejected([0xd900, 0xd900]);
    // unterminated surrogate
    rejected([0xd8ff]);
    // trail surrogate without a lead
    rejected([0xddb7]);

    // random byte sequences that Python 3's .decode('utf-16be')
    // failed on
    rejected([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7]);
    rejected([0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3]);
    rejected([0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca]);
    rejected([0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278]);
    rejected([0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e]);
    rejected([0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5]);
    rejected([0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee]);
    rejected([0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7]);
    rejected([0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a]);
    rejected([0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a]);
    rejected([0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe]);
    rejected([0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf]);
    rejected([0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e]);
    rejected([0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5]);
    rejected([0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f]);
    rejected([0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b]);
    rejected([0x934b, 0x8956, 0xc434, 0x1881, 0xddf7]);
    rejected([0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9]);
    rejected([0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8]);
    rejected([0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282]);
    rejected([0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
2013-08-04 16:08:20 -05:00
|
|
|
fn test_raw_from_c_str() {
|
2012-06-24 22:18:18 -05:00
|
|
|
unsafe {
|
2013-08-04 16:08:20 -05:00
|
|
|
let a = ~[65, 65, 65, 65, 65, 65, 65, 0];
|
2013-12-15 06:35:12 -06:00
|
|
|
let b = a.as_ptr();
|
2013-08-04 16:08:20 -05:00
|
|
|
let c = raw::from_c_str(b);
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(c, "AAAAAAA".to_owned());
|
2012-06-24 22:18:18 -05:00
|
|
|
}
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2013-06-10 22:10:37 -05:00
|
|
|
#[test]
fn test_as_bytes() {
    // trivial cases first
    assert_eq!("".as_bytes(), &[]);
    assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);

    // The UTF-8 encoding of the mixed-script string below; note that the
    // expected bytes carry no trailing null.
    let encoded = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    assert_eq!("ศไทย中华Việt Nam".as_bytes(), encoded.as_slice());
}
|
|
|
|
|
2012-04-09 20:56:24 -05:00
|
|
|
#[test]
#[should_fail]
fn test_as_bytes_fail() {
    // Don't double free. (I'm not sure if this exercises the
    // original problem code path anymore.)
    //
    // The failure is raised while a byte view of an owned string is alive.
    let owned = "".to_owned();
    let _view = owned.as_bytes();
    fail!();
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_as_ptr() {
    let expected = ['h', 'e', 'l', 'l', 'o'];
    let buf = "hello".as_ptr();

    // Offsets 0..5 stay inside the five bytes of the literal "hello".
    for (i, &ch) in expected.iter().enumerate() {
        unsafe {
            assert_eq!(*buf.offset(i as int), ch as u8);
        }
    }
}
|
|
|
|
|
2013-06-30 10:29:38 -05:00
|
|
|
#[test]
|
2013-04-10 16:51:41 -05:00
|
|
|
fn test_subslice_offset() {
|
|
|
|
let a = "kernelsprite";
|
2013-06-09 09:44:58 -05:00
|
|
|
let b = a.slice(7, a.len());
|
|
|
|
let c = a.slice(0, a.len() - 6);
|
2013-06-13 10:39:06 -05:00
|
|
|
assert_eq!(a.subslice_offset(b), 7);
|
|
|
|
assert_eq!(a.subslice_offset(c), 0);
|
2013-04-10 17:48:31 -05:00
|
|
|
|
|
|
|
let string = "a\nb\nc";
|
2014-04-17 17:28:14 -05:00
|
|
|
let lines: ~[&str] = string.lines().collect();
|
2013-06-13 10:39:06 -05:00
|
|
|
assert_eq!(string.subslice_offset(lines[0]), 0);
|
|
|
|
assert_eq!(string.subslice_offset(lines[1]), 2);
|
|
|
|
assert_eq!(string.subslice_offset(lines[2]), 4);
|
2013-04-10 16:51:41 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
#[should_fail]
fn test_subslice_offset_2() {
    // Two unrelated strings: neither is a sub-slice of the other, so asking
    // for the offset is expected to fail.
    let outer = "alchemiter";
    let unrelated = "cruxtruder";
    outer.subslice_offset(unrelated);
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
|
|
|
fn vec_str_conversions() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s1: ~str = "All mimsy were the borogoves".to_owned();
|
2012-01-17 19:28:21 -06:00
|
|
|
|
2013-06-10 22:10:37 -05:00
|
|
|
let v: ~[u8] = s1.as_bytes().to_owned();
|
2013-12-23 10:30:49 -06:00
|
|
|
let s2: ~str = from_utf8(v).unwrap().to_owned();
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i: uint = 0u;
|
2013-06-09 09:44:58 -05:00
|
|
|
let n1: uint = s1.len();
|
2013-06-08 20:38:47 -05:00
|
|
|
let n2: uint = v.len();
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!(n1, n2);
|
2012-01-17 19:28:21 -06:00
|
|
|
while i < n1 {
|
|
|
|
let a: u8 = s1[i];
|
|
|
|
let b: u8 = s2[i];
|
2013-10-21 15:08:31 -05:00
|
|
|
debug!("{}", a);
|
|
|
|
debug!("{}", b);
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!(a, b);
|
2012-01-17 19:28:21 -06:00
|
|
|
i += 1u;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_contains() {
    // (haystack, needle, expected answer)
    let ascii_cases = [
        ("abcde", "bcd", true),
        ("abcde", "abcd", true),
        ("abcde", "bcde", true),
        // the empty needle is contained in everything, even in ""
        ("abcde", "", true),
        ("", "", true),
        ("abcde", "def", false),
        ("", "a", false),
    ];
    for &(haystack, needle, expected) in ascii_cases.iter() {
        assert_eq!(haystack.contains(needle), expected);
    }

    // Multi-byte haystack: needles from the start and from the interior.
    let data = "ประเทศไทย中华Việt Nam".to_owned();
    let present = ["ประเ", "ะเ", "中华"];
    for needle in present.iter() {
        assert!(data.contains(*needle));
    }

    // Characters that all occur in `data`, but never adjacent in this order.
    assert!(!data.contains("ไท华"));
}
|
|
|
|
|
2012-06-30 05:54:54 -05:00
|
|
|
#[test]
fn test_contains_char() {
    // (haystack, character, expected answer)
    let cases = [
        ("abc", 'b', true),
        ("a", 'a', true),
        ("abc", 'd', false),
        ("", 'a', false),
    ];
    for &(haystack, ch, expected) in cases.iter() {
        assert_eq!(haystack.contains_char(ch), expected);
    }
}
|
|
|
|
|
2012-01-30 22:44:48 -06:00
|
|
|
#[test]
|
2012-03-02 17:47:14 -06:00
|
|
|
fn test_utf16() {
|
|
|
|
let pairs =
|
2014-04-15 20:17:48 -05:00
|
|
|
[("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_owned(),
|
2012-06-29 18:26:56 -05:00
|
|
|
~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
|
|
|
|
0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
|
|
|
|
0xd800_u16, 0xdf30_u16, 0x000a_u16]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_owned(),
|
2012-06-29 18:26:56 -05:00
|
|
|
~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
|
|
|
|
0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
|
|
|
|
0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
|
|
|
|
0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
|
|
|
|
0x000a_u16]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_owned(),
|
2012-06-29 18:26:56 -05:00
|
|
|
~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
|
|
|
|
0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
|
|
|
|
0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
|
|
|
|
0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_owned(),
|
2012-06-29 18:26:56 -05:00
|
|
|
~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
|
|
|
|
0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
|
|
|
|
0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
|
|
|
|
0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
|
|
|
|
0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
|
|
|
|
0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
|
|
|
|
0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
|
2014-02-16 07:57:16 -06:00
|
|
|
0x000a_u16 ]),
|
|
|
|
// Issue #12318, even-numbered non-BMP planes
|
2014-04-15 20:17:48 -05:00
|
|
|
("\U00020000".to_owned(),
|
2014-02-16 07:57:16 -06:00
|
|
|
~[0xD840, 0xDC00])];
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for p in pairs.iter() {
|
2013-07-02 14:47:32 -05:00
|
|
|
let (s, u) = (*p).clone();
|
2014-02-16 06:52:14 -06:00
|
|
|
assert!(is_utf16(u));
|
|
|
|
assert_eq!(s.to_utf16(), u);
|
2014-02-16 07:52:58 -06:00
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
assert_eq!(from_utf16(u).unwrap(), s);
|
2014-02-16 07:52:58 -06:00
|
|
|
assert_eq!(from_utf16_lossy(u), s);
|
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
assert_eq!(from_utf16(s.to_utf16()).unwrap(), s);
|
|
|
|
assert_eq!(from_utf16(u).unwrap().to_utf16(), u);
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
2012-03-30 00:28:26 -05:00
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
#[test]
fn test_utf16_invalid() {
    // Only rejections are checked here; successful decoding is covered by
    // `test_utf16`.

    // a lead surrogate at the end of input
    assert_eq!(from_utf16([0xD800]), None);

    // a lead surrogate followed by another lead surrogate
    assert_eq!(from_utf16([0xD800, 0xD800]), None);

    // a trail surrogate with no lead before it
    assert_eq!(from_utf16([0x0061, 0xDC00]), None);

    // a valid pair surrounded by unpaired lead surrogates
    assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
}
|
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
#[test]
fn test_utf16_lossy() {
    // Only malformed input is checked here; well-formed input is covered by
    // `test_utf16`. Every unpaired surrogate is expected to come out as one
    // U+FFFD.

    // a lead surrogate at the end of input
    assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_owned());

    // a lead surrogate followed by another lead surrogate
    assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_owned());

    // a trail surrogate with no lead before it
    assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_owned());

    // a valid pair surrounded by unpaired lead surrogates
    assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), "\uFFFD𐒋\uFFFD".to_owned());
}
|
|
|
|
|
2014-02-18 05:25:32 -06:00
|
|
|
#[test]
fn test_truncate_utf16_at_nul() {
    // empty input stays empty
    let v = [];
    assert_eq!(truncate_utf16_at_nul(v), &[]);

    // a 0 unit in first position: nothing is kept
    let v = [0, 2, 3];
    assert_eq!(truncate_utf16_at_nul(v), &[]);

    // a 0 unit in the middle: only the units before it are kept
    let v = [1, 0, 3];
    assert_eq!(truncate_utf16_at_nul(v), &[1]);

    // a 0 unit in last position: everything before it is kept
    let v = [1, 2, 0];
    assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);

    // no 0 unit at all: the input comes back unchanged
    let v = [1, 2, 3];
    assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
}
|
|
|
|
|
2013-03-15 02:32:11 -05:00
|
|
|
#[test]
|
|
|
|
fn test_char_at() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2013-03-15 02:32:11 -05:00
|
|
|
let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
|
|
|
let mut pos = 0;
|
2013-08-03 11:45:23 -05:00
|
|
|
for ch in v.iter() {
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(s.char_at(pos) == *ch);
|
2013-03-15 02:32:11 -05:00
|
|
|
pos += from_char(*ch).len();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_char_at_reverse() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2013-03-15 02:32:11 -05:00
|
|
|
let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
|
|
|
let mut pos = s.len();
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
for ch in v.iter().rev() {
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(s.char_at_reverse(pos) == *ch);
|
2013-03-15 02:32:11 -05:00
|
|
|
pos -= from_char(*ch).len();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-05-31 17:31:13 -05:00
|
|
|
#[test]
fn test_escape_unicode() {
    // (input, expected escaped form)
    let cases = [
        ("abc", "\\x61\\x62\\x63"),
        ("a c", "\\x61\\x20\\x63"),
        ("\r\n\t", "\\x0d\\x0a\\x09"),
        ("'\"\\", "\\x27\\x22\\x5c"),
        ("\x00\x01\xfe\xff", "\\x00\\x01\\xfe\\xff"),
        ("\u0100\uffff", "\\u0100\\uffff"),
        ("\U00010000\U0010ffff", "\\U00010000\\U0010ffff"),
        // different escape widths mixed within one string
        ("ab\ufb00", "\\x61\\x62\\ufb00"),
        ("\U0001d4ea\r", "\\U0001d4ea\\x0d"),
    ];
    for &(input, escaped) in cases.iter() {
        assert_eq!(input.escape_unicode(), escaped.to_owned());
    }
}
|
|
|
|
|
|
|
|
#[test]
fn test_escape_default() {
    // (input, expected escaped form)
    let cases = [
        // these inputs are expected to come back unchanged
        ("abc", "abc"),
        ("a c", "a c"),
        // control characters, quotes and the backslash get short escapes
        ("\r\n\t", "\\r\\n\\t"),
        ("'\"\\", "\\'\\\"\\\\"),
        // these inputs are expected to come back as \u / \U escapes
        ("\u0100\uffff", "\\u0100\\uffff"),
        ("\U00010000\U0010ffff", "\\U00010000\\U0010ffff"),
        // mixtures
        ("ab\ufb00", "ab\\ufb00"),
        ("\U0001d4ea\r", "\\U0001d4ea\\r"),
    ];
    for &(input, escaped) in cases.iter() {
        assert_eq!(input.escape_default(), escaped.to_owned());
    }
}
|
|
|
|
|
2013-03-01 21:07:12 -06:00
|
|
|
// Checks the total ordering of string slices: comparison is lexicographic,
// and a strict prefix sorts before the longer string.
#[test]
fn test_total_ord() {
    // Every comparison is wrapped in an assertion. A bare
    // `a.cmp(&b) == Greater;` statement only computes a bool and throws it
    // away, so it could never make this test fail.
    assert_eq!("1234".cmp(&("123")), Greater);
    assert_eq!("123".cmp(&("1234")), Less);
    assert_eq!("1234".cmp(&("1234")), Equal);
    // Decided at the first differing character ('5' < '6'), not by length.
    assert_eq!("12345555".cmp(&("123456")), Less);
    assert_eq!("22".cmp(&("1234")), Greater);
}
|
2013-03-28 18:37:12 -05:00
|
|
|
|
2013-07-10 13:32:59 -05:00
|
|
|
// `char_range_at` must decode the character that starts at each given byte
// offset, for 1-, 2-, 3- and 4-byte UTF-8 sequences.
#[test]
fn test_char_range_at() {
    let data = "b¢€𤭢𤭢€¢b".to_owned();
    // (byte offset of a character boundary, character expected there)
    let expected = [
        (0u, 'b'),
        (1, '¢'),
        (3, '€'),
        (6, '𤭢'),
        (10, '𤭢'),
        (14, '€'),
        (17, '¢'),
        (19, 'b'),
    ];
    for &(offset, ch) in expected.iter() {
        assert_eq!(ch, data.char_range_at(offset).ch);
    }
}
|
|
|
|
|
2013-03-28 18:37:12 -05:00
|
|
|
// Asking for the character before offset 0 must leave `next` at 0 rather
// than wrapping around.
#[test]
fn test_char_range_at_reverse_underflow() {
    let range = "abc".char_range_at_reverse(0);
    assert_eq!(range.next, 0);
}
|
|
|
|
|
2013-06-15 08:17:53 -05:00
|
|
|
// Exercises `+` on strings with both a borrowed and an owned left-hand
// side, and with borrowed and owned right-hand sides.
#[test]
fn test_add() {
    #![allow(unnecessary_allocation)]
    macro_rules! check_concat (
        ($lhs:expr, $rhs:expr, $expected:expr) => { {
            let lhs = $lhs;
            let rhs = $rhs;
            let joined = $expected.to_owned();
            // Borrowed left operand.
            assert_eq!(lhs + rhs, joined);
            // Owned left operand.
            assert_eq!(lhs.to_owned() + rhs, joined);
        } }
    );

    check_concat!("foo", "bar", "foobar");
    check_concat!("foo", "bar".to_owned(), "foobar");
    check_concat!("ศไทย中", "华Việt Nam", "ศไทย中华Việt Nam");
    check_concat!("ศไทย中", "华Việt Nam".to_owned(), "ศไทย中华Việt Nam");
}
|
|
|
|
|
2013-04-18 07:50:55 -05:00
|
|
|
#[test]
|
|
|
|
fn test_iterator() {
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::*;
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2013-04-18 07:50:55 -05:00
|
|
|
let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
|
|
|
|
|
|
|
let mut pos = 0;
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut it = s.chars();
|
2013-06-08 07:04:46 -05:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for c in it {
|
2013-06-08 07:04:46 -05:00
|
|
|
assert_eq!(c, v[pos]);
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
assert_eq!(pos, v.len());
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_rev_iterator() {
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::*;
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2013-06-08 07:04:46 -05:00
|
|
|
let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
|
|
|
|
|
|
|
|
let mut pos = 0;
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut it = s.chars().rev();
|
2013-04-18 07:50:55 -05:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for c in it {
|
2013-04-18 07:50:55 -05:00
|
|
|
assert_eq!(c, v[pos]);
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
assert_eq!(pos, v.len());
|
|
|
|
}
|
2013-06-08 09:38:58 -05:00
|
|
|
|
2013-08-21 17:35:16 -05:00
|
|
|
// A cloned `chars()` iterator must resume from the same position as the
// iterator it was cloned from and yield the same remaining characters.
#[test]
fn test_iterator_clone() {
    let s = "ศไทย中华Việt Nam";
    let mut it = s.chars();
    // Advance first so the clone is taken from a non-initial state.
    it.next();
    // Take the clone before `it` is consumed by `zip`; writing
    // `it.zip(it.clone())` reads `it` after it has been moved as the receiver.
    let twin = it.clone();
    assert!(it.zip(twin).all(|(x,y)| x == y));
}
|
|
|
|
|
2013-06-08 09:38:58 -05:00
|
|
|
// `bytes()` must yield the exact UTF-8 encoding of the string, in order.
#[test]
fn test_bytesator() {
    let s = "ศไทย中华Việt Nam".to_owned();
    // UTF-8 encoding of `s`, one entry per byte.
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = 0;

    for b in s.bytes() {
        assert_eq!(b, v[pos]);
        pos += 1;
    }
    // Without this check an iterator that stops early would still pass.
    assert_eq!(pos, v.len());
}
|
|
|
|
|
|
|
|
// `bytes().rev()` must yield the exact UTF-8 encoding of the string, last
// byte first.
#[test]
fn test_bytes_revator() {
    let s = "ศไทย中华Việt Nam".to_owned();
    // UTF-8 encoding of `s`, one entry per byte, in forward order.
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    // Walk the expected bytes from the end while the iterator runs backwards.
    let mut pos = v.len();

    for b in s.bytes().rev() {
        pos -= 1;
        assert_eq!(b, v[pos]);
    }
    // Without this check an iterator that stops early would still pass.
    assert_eq!(pos, 0);
}
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
// `char_indices()` must pair every character with the byte offset at which
// it starts, in forward order.
#[test]
fn test_char_indicesator() {
    use iter::*;
    let text = "ศไทย中华Việt Nam";
    // Starting byte offset of each character, and the character itself.
    let offsets = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
    let chars = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    let mut seen = 0;
    for pair in text.char_indices() {
        assert_eq!(pair, (offsets[seen], chars[seen]));
        seen += 1;
    }
    // Both tables must have been consumed completely.
    assert_eq!(seen, chars.len());
    assert_eq!(seen, offsets.len());
}
|
|
|
|
|
|
|
|
// `char_indices().rev()` must pair every character with the byte offset at
// which it starts, last character first.
#[test]
fn test_char_indices_revator() {
    use iter::*;
    let text = "ศไทย中华Việt Nam";
    // Starting byte offset of each character, and the character itself,
    // both listed back to front.
    let offsets = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
    let chars = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    let mut seen = 0;
    for pair in text.char_indices().rev() {
        assert_eq!(pair, (offsets[seen], chars[seen]));
        seen += 1;
    }
    // Both tables must have been consumed completely.
    assert_eq!(seen, chars.len());
    assert_eq!(seen, offsets.len());
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
|
|
|
|
fn test_split_char_iterator() {
|
|
|
|
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split(' ').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!( split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut rsplit: ~[&str] = data.split(' ').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
rsplit.reverse();
|
|
|
|
assert_eq!(rsplit, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split(|c: char| c == ' ').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!( split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut rsplit: ~[&str] = data.split(|c: char| c == ' ').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
rsplit.reverse();
|
|
|
|
assert_eq!(rsplit, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
|
2013-06-09 08:10:50 -05:00
|
|
|
|
|
|
|
// Unicode
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split('ä').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!( split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut rsplit: ~[&str] = data.split('ä').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
rsplit.reverse();
|
|
|
|
assert_eq!(rsplit, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split(|c: char| c == 'ä').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!( split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut rsplit: ~[&str] = data.split(|c: char| c == 'ä').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
rsplit.reverse();
|
|
|
|
assert_eq!(rsplit, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
|
2013-06-09 08:10:50 -05:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
|
|
|
|
fn test_splitn_char_iterator() {
|
|
|
|
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.splitn(' ', 3).collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.splitn(|c: char| c == ' ', 3).collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
|
|
|
|
|
|
|
|
// Unicode
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.splitn('ä', 3).collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.splitn(|c: char| c == 'ä', 3).collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
|
|
|
|
}
|
|
|
|
|
2013-08-25 01:54:47 -05:00
|
|
|
#[test]
|
|
|
|
fn test_rsplitn_char_iterator() {
|
|
|
|
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut split: ~[&str] = data.rsplitn(' ', 3).collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
|
|
|
assert_eq!(split, ~["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut split: ~[&str] = data.rsplitn(|c: char| c == ' ', 3).collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
|
|
|
assert_eq!(split, ~["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
|
|
|
|
|
|
|
|
// Unicode
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut split: ~[&str] = data.rsplitn('ä', 3).collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
|
|
|
assert_eq!(split, ~["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut split: ~[&str] = data.rsplitn(|c: char| c == 'ä', 3).collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
|
|
|
assert_eq!(split, ~["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
|
|
|
|
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
|
|
|
|
fn test_split_char_iterator_no_trailing() {
|
|
|
|
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split('\n').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
let split: ~[&str] = data.split_terminator('\n').collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_rev_split_char_iterator_no_trailing() {
|
|
|
|
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
|
|
|
|
|
2014-01-23 13:41:57 -06:00
|
|
|
let mut split: ~[&str] = data.split('\n').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
|
|
|
|
|
2014-01-23 13:41:57 -06:00
|
|
|
let mut split: ~[&str] = data.split_terminator('\n').rev().collect();
|
2013-08-25 01:54:47 -05:00
|
|
|
split.reverse();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_words() {
|
2013-06-09 08:10:50 -05:00
|
|
|
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
|
2013-11-23 04:18:51 -06:00
|
|
|
let words: ~[&str] = data.words().collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
|
|
|
|
}
|
|
|
|
|
2013-08-10 20:36:38 -05:00
|
|
|
// `nfd_chars()` must produce the canonical decomposition (NFD) of its
// input, including reordering of combining marks and Hangul decomposition.
#[test]
fn test_nfd_chars() {
    // (input, expected canonical decomposition)
    let cases = [
        ("abc", "abc"),
        ("\u1e0b\u01c4", "d\u0307\u01c4"),
        ("\u2026", "\u2026"),
        ("\u2126", "\u03a9"),
        ("\u1e0b\u0323", "d\u0323\u0307"),
        ("\u1e0d\u0307", "d\u0323\u0307"),
        ("a\u0301", "a\u0301"),
        ("\u0301a", "\u0301a"),
        ("\ud4db", "\u1111\u1171\u11b6"),
        ("\uac1c", "\u1100\u1162"),
    ];
    for &(input, decomposed) in cases.iter() {
        assert_eq!(input.nfd_chars().collect::<~str>(), decomposed.to_owned());
    }
}
|
|
|
|
|
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_nfkd_chars() {
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
|
|
|
|
assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
|
|
|
|
assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
|
|
|
|
assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
|
|
|
|
assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
|
|
|
|
assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
|
|
|
|
assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
|
|
|
|
assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
|
|
|
|
assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
|
|
|
|
assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_lines() {
|
2013-06-09 08:10:50 -05:00
|
|
|
let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
|
2013-11-23 04:18:51 -06:00
|
|
|
let lines: ~[&str] = data.lines().collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
|
|
|
|
|
|
|
|
let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
|
2013-11-23 04:18:51 -06:00
|
|
|
let lines: ~[&str] = data.lines().collect();
|
2013-06-09 08:10:50 -05:00
|
|
|
assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
|
|
|
|
}
|
2013-06-09 21:46:35 -05:00
|
|
|
|
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_split_strator() {
|
2013-06-09 21:46:35 -05:00
|
|
|
fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
|
2013-11-23 04:18:51 -06:00
|
|
|
let v: ~[&str] = s.split_str(sep).collect();
|
2013-06-09 21:46:35 -05:00
|
|
|
assert_eq!(v, u);
|
|
|
|
}
|
|
|
|
t("--1233345--", "12345", ~["--1233345--"]);
|
|
|
|
t("abc::hello::there", "::", ~["abc", "hello", "there"]);
|
|
|
|
t("::hello::there", "::", ~["", "hello", "there"]);
|
|
|
|
t("hello::there::", "::", ~["hello", "there", ""]);
|
|
|
|
t("::hello::there::", "::", ~["", "hello", "there", ""]);
|
|
|
|
t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
|
|
|
|
t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
|
|
|
|
t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
|
|
|
|
t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
|
|
|
|
t("", ".", ~[""]);
|
|
|
|
t("zz", "zz", ~["",""]);
|
|
|
|
t("ok", "z", ~["ok"]);
|
|
|
|
t("zzz", "zz", ~["","z"]);
|
|
|
|
t("zzzzz", "zz", ~["","","z"]);
|
|
|
|
}
|
2013-06-17 02:05:51 -05:00
|
|
|
|
|
|
|
#[test]
|
2013-08-10 08:38:00 -05:00
|
|
|
fn test_str_default() {
|
|
|
|
use default::Default;
|
|
|
|
fn t<S: Default + Str>() {
|
|
|
|
let s: S = Default::default();
|
2013-06-17 02:05:51 -05:00
|
|
|
assert_eq!(s.as_slice(), "");
|
|
|
|
}
|
|
|
|
|
|
|
|
t::<&str>();
|
|
|
|
t::<~str>();
|
|
|
|
}
|
2013-07-20 12:28:38 -05:00
|
|
|
|
|
|
|
// Both borrowed and owned strings must be usable through the generic
// `Container` interface.
#[test]
fn test_str_container() {
    // Total length of all containers in `items`.
    fn sum_len<S: Container>(items: &[S]) -> uint {
        items.iter().fold(0u, |total, item| total + item.len())
    }

    let owned = "01234".to_owned();
    assert_eq!(5, sum_len(["012", "", "34"]));
    assert_eq!(5, sum_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
    assert_eq!(5, sum_len([owned.as_slice()]));
}
|
2013-08-24 00:05:35 -05:00
|
|
|
|
2013-08-25 19:07:29 -05:00
|
|
|
// `from_utf8` returns the borrowed string for valid UTF-8 and `None` when
// the bytes contain an invalid sequence.
#[test]
fn test_str_from_utf8() {
    // Plain ASCII.
    assert_eq!(from_utf8(bytes!("hello")), Some("hello"));

    // Multi-byte characters.
    assert_eq!(from_utf8(bytes!("ศไทย中华Việt Nam")), Some("ศไทย中华Việt Nam"));

    // 0xff can never appear in UTF-8.
    assert_eq!(from_utf8(bytes!("hello", 0xff)), None);
}
|
|
|
|
|
|
|
|
// Same cases as `test_str_from_utf8`, but handing over an owned byte vector.
#[test]
fn test_str_from_utf8_owned() {
    let ascii = bytes!("hello").to_owned();
    let expected = Some("hello".to_owned());
    assert_eq!(from_utf8_owned(ascii), expected);

    let multibyte = bytes!("ศไทย中华Việt Nam").to_owned();
    let expected = Some("ศไทย中华Việt Nam".to_owned());
    assert_eq!(from_utf8_owned(multibyte), expected);

    // A trailing 0xff byte makes the whole input invalid.
    let invalid = bytes!("hello", 0xff).to_owned();
    assert_eq!(from_utf8_owned(invalid), None);
}
|
2013-09-14 12:37:45 -05:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
// Checks `from_utf8_lossy` on well-formed input (expected back as a `Slice`)
// and on several malformed byte sequences (expected back as an `Owned`
// string with U+FFFD standing in for the bad bytes).
#[test]
fn test_str_from_utf8_lossy() {
    // Well-formed input: ASCII only.
    let input = bytes!("hello");
    let expected = Slice("hello");
    assert_eq!(from_utf8_lossy(input), expected);

    // Well-formed input: multi-byte characters.
    let input = bytes!("ศไทย中华Việt Nam");
    let expected = Slice("ศไทย中华Việt Nam");
    assert_eq!(from_utf8_lossy(input), expected);

    let input = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
    let expected = Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    let input = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
    let expected = Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    let input = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
    let expected = Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    let input = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
    let expected = Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    let input = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
    let expected = Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    // The first four bytes are each replaced; the second group decodes to U+10000.
    let input = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
    let expected = Owned("\uFFFD\uFFFD\uFFFD\uFFFDfoo\U00010000bar".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);

    // surrogates
    let input = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
    let expected = Owned("\uFFFD\uFFFD\uFFFDfoo\uFFFD\uFFFD\uFFFDbar".to_owned());
    assert_eq!(from_utf8_lossy(input), expected);
}
|
|
|
|
|
2013-10-02 08:37:59 -05:00
|
|
|
#[test]
|
|
|
|
fn test_from_str() {
|
2014-05-01 00:32:13 -05:00
|
|
|
let owned: Option<~str> = from_str("string");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(owned, Some("string".to_owned()));
|
2013-10-02 08:37:59 -05:00
|
|
|
}
|
2014-02-07 18:36:59 -06:00
|
|
|
|
|
|
|
// Runs the same trait checks (length, slicing, formatting, ordering, default,
// comparison, equivalence) against both `MaybeOwned` variants.
#[test]
fn test_maybe_owned_traits() {
    // Borrowed variant.
    let slice_str = Slice("abcde");
    assert_eq!(slice_str.len(), 5);
    assert_eq!(slice_str.as_slice(), "abcde");
    assert_eq!(slice_str.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", slice_str), "abcde".to_owned());
    assert!(slice_str.lt(&Owned("bcdef".to_owned())));
    assert_eq!(Slice(""), Default::default());

    // Owned variant.
    let owned_str = Owned("abcde".to_owned());
    assert_eq!(owned_str.len(), 5);
    assert_eq!(owned_str.as_slice(), "abcde");
    assert_eq!(owned_str.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", owned_str), "abcde".to_owned());
    assert!(owned_str.lt(&Slice("bcdef")));
    assert_eq!(Owned("".to_owned()), Default::default());

    // The two variants must compare as equal in both directions.
    assert!(slice_str.cmp(&owned_str) == Equal);
    assert!(slice_str.equiv(&owned_str));

    assert!(owned_str.cmp(&slice_str) == Equal);
    assert!(owned_str.equiv(&slice_str));
}
|
|
|
|
|
|
|
|
// `is_slice` and `is_owned` must report the variant actually held.
#[test]
fn test_maybe_owned_methods() {
    let slice_str = Slice("abcde");
    assert!(slice_str.is_slice());
    assert!(!slice_str.is_owned());

    let owned_str = Owned("abcde".to_owned());
    assert!(!owned_str.is_slice());
    assert!(owned_str.is_owned());
}
|
|
|
|
|
|
|
|
// A clone must compare equal to either variant holding the same text.
#[test]
fn test_maybe_owned_clone() {
    let slice_copy = Slice("abcde").clone();
    assert_eq!(Owned("abcde".to_owned()), slice_copy);

    let owned_copy = Owned("abcde".to_owned()).clone();
    assert_eq!(Owned("abcde".to_owned()), owned_copy);

    let slice_copy = Slice("abcde").clone();
    assert_eq!(Slice("abcde"), slice_copy);

    let owned_copy = Owned("abcde".to_owned()).clone();
    assert_eq!(Slice("abcde"), owned_copy);
}
|
|
|
|
|
|
|
|
// `into_owned` must produce the same owned string from either variant.
#[test]
fn test_maybe_owned_into_owned() {
    let from_slice = Slice("abcde").into_owned();
    assert_eq!(from_slice, "abcde".to_owned());

    let from_owned = Owned("abcde".to_owned()).into_owned();
    assert_eq!(from_owned, "abcde".to_owned());
}
|
|
|
|
|
|
|
|
// `into_maybe_owned` on a slice or an owned string must compare equal to
// both `MaybeOwned` variants holding the same text.
#[test]
fn test_into_maybe_owned() {
    let from_slice = "abcde".into_maybe_owned();
    assert_eq!(from_slice, Slice("abcde"));

    let from_owned = ("abcde".to_owned()).into_maybe_owned();
    assert_eq!(from_owned, Slice("abcde"));

    let from_slice = "abcde".into_maybe_owned();
    assert_eq!(from_slice, Owned("abcde".to_owned()));

    let from_owned = ("abcde".to_owned()).into_maybe_owned();
    assert_eq!(from_owned, Owned("abcde".to_owned()));
}
|
2012-01-23 02:36:58 -06:00
|
|
|
}
|
2013-07-22 12:52:38 -05:00
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod bench {
|
2014-02-13 19:49:11 -06:00
|
|
|
extern crate test;
|
2014-03-31 20:16:35 -05:00
|
|
|
use self::test::Bencher;
|
2013-08-10 12:32:05 -05:00
|
|
|
use super::*;
|
2013-08-18 06:57:34 -05:00
|
|
|
use prelude::*;
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn char_iterator(b: &mut Bencher) {
|
2013-08-18 06:57:34 -05:00
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
let len = s.char_len();
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.chars().len(), len));
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn char_iterator_ascii(b: &mut Bencher) {
|
2013-08-18 06:57:34 -05:00
|
|
|
let s = "Mary had a little lamb, Little lamb
|
|
|
|
Mary had a little lamb, Little lamb
|
|
|
|
Mary had a little lamb, Little lamb
|
|
|
|
Mary had a little lamb, Little lamb
|
|
|
|
Mary had a little lamb, Little lamb
|
|
|
|
Mary had a little lamb, Little lamb";
|
|
|
|
let len = s.char_len();
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.chars().len(), len));
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn char_iterator_rev(b: &mut Bencher) {
|
2013-08-18 06:57:34 -05:00
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
let len = s.char_len();
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
b.iter(|| assert_eq!(s.chars().rev().len(), len));
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn char_indicesator(b: &mut Bencher) {
|
2013-08-18 06:57:34 -05:00
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
let len = s.char_len();
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.char_indices().len(), len));
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn char_indicesator_rev(b: &mut Bencher) {
|
2013-08-18 06:57:34 -05:00
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
let len = s.char_len();
|
|
|
|
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
|
2013-08-18 06:57:34 -05:00
|
|
|
}
|
2013-07-22 12:52:38 -05:00
|
|
|
|
2013-08-26 04:48:48 -05:00
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_unicode_ascii(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split('V').len(), 3));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_unicode_not_ascii(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
struct NotAscii(char);
|
|
|
|
impl CharEq for NotAscii {
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, c: char) -> bool {
|
2013-11-01 20:06:31 -05:00
|
|
|
let NotAscii(cc) = *self;
|
|
|
|
cc == c
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
fn only_ascii(&self) -> bool { false }
|
|
|
|
}
|
|
|
|
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_ascii(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
2013-11-23 04:18:51 -06:00
|
|
|
let len = s.split(' ').len();
|
2013-08-26 04:48:48 -05:00
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(' ').len(), len));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_not_ascii(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
struct NotAscii(char);
|
|
|
|
impl CharEq for NotAscii {
|
|
|
|
#[inline]
|
2014-04-22 00:21:37 -05:00
|
|
|
fn matches(&mut self, c: char) -> bool {
|
2013-11-01 20:06:31 -05:00
|
|
|
let NotAscii(cc) = *self;
|
|
|
|
cc == c
|
|
|
|
}
|
2013-08-26 04:48:48 -05:00
|
|
|
fn only_ascii(&self) -> bool { false }
|
|
|
|
}
|
|
|
|
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
2013-11-23 04:18:51 -06:00
|
|
|
let len = s.split(' ').len();
|
2013-08-26 04:48:48 -05:00
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_extern_fn(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
2013-11-23 04:18:51 -06:00
|
|
|
let len = s.split(' ').len();
|
2013-08-26 04:48:48 -05:00
|
|
|
fn pred(c: char) -> bool { c == ' ' }
|
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(pred).len(), len));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_closure(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
2013-11-23 04:18:51 -06:00
|
|
|
let len = s.split(' ').len();
|
2013-08-26 04:48:48 -05:00
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn split_slice(b: &mut Bencher) {
|
2013-08-26 04:48:48 -05:00
|
|
|
let s = "Mary had a little lamb, Little lamb, little-lamb.";
|
2013-11-23 04:18:51 -06:00
|
|
|
let len = s.split(' ').len();
|
2013-08-26 04:48:48 -05:00
|
|
|
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
|
2013-08-26 04:48:48 -05:00
|
|
|
}
|
|
|
|
|
2013-07-22 12:52:38 -05:00
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn is_utf8_100_ascii(b: &mut Bencher) {
|
2013-07-22 12:52:38 -05:00
|
|
|
|
|
|
|
let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
|
|
|
|
Lorem ipsum dolor sit amet, consectetur. ");
|
|
|
|
|
|
|
|
assert_eq!(100, s.len());
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-02-12 09:39:21 -06:00
|
|
|
is_utf8(s)
|
2013-11-21 19:23:21 -06:00
|
|
|
});
|
2013-07-22 12:52:38 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn is_utf8_100_multibyte(b: &mut Bencher) {
|
2013-07-22 12:52:38 -05:00
|
|
|
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
|
|
|
|
assert_eq!(100, s.len());
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-02-12 09:39:21 -06:00
|
|
|
is_utf8(s)
|
2013-11-21 19:23:21 -06:00
|
|
|
});
|
2013-07-22 12:52:38 -05:00
|
|
|
}
|
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
|
2014-02-06 01:56:27 -06:00
|
|
|
let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
|
|
|
|
Lorem ipsum dolor sit amet, consectetur. ");
|
|
|
|
|
|
|
|
assert_eq!(100, s.len());
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-02-06 01:56:27 -06:00
|
|
|
let _ = from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
|
2014-02-06 01:56:27 -06:00
|
|
|
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
|
|
|
|
assert_eq!(100, s.len());
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-02-06 01:56:27 -06:00
|
|
|
let _ = from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn from_utf8_lossy_invalid(b: &mut Bencher) {
|
2014-02-06 01:56:27 -06:00
|
|
|
let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-02-06 01:56:27 -06:00
|
|
|
let _ = from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
|
2014-04-17 17:28:14 -05:00
|
|
|
let s = Vec::from_elem(100, 0xF5u8);
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2014-04-17 17:28:14 -05:00
|
|
|
let _ = from_utf8_lossy(s.as_slice());
|
2014-02-06 01:56:27 -06:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2013-09-10 19:16:11 -05:00
|
|
|
#[bench]
|
2014-03-31 20:16:35 -05:00
|
|
|
fn bench_connect(b: &mut Bencher) {
|
2013-09-10 19:16:11 -05:00
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
let sep = "→";
|
|
|
|
let v = [s, s, s, s, s, s, s, s, s, s];
|
2014-03-31 20:16:35 -05:00
|
|
|
b.iter(|| {
|
2013-09-10 19:16:11 -05:00
|
|
|
assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
|
2013-11-20 16:17:12 -06:00
|
|
|
})
|
2013-09-10 19:16:11 -05:00
|
|
|
}
|
2013-07-22 12:52:38 -05:00
|
|
|
}
|