2014-01-30 12:29:35 -06:00
|
|
|
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
|
2012-12-10 17:44:02 -06:00
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2013-09-17 21:42:07 -05:00
|
|
|
/*!
|
|
|
|
|
2013-12-24 10:08:28 -06:00
|
|
|
Unicode string manipulation (`str` type)
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
# Basic Usage
|
|
|
|
|
|
|
|
Rust's string type is one of the core primitive types of the language. While
|
|
|
|
represented by the name `str`, the name `str` is not actually a valid type in
|
2013-10-04 23:24:29 -05:00
|
|
|
Rust. Each string must also be decorated with its ownership. This means that
|
2014-02-20 09:56:22 -06:00
|
|
|
there are two common kinds of strings in Rust:
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
* `~str` - This is an owned string. This type obeys all of the normal semantics
|
2014-05-05 20:56:44 -05:00
|
|
|
of the `Box<T>` types, meaning that it has one, and only one,
|
|
|
|
owner. This type cannot be implicitly copied, and is moved out of
|
|
|
|
when passed to other functions.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
2014-01-31 18:50:45 -06:00
|
|
|
* `&str` - This is the borrowed string type. This type of string can only be
|
|
|
|
created from the other kind of string. As the name "borrowed"
|
|
|
|
implies, this type of string is owned elsewhere, and this string
|
|
|
|
cannot be moved out of.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
As an example, here are a few different kinds of strings.
|
|
|
|
|
2013-09-23 19:20:36 -05:00
|
|
|
```rust
|
2013-12-31 00:51:11 -06:00
|
|
|
fn main() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let owned_string = "I am an owned string".to_owned();
|
2013-12-31 00:51:11 -06:00
|
|
|
let borrowed_string1 = "This string is borrowed with the 'static lifetime";
|
|
|
|
let borrowed_string2: &str = owned_string; // owned strings can be borrowed
|
|
|
|
}
|
2014-05-02 19:56:35 -05:00
|
|
|
```
|
2013-09-17 21:42:07 -05:00
|
|
|
|
2014-02-20 09:56:22 -06:00
|
|
|
From the example above, you can see that Rust has 2 different kinds of string
|
2014-01-31 18:50:45 -06:00
|
|
|
literals. The owned literals correspond to the owned string types, but the
|
|
|
|
"borrowed literal" is actually more akin to C's concept of a static string.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
2014-01-31 18:50:45 -06:00
|
|
|
When a string is declared without a `~` sigil, then the string is allocated
|
|
|
|
statically in the rodata of the executable/library. The string then has the
|
|
|
|
type `&'static str` meaning that the string is valid for the `'static`
|
2013-09-17 21:42:07 -05:00
|
|
|
lifetime, otherwise known as the lifetime of the entire program. As can be
|
|
|
|
inferred from the type, these static strings are not mutable.
|
|
|
|
|
|
|
|
# Mutability
|
|
|
|
|
2014-02-20 09:56:22 -06:00
|
|
|
Many languages have immutable strings by default, and Rust has a particular
|
2013-09-17 21:42:07 -05:00
|
|
|
flavor on this idea. As with the rest of Rust's types, strings are immutable by
|
|
|
|
default. If a string is declared as `mut`, however, it may be mutated. This
|
|
|
|
works the same way as the rest of Rust's type system in the sense that if
|
|
|
|
there's a mutable reference to a string, there may only be one mutable reference
|
|
|
|
to that string. With these guarantees, strings can easily transition between
|
|
|
|
being mutable/immutable with the same benefits of having mutable strings in
|
|
|
|
other languages.
|
|
|
|
|
|
|
|
# Representation
|
|
|
|
|
|
|
|
Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
|
|
|
|
stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
|
2013-09-24 07:26:10 -05:00
|
|
|
encoded UTF-8 sequences. Additionally, strings are not null-terminated
|
|
|
|
and can contain null codepoints.
|
2013-09-17 21:42:07 -05:00
|
|
|
|
|
|
|
The actual representation of strings has direct mappings to vectors:
|
|
|
|
|
|
|
|
* `~str` is the same as `~[u8]`
|
|
|
|
* `&str` is the same as `&[u8]`
|
|
|
|
|
|
|
|
*/
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
use char::Char;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use char;
|
2014-03-05 00:19:14 -06:00
|
|
|
use clone::Clone;
|
2014-02-07 18:36:59 -06:00
|
|
|
use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
|
2014-04-17 17:28:14 -05:00
|
|
|
use container::Container;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use default::Default;
|
2014-02-07 18:36:59 -06:00
|
|
|
use fmt;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use from_str::FromStr;
|
2014-02-25 10:03:41 -06:00
|
|
|
use io::Writer;
|
2014-05-01 01:06:36 -05:00
|
|
|
use iter::{Iterator, range, AdditiveIterator};
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use mem::transmute;
|
|
|
|
use mem;
|
2013-01-08 21:37:25 -06:00
|
|
|
use option::{None, Option, Some};
|
2014-05-04 02:25:44 -05:00
|
|
|
use slice::Vector;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use slice::{ImmutableVector, MutableVector, CloneableVector};
|
2014-04-02 18:54:22 -05:00
|
|
|
use strbuf::StrBuf;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use vec::Vec;
|
2012-06-04 19:26:17 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
pub use core::str::{from_utf8, CharEq, Chars, CharOffsets, RevChars};
|
|
|
|
pub use core::str::{RevCharOffsets, Bytes, RevBytes, CharSplits, RevCharSplits};
|
|
|
|
pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
|
|
|
|
pub use core::str::{eq_slice, eq, is_utf8, is_utf16, UTF16Items};
|
|
|
|
pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
|
|
|
|
pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
|
|
|
|
pub use core::str::{Str, StrSlice};
|
|
|
|
|
2011-12-13 18:25:51 -06:00
|
|
|
/*
|
2012-01-24 03:29:45 -06:00
|
|
|
Section: Creating a string
|
2011-12-13 18:25:51 -06:00
|
|
|
*/
|
|
|
|
|
2013-08-25 19:07:29 -05:00
|
|
|
/// Consumes a vector of bytes to create a new utf-8 string.
|
|
|
|
/// Returns None if the vector contains invalid UTF-8.
|
2013-12-23 10:45:01 -06:00
|
|
|
pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
|
2013-08-25 19:07:29 -05:00
|
|
|
if is_utf8(vv) {
|
2013-09-05 07:17:24 -05:00
|
|
|
Some(unsafe { raw::from_utf8_owned(vv) })
|
2013-08-25 19:07:29 -05:00
|
|
|
} else {
|
|
|
|
None
|
2013-06-28 16:05:10 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-02 08:37:59 -05:00
|
|
|
// Parsing an owned string out of a string slice cannot fail: the result is
// always `Some` holding an owned copy of the input.
impl FromStr for ~str {
    #[inline]
    fn from_str(s: &str) -> Option<~str> {
        let owned = s.to_owned();
        Some(owned)
    }
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Convert a byte to a UTF-8 string
|
|
|
|
///
|
|
|
|
/// # Failure
|
|
|
|
///
|
|
|
|
/// Fails if invalid UTF-8
|
2013-08-04 15:22:56 -05:00
|
|
|
pub fn from_byte(b: u8) -> ~str {
|
|
|
|
assert!(b < 128u8);
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
unsafe { ::mem::transmute(box [b]) }
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Convert a char to a string
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn from_char(ch: char) -> ~str {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut buf = StrBuf::new();
|
2013-06-10 02:42:24 -05:00
|
|
|
buf.push_char(ch);
|
2014-04-02 18:54:22 -05:00
|
|
|
buf.into_owned()
|
2011-12-13 18:25:51 -06:00
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Convert a vector of chars to a string
|
2013-03-21 23:20:48 -05:00
|
|
|
pub fn from_chars(chs: &[char]) -> ~str {
|
2014-01-31 07:03:20 -06:00
|
|
|
chs.iter().map(|c| *c).collect()
|
2011-12-13 18:25:51 -06:00
|
|
|
}
|
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Methods for vectors of strings
|
2013-06-02 22:19:37 -05:00
|
|
|
pub trait StrVector {
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Concatenate a vector of strings.
|
2013-08-09 03:25:24 -05:00
|
|
|
fn concat(&self) -> ~str;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
|
|
|
/// Concatenate a vector of strings, placing a given separator between each.
|
2013-08-09 03:25:24 -05:00
|
|
|
fn connect(&self, sep: &str) -> ~str;
|
2013-06-02 22:19:37 -05:00
|
|
|
}
|
2013-05-02 04:24:41 -05:00
|
|
|
|
2013-12-10 01:16:18 -06:00
|
|
|
impl<'a, S: Str> StrVector for &'a [S] {
|
2013-08-09 03:25:24 -05:00
|
|
|
fn concat(&self) -> ~str {
|
2014-04-15 20:17:48 -05:00
|
|
|
if self.is_empty() { return "".to_owned(); }
|
2013-06-02 22:19:37 -05:00
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
// `len` calculation may overflow but push_str but will check boundaries
|
2013-08-09 22:09:47 -05:00
|
|
|
let len = self.iter().map(|s| s.as_slice().len()).sum();
|
2013-05-02 04:24:41 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::with_capacity(len);
|
2013-06-02 22:19:37 -05:00
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
for s in self.iter() {
|
|
|
|
result.push_str(s.as_slice())
|
2013-05-02 04:24:41 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
result.into_owned()
|
2013-06-02 22:19:37 -05:00
|
|
|
}
|
|
|
|
|
2013-08-09 03:25:24 -05:00
|
|
|
fn connect(&self, sep: &str) -> ~str {
|
2014-04-15 20:17:48 -05:00
|
|
|
if self.is_empty() { return "".to_owned(); }
|
2013-08-04 15:22:56 -05:00
|
|
|
|
|
|
|
// concat is faster
|
|
|
|
if sep.is_empty() { return self.concat(); }
|
|
|
|
|
|
|
|
// this is wrong without the guarantee that `self` is non-empty
|
2013-09-10 17:53:21 -05:00
|
|
|
// `len` calculation may overflow but push_str but will check boundaries
|
2013-08-04 15:22:56 -05:00
|
|
|
let len = sep.len() * (self.len() - 1)
|
2013-08-09 22:09:47 -05:00
|
|
|
+ self.iter().map(|s| s.as_slice().len()).sum();
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::with_capacity(len);
|
2013-08-04 15:22:56 -05:00
|
|
|
let mut first = true;
|
|
|
|
|
2013-09-10 17:53:21 -05:00
|
|
|
for s in self.iter() {
|
|
|
|
if first {
|
|
|
|
first = false;
|
|
|
|
} else {
|
|
|
|
result.push_str(sep);
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2013-09-10 17:53:21 -05:00
|
|
|
result.push_str(s.as_slice());
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
result.into_owned()
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2013-02-07 21:33:12 -06:00
|
|
|
}
|
|
|
|
|
2014-02-18 23:36:51 -06:00
|
|
|
impl<'a, S: Str> StrVector for Vec<S> {
|
|
|
|
#[inline]
|
|
|
|
fn concat(&self) -> ~str {
|
|
|
|
self.as_slice().concat()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn connect(&self, sep: &str) -> ~str {
|
|
|
|
self.as_slice().connect(sep)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/*
|
|
|
|
Section: Iterators
|
|
|
|
*/
|
|
|
|
|
2013-08-10 20:36:38 -05:00
|
|
|
// Helper functions used for Unicode normalization

// Reorders `comb` in place with a bubble sort keyed on the `u8` of each pair.
// Two neighbours are exchanged only when both have a non-zero key and the
// left key is greater than the right one, so a zero-keyed entry never moves
// past its neighbours.
fn canonical_sort(comb: &mut [(char, u8)]) {
    // Number of leading entries that may still be out of order; it shrinks by
    // one after every pass that performed at least one swap.
    let mut unsorted = comb.len();
    while unsorted > 0 {
        let mut swapped = false;
        let mut j = 1;
        while j < unsorted {
            let (_, class_a) = comb[j - 1];
            let (_, class_b) = comb[j];
            if class_a != 0 && class_b != 0 && class_a > class_b {
                comb.swap(j - 1, j);
                swapped = true;
            }
            j += 1;
        }
        // A pass without swaps means nothing is left to reorder.
        if !swapped { break; }
        unsorted -= 1;
    }
}
|
|
|
|
|
|
|
|
#[deriving(Clone)]
|
2014-05-12 15:44:21 -05:00
|
|
|
enum DecompositionType {
|
|
|
|
Canonical,
|
|
|
|
Compatible
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2014-05-12 15:44:21 -05:00
|
|
|
/// External iterator for a string's decomposition's characters.
|
2013-11-03 17:01:00 -06:00
|
|
|
/// Use with the `std::iter` module.
|
2013-08-10 20:36:38 -05:00
|
|
|
#[deriving(Clone)]
|
2014-05-12 15:44:21 -05:00
|
|
|
pub struct Decompositions<'a> {
|
|
|
|
kind: DecompositionType,
|
2014-03-27 17:09:47 -05:00
|
|
|
iter: Chars<'a>,
|
2014-04-17 17:28:14 -05:00
|
|
|
buffer: Vec<(char, u8)>,
|
2014-03-27 17:09:47 -05:00
|
|
|
sorted: bool
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2014-05-12 15:44:21 -05:00
|
|
|
impl<'a> Iterator<char> for Decompositions<'a> {
|
2013-08-10 20:36:38 -05:00
|
|
|
#[inline]
|
|
|
|
fn next(&mut self) -> Option<char> {
|
2014-05-12 15:44:21 -05:00
|
|
|
use unicode::normalization::canonical_combining_class;
|
2013-08-10 20:36:38 -05:00
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
match self.buffer.as_slice().head() {
|
2013-08-10 20:36:38 -05:00
|
|
|
Some(&(c, 0)) => {
|
|
|
|
self.sorted = false;
|
|
|
|
self.buffer.shift();
|
|
|
|
return Some(c);
|
|
|
|
}
|
|
|
|
Some(&(c, _)) if self.sorted => {
|
|
|
|
self.buffer.shift();
|
|
|
|
return Some(c);
|
|
|
|
}
|
|
|
|
_ => self.sorted = false
|
|
|
|
}
|
|
|
|
|
|
|
|
let decomposer = match self.kind {
|
2014-05-12 15:44:21 -05:00
|
|
|
Canonical => char::decompose_canonical,
|
|
|
|
Compatible => char::decompose_compatible
|
2013-08-10 20:36:38 -05:00
|
|
|
};
|
|
|
|
|
2013-08-29 10:11:11 -05:00
|
|
|
if !self.sorted {
|
|
|
|
for ch in self.iter {
|
2014-02-07 16:00:45 -06:00
|
|
|
let buffer = &mut self.buffer;
|
|
|
|
let sorted = &mut self.sorted;
|
2013-11-20 16:17:12 -06:00
|
|
|
decomposer(ch, |d| {
|
2013-08-29 10:11:11 -05:00
|
|
|
let class = canonical_combining_class(d);
|
2014-02-07 16:00:45 -06:00
|
|
|
if class == 0 && !*sorted {
|
2014-04-17 17:28:14 -05:00
|
|
|
canonical_sort(buffer.as_mut_slice());
|
2014-02-07 16:00:45 -06:00
|
|
|
*sorted = true;
|
2013-08-29 10:11:11 -05:00
|
|
|
}
|
2014-02-07 16:00:45 -06:00
|
|
|
buffer.push((d, class));
|
2013-11-20 16:17:12 -06:00
|
|
|
});
|
2014-02-07 16:00:45 -06:00
|
|
|
if *sorted { break }
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !self.sorted {
|
2014-04-17 17:28:14 -05:00
|
|
|
canonical_sort(self.buffer.as_mut_slice());
|
2013-08-10 20:36:38 -05:00
|
|
|
self.sorted = true;
|
|
|
|
}
|
|
|
|
|
2013-12-23 09:40:42 -06:00
|
|
|
match self.buffer.shift() {
|
2013-08-10 20:36:38 -05:00
|
|
|
Some((c, 0)) => {
|
|
|
|
self.sorted = false;
|
|
|
|
Some(c)
|
|
|
|
}
|
|
|
|
Some((c, _)) => Some(c),
|
|
|
|
None => None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-29 10:11:11 -05:00
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
let (lower, _) = self.iter.size_hint();
|
|
|
|
(lower, None)
|
|
|
|
}
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
/// Replace all occurrences of one string with another
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * s - The string containing substrings to replace
|
|
|
|
/// * from - The string to replace
|
|
|
|
/// * to - The replacement string
|
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
2014-04-20 23:49:39 -05:00
|
|
|
/// The original string with all occurrences of `from` replaced with `to`
|
2013-06-06 20:54:14 -05:00
|
|
|
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::new();
|
2013-06-06 20:54:14 -05:00
|
|
|
let mut last_end = 0;
|
2013-11-23 04:18:51 -06:00
|
|
|
for (start, end) in s.match_indices(from) {
|
2013-06-06 20:54:14 -05:00
|
|
|
result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
|
|
|
|
result.push_str(to);
|
|
|
|
last_end = end;
|
|
|
|
}
|
|
|
|
result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
|
2014-04-02 18:54:22 -05:00
|
|
|
result.into_owned()
|
2013-06-06 20:54:14 -05:00
|
|
|
}
|
|
|
|
|
2012-01-24 03:29:45 -06:00
|
|
|
/*
|
|
|
|
Section: Misc
|
|
|
|
*/
|
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a string, returning `None`
|
|
|
|
/// if `v` contains any invalid data.
|
2014-02-16 07:52:58 -06:00
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
///
|
|
|
|
/// // 𝄞music
|
2014-02-16 16:57:56 -06:00
|
|
|
/// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0x0069, 0x0063];
|
2014-04-15 20:17:48 -05:00
|
|
|
/// assert_eq!(str::from_utf16(v), Some("𝄞music".to_owned()));
|
2014-02-16 16:57:56 -06:00
|
|
|
///
|
|
|
|
/// // 𝄞mu<invalid>ic
|
|
|
|
/// v[4] = 0xD800;
|
|
|
|
/// assert_eq!(str::from_utf16(v), None);
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ```
|
2014-02-16 16:57:56 -06:00
|
|
|
pub fn from_utf16(v: &[u16]) -> Option<~str> {
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut s = StrBuf::with_capacity(v.len() / 2);
|
2014-02-16 16:57:56 -06:00
|
|
|
for c in utf16_items(v) {
|
|
|
|
match c {
|
|
|
|
ScalarValue(c) => s.push_char(c),
|
|
|
|
LoneSurrogate(_) => return None
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
Some(s.into_owned())
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a string, replacing
|
|
|
|
/// invalid data with the replacement character (U+FFFD).
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// ```rust
|
|
|
|
/// use std::str;
|
|
|
|
///
|
|
|
|
/// // 𝄞mus<invalid>ic<invalid>
|
|
|
|
/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0xDD1E, 0x0069, 0x0063,
|
|
|
|
/// 0xD834];
|
|
|
|
///
|
|
|
|
/// assert_eq!(str::from_utf16_lossy(v),
|
2014-04-15 20:17:48 -05:00
|
|
|
/// "𝄞mus\uFFFDic\uFFFD".to_owned());
|
2014-02-16 07:52:58 -06:00
|
|
|
/// ```
|
|
|
|
pub fn from_utf16_lossy(v: &[u16]) -> ~str {
|
|
|
|
utf16_items(v).map(|c| c.to_char_lossy()).collect()
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
|
2013-08-02 11:34:00 -05:00
|
|
|
// Return the initial codepoint accumulator for the first byte.
|
|
|
|
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
|
|
|
// for width 3, and 3 bits for width 4
|
|
|
|
macro_rules! utf8_first_byte(
|
2014-02-06 01:56:27 -06:00
|
|
|
($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
|
2013-08-02 11:34:00 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// return the value of $ch updated with continuation byte $byte
|
|
|
|
macro_rules! utf8_acc_cont_byte(
|
2014-02-06 01:56:27 -06:00
|
|
|
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
|
2013-08-02 11:34:00 -05:00
|
|
|
)
|
|
|
|
|
2013-08-07 01:03:31 -05:00
|
|
|
// Top-two-bit pattern (`10xxxxxx`) of a UTF-8 continuation byte; bytes are
// masked with 192 (0xC0) before being compared against this value.
static TAG_CONT_U8: u8 = 0x80u8;
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
/// Converts a vector of bytes to a new utf-8 string.
|
|
|
|
/// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
|
|
|
|
/// let output = std::str::from_utf8_lossy(input);
|
2014-02-07 16:58:37 -06:00
|
|
|
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
|
2014-02-06 01:56:27 -06:00
|
|
|
/// ```
|
2014-02-07 16:58:37 -06:00
|
|
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
|
2014-05-01 01:06:36 -05:00
|
|
|
if is_utf8(v) {
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
return Slice(unsafe { mem::transmute(v) })
|
2014-05-01 01:06:36 -05:00
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
|
2014-05-01 01:06:36 -05:00
|
|
|
let mut i = 0;
|
2014-02-06 01:56:27 -06:00
|
|
|
let total = v.len();
|
|
|
|
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
|
|
|
|
unsafe { *xs.unsafe_ref(i) }
|
|
|
|
}
|
|
|
|
fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
|
|
|
|
if i >= total {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
unsafe_get(xs, i)
|
|
|
|
}
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
let mut res = StrBuf::with_capacity(total);
|
2014-02-06 01:56:27 -06:00
|
|
|
|
2014-02-07 16:58:37 -06:00
|
|
|
if i > 0 {
|
2014-04-02 18:54:22 -05:00
|
|
|
unsafe {
|
|
|
|
res.push_bytes(v.slice_to(i))
|
|
|
|
};
|
2014-02-07 16:58:37 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
|
|
|
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
|
|
|
// them one by one.
|
2014-05-01 01:06:36 -05:00
|
|
|
let mut subseqidx = 0;
|
2014-02-07 16:58:37 -06:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
while i < total {
|
|
|
|
let i_ = i;
|
|
|
|
let byte = unsafe_get(v, i);
|
|
|
|
i += 1;
|
|
|
|
|
2014-02-07 16:58:37 -06:00
|
|
|
macro_rules! error(() => ({
|
2014-02-06 01:56:27 -06:00
|
|
|
unsafe {
|
2014-02-07 16:58:37 -06:00
|
|
|
if subseqidx != i_ {
|
2014-04-02 18:54:22 -05:00
|
|
|
res.push_bytes(v.slice(subseqidx, i_));
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
subseqidx = i;
|
2014-04-02 18:54:22 -05:00
|
|
|
res.push_bytes(REPLACEMENT);
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
}))
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
if byte < 128u8 {
|
2014-02-07 16:58:37 -06:00
|
|
|
// subseqidx handles this
|
2014-02-06 01:56:27 -06:00
|
|
|
} else {
|
|
|
|
let w = utf8_char_width(byte);
|
|
|
|
|
|
|
|
match w {
|
|
|
|
2 => {
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
3 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
|
|
|
(0xE0 , 0xA0 .. 0xBF) => (),
|
|
|
|
(0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
|
|
|
|
(0xED , 0x80 .. 0x9F) => (),
|
|
|
|
(0xEE .. 0xEF, 0x80 .. 0xBF) => (),
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
4 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
|
|
|
(0xF0 , 0x90 .. 0xBF) => (),
|
|
|
|
(0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
|
|
|
|
(0xF4 , 0x80 .. 0x8F) => (),
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-02-07 16:58:37 -06:00
|
|
|
if subseqidx < total {
|
2014-04-02 18:54:22 -05:00
|
|
|
unsafe {
|
|
|
|
res.push_bytes(v.slice(subseqidx, total))
|
|
|
|
};
|
2014-02-07 16:58:37 -06:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
Owned(res.into_owned())
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
|
|
|
|
2014-02-07 18:36:59 -06:00
|
|
|
/*
|
|
|
|
Section: MaybeOwned
|
|
|
|
*/
|
|
|
|
|
|
|
|
/// A MaybeOwned is a string that can hold either a ~str or a &str.
|
|
|
|
/// This can be useful as an optimization when an allocation is sometimes
|
|
|
|
/// needed but not always.
|
|
|
|
pub enum MaybeOwned<'a> {
|
|
|
|
/// A borrowed string
|
|
|
|
Slice(&'a str),
|
|
|
|
/// An owned string
|
|
|
|
Owned(~str)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// SendStr is a specialization of `MaybeOwned` to be sendable
|
|
|
|
pub type SendStr = MaybeOwned<'static>;
|
|
|
|
|
|
|
|
impl<'a> MaybeOwned<'a> {
|
|
|
|
/// Returns `true` if this `MaybeOwned` wraps an owned string
|
|
|
|
#[inline]
|
|
|
|
pub fn is_owned(&self) -> bool {
|
|
|
|
match *self {
|
|
|
|
Slice(_) => false,
|
|
|
|
Owned(_) => true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` if this `MaybeOwned` wraps a borrowed string
|
|
|
|
#[inline]
|
|
|
|
pub fn is_slice(&self) -> bool {
|
|
|
|
match *self {
|
|
|
|
Slice(_) => true,
|
|
|
|
Owned(_) => false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Trait for moving into a `MaybeOwned`
|
|
|
|
pub trait IntoMaybeOwned<'a> {
|
|
|
|
/// Moves self into a `MaybeOwned`
|
|
|
|
fn into_maybe_owned(self) -> MaybeOwned<'a>;
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> IntoMaybeOwned<'a> for ~str {
    // An owned string is moved straight into the `Owned` variant.
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> {
        Owned(self)
    }
}
|
|
|
|
|
|
|
|
impl<'a> IntoMaybeOwned<'a> for &'a str {
|
|
|
|
#[inline]
|
|
|
|
fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
    // Already a `MaybeOwned`: the conversion is the identity.
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> {
        self
    }
}
|
|
|
|
|
|
|
|
impl<'a> Eq for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &MaybeOwned) -> bool {
|
2014-03-23 06:54:42 -05:00
|
|
|
self.as_slice() == other.as_slice()
|
2014-02-07 18:36:59 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-23 06:54:42 -05:00
|
|
|
// Empty impl: no methods are supplied here for `TotalEq` on `MaybeOwned`.
impl<'a> TotalEq for MaybeOwned<'a> {}
|
2014-02-07 18:36:59 -06:00
|
|
|
|
|
|
|
impl<'a> Ord for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn lt(&self, other: &MaybeOwned) -> bool {
|
|
|
|
self.as_slice().lt(&other.as_slice())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> TotalOrd for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn cmp(&self, other: &MaybeOwned) -> Ordering {
|
|
|
|
self.as_slice().cmp(&other.as_slice())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
    // Equivalent to any `Str` value whose slice has the same contents.
    #[inline]
    fn equiv(&self, other: &S) -> bool {
        let (mine, theirs) = (self.as_slice(), other.as_slice());
        mine == theirs
    }
}
|
|
|
|
|
|
|
|
impl<'a> Str for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn as_slice<'b>(&'b self) -> &'b str {
|
|
|
|
match *self {
|
|
|
|
Slice(s) => s,
|
|
|
|
Owned(ref s) => s.as_slice()
|
|
|
|
}
|
|
|
|
}
|
2014-05-01 01:06:36 -05:00
|
|
|
}
|
2014-02-07 18:36:59 -06:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
impl<'a> StrAllocating for MaybeOwned<'a> {
|
2014-02-07 18:36:59 -06:00
|
|
|
#[inline]
|
|
|
|
fn into_owned(self) -> ~str {
|
|
|
|
match self {
|
|
|
|
Slice(s) => s.to_owned(),
|
|
|
|
Owned(s) => s
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Container for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn len(&self) -> uint { self.as_slice().len() }
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Clone for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn clone(&self) -> MaybeOwned<'a> {
|
|
|
|
match *self {
|
|
|
|
Slice(s) => Slice(s),
|
|
|
|
Owned(ref s) => Owned(s.to_owned())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Default for MaybeOwned<'a> {
|
|
|
|
#[inline]
|
|
|
|
fn default() -> MaybeOwned<'a> { Slice("") }
|
|
|
|
}
|
|
|
|
|
2014-02-25 10:03:41 -06:00
|
|
|
impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
    // Hashes the string contents, so the result does not depend on which
    // variant holds them.
    #[inline]
    fn hash(&self, hasher: &mut H) {
        self.as_slice().hash(hasher)
    }
}
|
|
|
|
|
|
|
|
impl<'a> fmt::Show for MaybeOwned<'a> {
    // Formats exactly like the string it holds, for either variant.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.as_slice().fmt(f)
    }
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Unsafe operations
|
2012-09-28 17:41:10 -05:00
|
|
|
pub mod raw {
|
2012-12-23 16:41:37 -06:00
|
|
|
use libc;
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
use mem;
|
2014-01-06 18:48:51 -06:00
|
|
|
use ptr::RawPtr;
|
2014-05-04 02:25:44 -05:00
|
|
|
use raw::Slice;
|
2014-05-07 23:53:11 -05:00
|
|
|
use slice::CloneableVector;
|
|
|
|
use str::{is_utf8, StrAllocating};
|
2012-01-30 21:52:38 -06:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
|
|
|
|
pub use core::str::raw::{slice_unchecked};
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Create a Rust string from a *u8 buffer of the given length
|
2013-08-04 15:22:56 -05:00
|
|
|
pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
|
2014-05-03 23:26:11 -05:00
|
|
|
let v = Slice { data: buf, len: len };
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
let bytes: &[u8] = ::mem::transmute(v);
|
2014-05-03 23:26:11 -05:00
|
|
|
assert!(is_utf8(bytes));
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
let s: &str = ::mem::transmute(bytes);
|
2014-05-03 23:26:11 -05:00
|
|
|
s.to_owned()
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2013-08-10 12:42:53 -05:00
|
|
|
#[lang="strdup_uniq"]
|
|
|
|
#[cfg(not(test))]
|
|
|
|
#[inline]
|
2014-03-16 18:50:22 -05:00
|
|
|
unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
|
2013-08-10 12:42:53 -05:00
|
|
|
from_buf_len(ptr, len)
|
|
|
|
}
|
|
|
|
|
2012-07-04 16:53:12 -05:00
|
|
|
/// Create a Rust string from a null-terminated C string
|
2013-08-04 16:08:20 -05:00
|
|
|
pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
|
|
|
|
let mut curr = buf;
|
|
|
|
let mut i = 0;
|
|
|
|
while *curr != 0 {
|
|
|
|
i += 1;
|
2014-02-10 15:50:42 -06:00
|
|
|
curr = buf.offset(i);
|
2013-08-04 16:08:20 -05:00
|
|
|
}
|
|
|
|
from_buf_len(buf as *u8, i as uint)
|
2012-03-19 17:25:26 -05:00
|
|
|
}
|
|
|
|
|
2013-06-28 16:05:10 -05:00
|
|
|
/// Converts an owned vector of bytes to a new owned string. This assumes
|
|
|
|
/// that the utf-8-ness of the vector has already been validated
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
2013-09-05 07:17:24 -05:00
|
|
|
pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
mem::transmute(v)
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2012-07-24 14:35:34 -05:00
|
|
|
/// Converts a byte to a string.
|
2014-04-25 03:08:02 -05:00
|
|
|
pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
|
2012-02-01 05:25:04 -06:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Access the str in its vector representation.
|
|
|
|
/// The caller must preserve the valid UTF-8 property when modifying.
|
|
|
|
#[inline]
|
|
|
|
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
mem::transmute(s)
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Sets the length of a string
|
2013-09-25 18:18:50 -05:00
|
|
|
///
|
2014-05-01 01:06:36 -05:00
|
|
|
/// This will explicitly set the size of the string, without actually
|
|
|
|
/// modifying its buffers, so it is up to the caller to ensure that
|
|
|
|
/// the string is actually the specified size.
|
|
|
|
#[test]
|
|
|
|
fn test_from_buf_len() {
|
|
|
|
use slice::ImmutableVector;
|
|
|
|
use str::StrAllocating;
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
unsafe {
|
|
|
|
let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
|
|
|
|
let b = a.as_ptr();
|
|
|
|
let c = from_buf_len(b, 3u);
|
|
|
|
assert_eq!(c, "AAA".to_owned());
|
2013-09-25 18:18:50 -05:00
|
|
|
}
|
|
|
|
}
|
2014-05-01 01:06:36 -05:00
|
|
|
}
|
2013-09-25 18:18:50 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/*
|
|
|
|
Section: Trait implementations
|
|
|
|
*/
|
2013-06-11 06:37:22 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Any string that can be represented as a slice
|
|
|
|
pub trait StrAllocating: Str {
|
|
|
|
/// Convert `self` into a ~str, not making a copy if possible.
|
|
|
|
fn into_owned(self) -> ~str;
|
2013-06-11 06:37:22 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Convert `self` into a `StrBuf`.
|
2013-10-17 01:02:46 -05:00
|
|
|
#[inline]
|
2014-05-01 01:06:36 -05:00
|
|
|
fn to_strbuf(&self) -> StrBuf {
|
|
|
|
StrBuf::from_str(self.as_slice())
|
2013-03-04 21:36:15 -06:00
|
|
|
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Convert `self` into a `StrBuf`, not making a copy if possible.
|
2013-10-17 01:02:46 -05:00
|
|
|
#[inline]
|
2014-05-01 01:06:36 -05:00
|
|
|
fn into_strbuf(self) -> StrBuf {
|
|
|
|
StrBuf::from_owned_str(self.into_owned())
|
2013-06-10 10:03:16 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Escape each char in `s` with `char::escape_default`.
|
2013-06-11 07:13:23 -05:00
|
|
|
fn escape_default(&self) -> ~str {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
|
|
|
let mut out = StrBuf::with_capacity(me.len());
|
|
|
|
for c in me.chars() {
|
2013-11-20 16:17:12 -06:00
|
|
|
c.escape_default(|c| out.push_char(c));
|
2013-06-11 07:13:23 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
out.into_owned()
|
2013-06-11 07:13:23 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Escape each char in `s` with `char::escape_unicode`.
|
2013-06-11 07:13:23 -05:00
|
|
|
fn escape_unicode(&self) -> ~str {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
|
|
|
let mut out = StrBuf::with_capacity(me.len());
|
|
|
|
for c in me.chars() {
|
2013-11-20 16:17:12 -06:00
|
|
|
c.escape_unicode(|c| out.push_char(c));
|
2013-06-11 07:13:23 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
out.into_owned()
|
2013-06-11 07:13:23 -05:00
|
|
|
}
|
2012-07-23 13:51:12 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Replace all occurrences of one string with another.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
///
|
|
|
|
/// * `from` - The string to replace
|
|
|
|
/// * `to` - The replacement string
|
|
|
|
///
|
|
|
|
/// # Return value
|
|
|
|
///
|
|
|
|
/// The original string with all occurrences of `from` replaced with `to`.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let s = "Do you know the muffin man,
|
|
|
|
/// The muffin man, the muffin man, ...".to_owned();
|
|
|
|
///
|
|
|
|
/// assert_eq!(s.replace("muffin man", "little lamb"),
|
|
|
|
/// "Do you know the little lamb,
|
|
|
|
/// The little lamb, the little lamb, ...".to_owned());
|
|
|
|
///
|
|
|
|
/// // not found, so no change.
|
|
|
|
/// assert_eq!(s.replace("cookie monster", "little lamb"), s);
|
|
|
|
/// ```
|
2013-08-09 03:25:24 -05:00
|
|
|
fn replace(&self, from: &str, to: &str) -> ~str {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut result = StrBuf::new();
|
2013-06-14 21:40:11 -05:00
|
|
|
let mut last_end = 0;
|
2014-05-01 01:06:36 -05:00
|
|
|
for (start, end) in me.match_indices(from) {
|
|
|
|
result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
|
2013-06-11 06:46:40 -05:00
|
|
|
result.push_str(to);
|
|
|
|
last_end = end;
|
|
|
|
}
|
2014-05-01 01:06:36 -05:00
|
|
|
result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
|
2014-04-02 18:54:22 -05:00
|
|
|
result.into_owned()
|
2013-06-11 06:46:40 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Copy a slice into a new owned str.
|
2013-08-04 15:22:56 -05:00
|
|
|
#[inline]
|
|
|
|
fn to_owned(&self) -> ~str {
|
2014-05-07 23:53:11 -05:00
|
|
|
use slice::Vector;
|
|
|
|
|
2013-12-17 09:37:30 -06:00
|
|
|
unsafe {
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
::mem::transmute(self.as_slice().as_bytes().to_owned())
|
2013-12-17 09:37:30 -06:00
|
|
|
}
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Converts to a vector of `u16` encoded as UTF-16.
|
2014-05-03 23:26:11 -05:00
|
|
|
fn to_utf16(&self) -> Vec<u16> {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
2014-05-03 23:26:11 -05:00
|
|
|
let mut u = Vec::new();
|
2014-05-01 01:06:36 -05:00
|
|
|
for ch in me.chars() {
|
2014-04-11 14:49:31 -05:00
|
|
|
let mut buf = [0u16, ..2];
|
|
|
|
let n = ch.encode_utf16(buf /* as mut slice! */);
|
|
|
|
u.push_all(buf.slice_to(n));
|
2013-06-13 10:44:15 -05:00
|
|
|
}
|
2014-05-03 23:26:11 -05:00
|
|
|
u
|
2013-06-13 10:44:15 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Given a string, make a new string with repeated copies of it.
|
2013-08-04 15:22:56 -05:00
|
|
|
fn repeat(&self, nn: uint) -> ~str {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
|
|
|
let mut ret = StrBuf::with_capacity(nn * me.len());
|
2013-09-10 17:53:21 -05:00
|
|
|
for _ in range(0, nn) {
|
2014-05-01 01:06:36 -05:00
|
|
|
ret.push_str(me);
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
ret.into_owned()
|
2013-08-04 15:22:56 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// Levenshtein Distance between two strings.
|
2013-06-13 10:39:06 -05:00
|
|
|
fn lev_distance(&self, t: &str) -> uint {
|
2014-05-01 01:06:36 -05:00
|
|
|
let me = self.as_slice();
|
|
|
|
let slen = me.len();
|
2013-06-13 10:39:06 -05:00
|
|
|
let tlen = t.len();
|
|
|
|
|
|
|
|
if slen == 0 { return tlen; }
|
|
|
|
if tlen == 0 { return slen; }
|
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
let mut dcol = Vec::from_fn(tlen + 1, |x| x);
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
for (i, sc) in me.chars().enumerate() {
|
2013-06-13 10:39:06 -05:00
|
|
|
|
|
|
|
let mut current = i;
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(0) = current + 1;
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
for (j, tc) in t.chars().enumerate() {
|
2013-06-13 10:39:06 -05:00
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
let next = *dcol.get(j + 1);
|
2013-06-13 10:39:06 -05:00
|
|
|
|
|
|
|
if sc == tc {
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(j + 1) = current;
|
2013-06-13 10:39:06 -05:00
|
|
|
} else {
|
2014-04-17 17:28:14 -05:00
|
|
|
*dcol.get_mut(j + 1) = ::cmp::min(current, next);
|
|
|
|
*dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
|
|
|
|
*dcol.get(j)) + 1;
|
2013-06-13 10:39:06 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
current = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-17 17:28:14 -05:00
|
|
|
return *dcol.get(tlen);
|
2013-06-13 10:39:06 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// An Iterator over the string in Unicode Normalization Form D
|
|
|
|
/// (canonical decomposition).
|
|
|
|
#[inline]
|
2014-05-12 15:44:21 -05:00
|
|
|
fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
|
|
|
|
Decompositions {
|
2014-05-01 01:06:36 -05:00
|
|
|
iter: self.as_slice().chars(),
|
|
|
|
buffer: Vec::new(),
|
|
|
|
sorted: false,
|
2014-05-12 15:44:21 -05:00
|
|
|
kind: Canonical
|
2014-05-01 01:06:36 -05:00
|
|
|
}
|
2013-06-13 10:39:06 -05:00
|
|
|
}
|
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
/// An Iterator over the string in Unicode Normalization Form KD
|
|
|
|
/// (compatibility decomposition).
|
2013-07-10 19:33:11 -05:00
|
|
|
#[inline]
|
2014-05-12 15:44:21 -05:00
|
|
|
fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
|
|
|
|
Decompositions {
|
2014-05-01 01:06:36 -05:00
|
|
|
iter: self.as_slice().chars(),
|
|
|
|
buffer: Vec::new(),
|
|
|
|
sorted: false,
|
2014-05-12 15:44:21 -05:00
|
|
|
kind: Compatible
|
2014-05-01 01:06:36 -05:00
|
|
|
}
|
2013-07-10 19:33:11 -05:00
|
|
|
}
|
2012-03-16 19:35:38 -05:00
|
|
|
}
|
2012-01-24 03:29:45 -06:00
|
|
|
|
2014-05-01 01:06:36 -05:00
|
|
|
impl<'a> StrAllocating for &'a str {
|
|
|
|
#[inline]
|
|
|
|
fn into_owned(self) -> ~str { self.to_owned() }
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> StrAllocating for ~str {
    // Already owned: hand the string back unchanged.
    #[inline]
    fn into_owned(self) -> ~str {
        self
    }
}
|
|
|
|
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Methods for owned strings
|
2013-01-31 19:12:29 -06:00
|
|
|
pub trait OwnedStr {
|
2013-09-25 18:18:50 -05:00
|
|
|
/// Consumes the string, returning the underlying byte buffer.
|
|
|
|
///
|
|
|
|
/// The buffer does not have a null terminator.
|
2013-08-24 01:37:22 -05:00
|
|
|
fn into_bytes(self) -> ~[u8];
|
2013-06-10 22:10:37 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
/// Pushes the given string onto this string, returning the concatenation of the two strings.
|
|
|
|
fn append(self, rhs: &str) -> ~str;
|
2013-01-31 19:12:29 -06:00
|
|
|
}
|
|
|
|
|
2013-02-26 19:12:00 -06:00
|
|
|
impl OwnedStr for ~str {
|
2013-08-24 01:37:22 -05:00
|
|
|
#[inline]
|
|
|
|
fn into_bytes(self) -> ~[u8] {
|
core: Remove the cast module
This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.
* transmute - This function was moved to `mem`, but it is now marked as
#[unstable]. This is due to planned changes to the `transmute`
function and how it can be invoked (see the #[unstable] comment).
For more information, see RFC 5 and #12898
* transmute_copy - This function was moved to `mem`, with clarification that is
is not an error to invoke it with T/U that are different
sizes, but rather that it is strongly discouraged. This
function is now #[stable]
* forget - This function was moved to `mem` and marked #[stable]
* bump_box_refcount - This function was removed due to the deprecation of
managed boxes as well as its questionable utility.
* transmute_mut - This function was previously deprecated, and removed as part
of this commit.
* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
can be achieved with an `as` in safe code, so it was
removed.
* transmute_lifetime - This function was removed because it is likely a strong
indication that code is incorrect in the first place.
* transmute_mut_lifetime - This function was removed for the same reasons as
`transmute_lifetime`
* copy_lifetime - This function was moved to `mem`, but it is marked
`#[unstable]` now due to the likelihood of being removed in
the future if it is found to not be very useful.
* copy_mut_lifetime - This function was also moved to `mem`, but had the same
treatment as `copy_lifetime`.
* copy_lifetime_vec - This function was removed because it is not used today,
and its existence is not necessary with DST
(copy_lifetime will suffice).
In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.
transmute - #[unstable]
transmute_copy - #[stable]
forget - #[stable]
copy_lifetime - #[unstable]
copy_mut_lifetime - #[unstable]
[breaking-change]
2014-05-09 12:34:51 -05:00
|
|
|
unsafe { mem::transmute(self) }
|
2013-08-24 01:37:22 -05:00
|
|
|
}
|
2013-08-24 00:05:35 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
|
|
|
fn append(self, rhs: &str) -> ~str {
|
|
|
|
let mut new_str = StrBuf::from_owned_str(self);
|
|
|
|
new_str.push_str(rhs);
|
|
|
|
new_str.into_owned()
|
|
|
|
}
|
2013-01-31 19:12:29 -06:00
|
|
|
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2014-01-07 00:33:37 -06:00
|
|
|
use iter::AdditiveIterator;
|
2014-02-22 19:07:11 -06:00
|
|
|
use default::Default;
|
2014-01-07 00:33:37 -06:00
|
|
|
use prelude::*;
|
2013-01-08 21:37:25 -06:00
|
|
|
use str::*;
|
2014-04-02 18:54:22 -05:00
|
|
|
use strbuf::StrBuf;
|
2012-03-23 16:41:02 -05:00
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_eq() {
    // Equal contents compare equal, including the empty string.
    let same_empty = eq(&"".to_owned(), &"".to_owned());
    assert!(same_empty);
    let same_foo = eq(&"foo".to_owned(), &"foo".to_owned());
    assert!(same_foo);
    // Different contents do not.
    let foo_is_bar = eq(&"foo".to_owned(), &"bar".to_owned());
    assert!(!foo_is_bar);
}
|
|
|
|
|
2012-09-03 12:47:10 -05:00
|
|
|
#[test]
fn test_eq_slice() {
    // Sub-slices taken from either end compare equal to a literal.
    let head = "foobar".slice(0, 3);
    assert!(eq_slice(head, "foo"));
    let tail = "barfoo".slice(3, 6);
    assert!(eq_slice(tail, "foo"));
    // Same length, different final byte.
    assert!(!eq_slice("foo1", "foo2"));
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_le() {
    let empty = "";
    let foo = "foo";
    // The empty string is less than or equal to everything.
    assert!(empty <= "");
    assert!(empty <= foo);
    // `<=` is reflexive.
    assert!(foo <= "foo");
    assert!(foo != "bar");
}
|
|
|
|
|
|
|
|
#[test]
fn test_len() {
    // `len` is expected to report the size in bytes.
    let byte_lens = [
        ("", 0u),
        ("hello world", 11u),
        ("\x63", 1u),
        ("\xa2", 2u),
        ("\u03c0", 2u),
        ("\u2620", 3u),
        ("\U0001d11e", 4u)
    ];
    for &(text, expected) in byte_lens.iter() {
        assert_eq!(text.len(), expected);
    }

    // `char_len` is expected to report the number of characters.
    let char_lens = [
        ("", 0u),
        ("hello world", 11u),
        ("\x63", 1u),
        ("\xa2", 1u),
        ("\u03c0", 1u),
        ("\u2620", 1u),
        ("\U0001d11e", 1u),
        ("ประเทศไทย中华Việt Nam", 19u)
    ];
    for &(text, expected) in char_lens.iter() {
        assert_eq!(text.char_len(), expected);
    }
}
|
|
|
|
|
|
|
|
#[test]
fn test_find() {
    let ascii = "hello";
    let mixed = "ประเทศไทย中华Việt Nam";

    // Hits, by char and by predicate; the result is a byte offset.
    assert_eq!(ascii.find('l'), Some(2u));
    assert_eq!(ascii.find(|c:char| c == 'o'), Some(4u));

    // Misses.
    assert!(ascii.find('x').is_none());
    assert!(ascii.find(|c:char| c == 'x').is_none());

    // Multi-byte haystack.
    assert_eq!(mixed.find('华'), Some(30u));
    assert_eq!(mixed.find(|c: char| c == '华'), Some(30u));
}
|
|
|
|
|
|
|
|
#[test]
fn test_rfind() {
    let ascii = "hello";
    let mixed = "ประเทศไทย中华Việt Nam";

    // Hits, by char and by predicate; the last occurrence wins.
    assert_eq!(ascii.rfind('l'), Some(3u));
    assert_eq!(ascii.rfind(|c:char| c == 'o'), Some(4u));

    // Misses.
    assert!(ascii.rfind('x').is_none());
    assert!(ascii.rfind(|c:char| c == 'x').is_none());

    // Multi-byte haystack.
    assert_eq!(mixed.rfind('华'), Some(30u));
    assert_eq!(mixed.rfind(|c: char| c == '华'), Some(30u));
}
|
|
|
|
|
2013-07-28 19:40:28 -05:00
|
|
|
#[test]
|
|
|
|
fn test_collect() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let empty = "".to_owned();
|
2013-11-23 04:18:51 -06:00
|
|
|
let s: ~str = empty.chars().collect();
|
2013-09-27 19:02:31 -05:00
|
|
|
assert_eq!(empty, s);
|
2014-04-15 20:17:48 -05:00
|
|
|
let data = "ประเทศไทย中".to_owned();
|
2013-11-23 04:18:51 -06:00
|
|
|
let s: ~str = data.chars().collect();
|
2013-09-27 19:02:31 -05:00
|
|
|
assert_eq!(data, s);
|
2013-07-28 19:40:28 -05:00
|
|
|
}
|
|
|
|
|
2013-08-24 01:37:22 -05:00
|
|
|
#[test]
fn test_into_bytes() {
    // The byte buffer must hold exactly the string's bytes.
    let buf = "asdf".to_owned().into_bytes();
    assert_eq!(bytes!("asdf"), buf.as_slice());
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_find_str() {
    // All expected positions are byte positions.
    assert_eq!("".find_str(""), Some(0u));
    assert!("banana".find_str("apple pie").is_none());

    let abc_twice = "abcabc";
    assert_eq!(abc_twice.slice(0u, 6u).find_str("ab"), Some(0u));
    assert_eq!(abc_twice.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
    assert!(abc_twice.slice(2u, 4u).find_str("ab").is_none());

    // The same 43-byte text, twice in a row.
    let mut doubled = "ประเทศไทย中华Việt Nam".to_owned();
    doubled = doubled + doubled;
    assert!(doubled.find_str("ไท华").is_none());
    assert_eq!(doubled.slice(0u, 43u).find_str(""), Some(0u));
    assert_eq!(doubled.slice(6u, 43u).find_str(""), Some(6u - 6u));

    // Searches within the first copy.
    let first = doubled.slice(0u, 43u);
    assert_eq!(first.find_str("ประ"), Some( 0u));
    assert_eq!(first.find_str("ทศไ"), Some(12u));
    assert_eq!(first.find_str("ย中"), Some(24u));
    assert_eq!(first.find_str("iệt"), Some(34u));
    assert_eq!(first.find_str("Nam"), Some(40u));

    // Searches within the second copy; results are relative to its start.
    let second = doubled.slice(43u, 86u);
    assert_eq!(second.find_str("ประ"), Some(43u - 43u));
    assert_eq!(second.find_str("ทศไ"), Some(55u - 43u));
    assert_eq!(second.find_str("ย中"), Some(67u - 43u));
    assert_eq!(second.find_str("iệt"), Some(77u - 43u));
    assert_eq!(second.find_str("Nam"), Some(83u - 43u));
}
|
|
|
|
|
|
|
|
#[test]
fn test_slice_chars() {
    // Checks that `expected` is found in `haystack` starting at character
    // index `start`.
    fn check(haystack: &str, expected: &str, start: uint) {
        let end = start + expected.char_len();
        assert_eq!(haystack.slice_chars(start, end), expected);
    }

    check("", "", 0);
    check("hello", "llo", 2);
    check("hello", "el", 1);
    check("αβλ", "β", 1);
    check("αβλ", "", 3);

    let thai_mix = "ประเทศไทย中华Việt Nam";
    assert_eq!("ะเทศไท", thai_mix.slice_chars(2, 8));
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_concat() {
|
2012-09-21 20:36:32 -05:00
|
|
|
fn t(v: &[~str], s: &str) {
|
2013-06-02 22:19:37 -05:00
|
|
|
assert_eq!(v.concat(), s.to_str());
|
2012-09-21 20:36:32 -05:00
|
|
|
}
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
|
|
|
|
"no".to_owned(), "good".to_owned()], "youknowI'mnogood");
|
2013-05-23 11:39:17 -05:00
|
|
|
let v: &[~str] = [];
|
|
|
|
t(v, "");
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["hi".to_owned()], "hi");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_connect() {
|
2012-09-21 20:36:32 -05:00
|
|
|
fn t(v: &[~str], sep: &str, s: &str) {
|
2013-06-02 22:19:37 -05:00
|
|
|
assert_eq!(v.connect(sep), s.to_str());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
|
|
|
|
"no".to_owned(), "good".to_owned()],
|
2013-05-23 11:39:17 -05:00
|
|
|
" ", "you know I'm no good");
|
2013-05-27 18:04:00 -05:00
|
|
|
let v: &[~str] = [];
|
2013-05-23 11:39:17 -05:00
|
|
|
t(v, " ", "");
|
2014-04-15 20:17:48 -05:00
|
|
|
t(["hi".to_owned()], " ", "hi");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2013-06-02 22:19:37 -05:00
|
|
|
#[test]
fn test_concat_slices() {
    // Same as `test_concat`, but for borrowed pieces.
    fn check(parts: &[&str], expected: &str) {
        let joined = parts.concat();
        assert_eq!(joined, expected.to_str());
    }

    check(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
    let none: &[&str] = [];
    check(none, "");
    check(["hi"], "hi");
}
|
|
|
|
|
2013-02-07 21:33:12 -06:00
|
|
|
#[test]
fn test_connect_slices() {
    // Same as `test_connect`, but for borrowed pieces.
    fn check(parts: &[&str], sep: &str, expected: &str) {
        let joined = parts.connect(sep);
        assert_eq!(joined, expected.to_str());
    }

    check(["you", "know", "I'm", "no", "good"],
          " ", "you know I'm no good");
    check([], " ", "");
    check(["hi"], " ", "hi");
}
|
|
|
|
|
2012-10-11 18:54:31 -05:00
|
|
|
#[test]
fn test_repeat() {
    // Ordinary cases, ASCII and multi-byte.
    let xs = "x".repeat(4);
    assert_eq!(xs, "xxxx".to_owned());
    let his = "hi".repeat(4);
    assert_eq!(his, "hihihihi".to_owned());
    let mixed = "ไท华".repeat(3);
    assert_eq!(mixed, "ไท华ไท华ไท华".to_owned());

    // Degenerate cases: empty piece, zero count.
    let nothing_repeated = "".repeat(4);
    assert_eq!(nothing_repeated, "".to_owned());
    let repeated_zero_times = "hi".repeat(0);
    assert_eq!(repeated_zero_times, "".to_owned());
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
2012-06-24 22:18:18 -05:00
|
|
|
fn test_unsafe_slice() {
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
|
|
|
|
assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
|
|
|
|
assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
|
2013-03-21 05:58:03 -05:00
|
|
|
fn a_million_letter_a() -> ~str {
|
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("aaaaaaaaaa");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
2013-03-21 05:58:03 -05:00
|
|
|
fn half_a_million_letter_a() -> ~str {
|
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("aaaaa");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2013-03-21 05:58:03 -05:00
|
|
|
}
|
|
|
|
let letters = a_million_letter_a();
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(half_a_million_letter_a() ==
|
2013-03-21 06:36:21 -05:00
|
|
|
unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_starts_with() {
    // The empty string is a prefix of everything.
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    // A prefix longer than the string never matches.
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
    // Multi-byte first character.
    assert!(!"ödd".starts_with("-"));
    assert!("ödd".starts_with("öd"));
}
|
|
|
|
|
|
|
|
#[test]
fn test_ends_with() {
    // The empty string is a suffix of everything.
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    // A suffix longer than the string never matches.
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
    // Multi-byte last character.
    assert!(!"ddö".ends_with("-"));
    assert!("ddö".ends_with("dö"));
}
|
|
|
|
|
|
|
|
#[test]
fn test_is_empty() {
    let empty = "";
    let one_char = "a";
    assert!(empty.is_empty());
    assert!(!one_char.is_empty());
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace() {
    // Single-character pattern.
    let a = "a";
    assert_eq!("".replace(a, "b"), "".to_owned());
    assert_eq!("a".replace(a, "b"), "b".to_owned());
    assert_eq!("ab".replace(a, "b"), "bb".to_owned());

    // Multi-character pattern occurring twice.
    let test = "test";
    assert!(" test test ".replace(test, "toast") ==
        " toast toast ".to_owned());
    // Deleting both occurrences leaves the three surrounding spaces
    // (leading, middle and trailing).
    assert_eq!(" test test ".replace(test, ""), "   ".to_owned());
}
|
|
|
|
|
2012-02-12 08:14:49 -06:00
|
|
|
#[test]
fn test_replace_2a() {
    // Replace a multi-byte prefix with text from another script.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "ประเ".to_owned();
    let expected = "دولة الكويتทศไทย中华".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2b() {
    // Replace a multi-byte run that starts after the first two characters.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "ะเ".to_owned();
    let expected = "ปรدولة الكويتทศไทย中华".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2c() {
    // Replace a multi-byte suffix.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let needle = "中华".to_owned();
    let expected = "ประเทศไทยدولة الكويت".to_owned();
    assert_eq!(haystack.replace(needle, replacement), expected);
}
|
|
|
|
|
|
|
|
#[test]
fn test_replace_2d() {
    // A pattern that does not occur leaves the string unchanged.
    let haystack = "ประเทศไทย中华".to_owned();
    let replacement = "دولة الكويت".to_owned();

    let absent = "ไท华".to_owned();
    assert_eq!(haystack.replace(absent, replacement), haystack);
}
|
|
|
|
|
2012-02-22 02:49:05 -06:00
|
|
|
#[test]
|
|
|
|
fn test_slice() {
|
2013-06-09 09:44:58 -05:00
|
|
|
assert_eq!("ab", "abc".slice(0, 2));
|
|
|
|
assert_eq!("bc", "abc".slice(1, 3));
|
|
|
|
assert_eq!("", "abc".slice(1, 1));
|
|
|
|
assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
|
2012-07-14 00:57:48 -05:00
|
|
|
|
2013-03-21 05:58:03 -05:00
|
|
|
let data = "ประเทศไทย中华";
|
2013-06-09 09:44:58 -05:00
|
|
|
assert_eq!("ป", data.slice(0, 3));
|
|
|
|
assert_eq!("ร", data.slice(3, 6));
|
|
|
|
assert_eq!("", data.slice(3, 3));
|
|
|
|
assert_eq!("华", data.slice(30, 33));
|
2012-07-14 00:57:48 -05:00
|
|
|
|
|
|
|
fn a_million_letter_X() -> ~str {
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
2012-09-21 20:36:32 -05:00
|
|
|
while i < 100000 {
|
2014-04-02 18:54:22 -05:00
|
|
|
rs.push_str("华华华华华华华华华华");
|
2012-09-21 20:36:32 -05:00
|
|
|
i += 1;
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
rs.into_owned()
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
2012-07-14 00:57:48 -05:00
|
|
|
fn half_a_million_letter_X() -> ~str {
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i = 0;
|
2014-04-02 18:54:22 -05:00
|
|
|
let mut rs = StrBuf::new();
|
|
|
|
while i < 100000 {
|
|
|
|
rs.push_str("华华华华华");
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
rs.into_owned()
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
2013-03-21 05:58:03 -05:00
|
|
|
let letters = a_million_letter_X();
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(half_a_million_letter_X() ==
|
2013-06-09 09:44:58 -05:00
|
|
|
letters.slice(0u, 3u * 500000u).to_owned());
|
2012-02-22 02:49:05 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_slice_2() {
    /* Byte offset of each character in the sample:
        0: 中
        3: 华
        6: V
        7: i
        8: ệ
       11: t
       12:
       13: N
       14: a
       15: m */
    let sample = "中华Việt Nam";

    assert_eq!("华", sample.slice(3u, 6u));
    assert_eq!("Việt Nam", sample.slice(6u, 16u));

    let abc = "abc";
    assert_eq!("ab", abc.slice(0u, 2u));
    assert_eq!("bc", abc.slice(1u, 3u));
    assert_eq!("", abc.slice(1u, 1u));

    assert_eq!("中", sample.slice(0u, 3u));
    assert_eq!("华V", sample.slice(3u, 7u));
    assert_eq!("", sample.slice(3u, 3u));
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
#[should_fail]
fn test_slice_fail() {
    // Byte 2 lies inside the first three-byte character, so this slice
    // is expected to fail.
    let sample = "中华Việt Nam";
    sample.slice(0u, 2u);
}
|
|
|
|
|
2013-06-09 22:09:51 -05:00
|
|
|
#[test]
fn test_slice_from() {
    let abcd = "abcd";
    // From the start, from the middle, and from the very end.
    assert_eq!(abcd.slice_from(0), "abcd");
    assert_eq!(abcd.slice_from(2), "cd");
    assert_eq!(abcd.slice_from(4), "");
}
|
|
|
|
#[test]
fn test_slice_to() {
    let abcd = "abcd";
    // Up to the start, up to the middle, and up to the very end.
    assert_eq!(abcd.slice_to(0), "");
    assert_eq!(abcd.slice_to(2), "ab");
    assert_eq!(abcd.slice_to(4), "abcd");
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_trim_left_chars() {
    // An empty character set trims nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_left_chars(nothing), " *** foo *** ");

    // Character-set matcher.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_left_chars(star_or_space), "foo *** ");
    assert_eq!(" *** *** ".trim_left_chars(star_or_space), "");
    assert_eq!("foo *** ".trim_left_chars(star_or_space), "foo *** ");

    // Single char, char set, and predicate matchers.
    assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
    assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
    assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
}
|
|
|
|
|
|
|
|
#[test]
fn test_trim_right_chars() {
    // An empty character set trims nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_right_chars(nothing), " *** foo *** ");

    // Character-set matcher.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_right_chars(star_or_space), " *** foo");
    assert_eq!(" *** *** ".trim_right_chars(star_or_space), "");
    assert_eq!(" *** foo".trim_right_chars(star_or_space), " *** foo");

    // Single char, char set, and predicate matchers.
    assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
    assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
    assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
}
|
|
|
|
|
|
|
|
#[test]
fn test_trim_chars() {
    // An empty character set trims nothing.
    let nothing: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_chars(nothing), " *** foo *** ");

    // Character-set matcher.
    let star_or_space: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_chars(star_or_space), "foo");
    assert_eq!(" *** *** ".trim_chars(star_or_space), "");
    assert_eq!("foo".trim_chars(star_or_space), "foo");

    // Single char, char set, and predicate matchers.
    assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
    assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
    assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
}
|
|
|
|
|
|
|
|
#[test]
|
2012-01-17 19:28:21 -06:00
|
|
|
fn test_trim_left() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim_left(), "");
|
|
|
|
assert_eq!("a".trim_left(), "a");
|
|
|
|
assert_eq!(" ".trim_left(), "");
|
|
|
|
assert_eq!(" blah".trim_left(), "blah");
|
|
|
|
assert_eq!(" \u3000 wut".trim_left(), "wut");
|
|
|
|
assert_eq!("hey ".trim_left(), "hey ");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_trim_right() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim_right(), "");
|
|
|
|
assert_eq!("a".trim_right(), "a");
|
|
|
|
assert_eq!(" ".trim_right(), "");
|
|
|
|
assert_eq!("blah ".trim_right(), "blah");
|
|
|
|
assert_eq!("wut \u3000 ".trim_right(), "wut");
|
|
|
|
assert_eq!(" hey".trim_right(), " hey");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_trim() {
|
2013-06-10 06:03:16 -05:00
|
|
|
assert_eq!("".trim(), "");
|
|
|
|
assert_eq!("a".trim(), "a");
|
|
|
|
assert_eq!(" ".trim(), "");
|
|
|
|
assert_eq!(" blah ".trim(), "blah");
|
|
|
|
assert_eq!("\nwut \u3000 ".trim(), "wut");
|
|
|
|
assert_eq!(" hey dude ".trim(), "hey dude");
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_is_whitespace() {
|
2013-06-10 10:07:52 -05:00
|
|
|
assert!("".is_whitespace());
|
|
|
|
assert!(" ".is_whitespace());
|
|
|
|
assert!("\u2009".is_whitespace()); // Thin space
|
|
|
|
assert!(" \n\t ".is_whitespace());
|
|
|
|
assert!(!" _ ".is_whitespace());
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2014-03-09 16:56:33 -05:00
|
|
|
#[test]
fn test_slice_shift_char() {
    // The first character is split off; the rest comes back as a slice.
    let text = "ประเทศไทย中";
    let shifted = text.slice_shift_char();
    assert_eq!(shifted, (Some('ป'), "ระเทศไทย中"));
}
|
|
|
|
|
|
|
|
#[test]
fn test_slice_shift_char_2() {
    // Nothing to shift off an empty string.
    let shifted = "".slice_shift_char();
    assert_eq!(shifted, (None, ""));
}
|
|
|
|
|
2013-07-30 11:39:31 -05:00
|
|
|
#[test]
fn test_is_utf8() {
    // The byte sequence must be rejected.
    fn rejected(bytes: &[u8]) {
        assert!(!is_utf8(bytes));
    }
    // The byte sequence must be accepted.
    fn accepted(bytes: &[u8]) {
        assert!(is_utf8(bytes));
    }

    // deny overlong encodings
    rejected([0xc0, 0x80]);
    rejected([0xc0, 0xae]);
    rejected([0xe0, 0x80, 0x80]);
    rejected([0xe0, 0x80, 0xaf]);
    rejected([0xe0, 0x81, 0x81]);
    rejected([0xf0, 0x82, 0x82, 0xac]);
    rejected([0xf4, 0x90, 0x80, 0x80]);

    // deny surrogates
    rejected([0xED, 0xA0, 0x80]);
    rejected([0xED, 0xBF, 0xBF]);

    accepted([0xC2, 0x80]);
    accepted([0xDF, 0xBF]);
    accepted([0xE0, 0xA0, 0x80]);
    accepted([0xED, 0x9F, 0xBF]);
    accepted([0xEE, 0x80, 0x80]);
    accepted([0xEF, 0xBF, 0xBF]);
    accepted([0xF0, 0x90, 0x80, 0x80]);
    accepted([0xF4, 0x8F, 0xBF, 0xBF]);
}
|
|
|
|
|
2014-02-16 06:52:14 -06:00
|
|
|
#[test]
fn test_is_utf16() {
    // Every listed sequence must be accepted / rejected respectively.
    macro_rules! valid ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
    macro_rules! invalid ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });

    // non-surrogates
    valid!([0x0000],
           [0x0001, 0x0002],
           [0xD7FF],
           [0xE000]);

    // surrogate pairs (randomly generated with Python 3's
    // .encode('utf-16be'))
    valid!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
           [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
           [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // mixtures (also random)
    valid!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
           [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
           [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // negative tests
    invalid!(
        // surrogate + regular unit
        [0xdb45, 0x0000],
        // surrogate + lead surrogate
        [0xd900, 0xd900],
        // unterminated surrogate
        [0xd8ff],
        // trail surrogate without a lead
        [0xddb7]);

    // random byte sequences that Python 3's .decode('utf-16be')
    // failed on
    invalid!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
             [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
             [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
             [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
             [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
             [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
             [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
             [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
             [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
             [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
             [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
             [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
             [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
             [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
             [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
             [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
             [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
             [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
             [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
             [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
             [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
2013-08-04 16:08:20 -05:00
|
|
|
fn test_raw_from_c_str() {
|
2012-06-24 22:18:18 -05:00
|
|
|
unsafe {
|
2014-04-25 03:08:02 -05:00
|
|
|
let a = box [65, 65, 65, 65, 65, 65, 65, 0];
|
2013-12-15 06:35:12 -06:00
|
|
|
let b = a.as_ptr();
|
2013-08-04 16:08:20 -05:00
|
|
|
let c = raw::from_c_str(b);
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(c, "AAAAAAA".to_owned());
|
2012-06-24 22:18:18 -05:00
|
|
|
}
|
2012-01-17 19:28:21 -06:00
|
|
|
}
|
|
|
|
|
2013-06-10 22:10:37 -05:00
|
|
|
#[test]
fn test_as_bytes() {
    // Trivial cases: the empty string and plain ASCII.
    assert_eq!("".as_bytes(), &[]);
    assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);

    // Expected bytes of the multilingual sample, one character per row
    // (no null).
    let expected = [
        224, 184, 168,
        224, 185, 132,
        224, 184, 151,
        224, 184, 162,
        228, 184, 173,
        229, 141, 142,
        86,
        105,
        225, 187, 135,
        116,
        32,
        78,
        97,
        109
    ];
    assert_eq!("ศไทย中华Việt Nam".as_bytes(), expected.as_slice());
}
|
|
|
|
|
2012-04-09 20:56:24 -05:00
|
|
|
#[test]
#[should_fail]
fn test_as_bytes_fail() {
    // Unwinds while a byte view of an owned string is still live; the
    // point is that this must not double free. (It is unclear whether
    // the originally problematic code path is still exercised.)
    let owned = "".to_owned();
    let _view = owned.as_bytes();
    fail!();
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
fn test_as_ptr() {
    // The distinct bytes of "hello", named so the assertions read as
    // the word itself.
    let (h, e, l, o) = ('h' as u8, 'e' as u8, 'l' as u8, 'o' as u8);
    let base = "hello".as_ptr();
    unsafe {
        // Offsets 0 through 4 all lie inside the five-byte literal.
        assert_eq!(*base.offset(0), h);
        assert_eq!(*base.offset(1), e);
        assert_eq!(*base.offset(2), l);
        assert_eq!(*base.offset(3), l);
        assert_eq!(*base.offset(4), o);
    }
}
|
|
|
|
|
2013-06-30 10:29:38 -05:00
|
|
|
#[test]
fn test_subslice_offset() {
    // Slices taken from the tail and from the head of one string.
    let whole = "kernelsprite";
    let tail = whole.slice(7, whole.len());
    let head = whole.slice(0, whole.len() - 6);
    assert_eq!(whole.subslice_offset(tail), 7);
    assert_eq!(whole.subslice_offset(head), 0);

    // Slices produced by an iterator over the same string.
    let text = "a\nb\nc";
    let collected: Vec<&str> = text.lines().collect();
    let pieces = collected.as_slice();
    assert_eq!(text.subslice_offset(pieces[0]), 0);
    assert_eq!(text.subslice_offset(pieces[1]), 2);
    assert_eq!(text.subslice_offset(pieces[2]), 4);
}
|
|
|
|
|
|
|
|
#[test]
#[should_fail]
fn test_subslice_offset_2() {
    // Two separate literals: neither is a subslice of the other, so the
    // call is expected to fail.
    let outer = "alchemiter";
    let unrelated = "cruxtruder";
    outer.subslice_offset(unrelated);
}
|
|
|
|
|
2012-01-17 19:28:21 -06:00
|
|
|
#[test]
|
|
|
|
fn vec_str_conversions() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s1: ~str = "All mimsy were the borogoves".to_owned();
|
2012-01-17 19:28:21 -06:00
|
|
|
|
2013-06-10 22:10:37 -05:00
|
|
|
let v: ~[u8] = s1.as_bytes().to_owned();
|
2013-12-23 10:30:49 -06:00
|
|
|
let s2: ~str = from_utf8(v).unwrap().to_owned();
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut i: uint = 0u;
|
2013-06-09 09:44:58 -05:00
|
|
|
let n1: uint = s1.len();
|
2013-06-08 20:38:47 -05:00
|
|
|
let n2: uint = v.len();
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!(n1, n2);
|
2012-01-17 19:28:21 -06:00
|
|
|
while i < n1 {
|
|
|
|
let a: u8 = s1[i];
|
|
|
|
let b: u8 = s2[i];
|
2013-10-21 15:08:31 -05:00
|
|
|
debug!("{}", a);
|
|
|
|
debug!("{}", b);
|
2013-05-18 21:02:45 -05:00
|
|
|
assert_eq!(a, b);
|
2012-01-17 19:28:21 -06:00
|
|
|
i += 1u;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_contains() {
    // ASCII haystack: needles in the middle, at the start, at the end.
    let ascii = "abcde";
    assert!(ascii.contains("bcd"));
    assert!(ascii.contains("abcd"));
    assert!(ascii.contains("bcde"));

    // The empty needle is found in any haystack, including an empty one.
    assert!(ascii.contains(""));
    assert!("".contains(""));

    // Needles that are absent.
    assert!(!ascii.contains("def"));
    assert!(!"".contains("a"));

    // Multi-byte haystack and needles.
    let multibyte = "ประเทศไทย中华Việt Nam".to_owned();
    assert!(multibyte.contains("ประเ"));
    assert!(multibyte.contains("ะเ"));
    assert!(multibyte.contains("中华"));
    assert!(!multibyte.contains("ไท华"));
}
|
|
|
|
|
2012-06-30 05:54:54 -05:00
|
|
|
#[test]
fn test_contains_char() {
    // Characters that are present.
    let abc = "abc";
    assert!(abc.contains_char('b'));
    assert!("a".contains_char('a'));

    // Characters that are absent, including from the empty string.
    assert!(!abc.contains_char('d'));
    assert!(!"".contains_char('a'));
}
|
|
|
|
|
2012-01-30 22:44:48 -06:00
|
|
|
#[test]
|
2012-03-02 17:47:14 -06:00
|
|
|
fn test_utf16() {
|
|
|
|
let pairs =
|
2014-04-15 20:17:48 -05:00
|
|
|
[("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_owned(),
|
2014-05-03 23:26:11 -05:00
|
|
|
vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
|
|
|
|
0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
|
|
|
|
0xd800_u16, 0xdf30_u16, 0x000a_u16]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_owned(),
|
2014-05-03 23:26:11 -05:00
|
|
|
vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
|
|
|
|
0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
|
|
|
|
0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
|
|
|
|
0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
|
|
|
|
0x000a_u16]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_owned(),
|
2014-05-03 23:26:11 -05:00
|
|
|
vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
|
|
|
|
0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
|
|
|
|
0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
|
|
|
|
0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2014-04-15 20:17:48 -05:00
|
|
|
("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_owned(),
|
2014-05-03 23:26:11 -05:00
|
|
|
vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
|
2013-05-23 11:39:17 -05:00
|
|
|
0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
|
|
|
|
0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
|
|
|
|
0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
|
|
|
|
0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
|
|
|
|
0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
|
|
|
|
0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
|
|
|
|
0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
|
2014-02-16 07:57:16 -06:00
|
|
|
0x000a_u16 ]),
|
|
|
|
// Issue #12318, even-numbered non-BMP planes
|
2014-04-15 20:17:48 -05:00
|
|
|
("\U00020000".to_owned(),
|
2014-05-03 23:26:11 -05:00
|
|
|
vec![0xD840, 0xDC00])];
|
2012-03-02 17:47:14 -06:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for p in pairs.iter() {
|
2013-07-02 14:47:32 -05:00
|
|
|
let (s, u) = (*p).clone();
|
2014-05-03 23:26:11 -05:00
|
|
|
assert!(is_utf16(u.as_slice()));
|
2014-02-16 06:52:14 -06:00
|
|
|
assert_eq!(s.to_utf16(), u);
|
2014-02-16 07:52:58 -06:00
|
|
|
|
2014-05-03 23:26:11 -05:00
|
|
|
assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
|
|
|
|
assert_eq!(from_utf16_lossy(u.as_slice()), s);
|
2014-02-16 07:52:58 -06:00
|
|
|
|
2014-05-03 23:26:11 -05:00
|
|
|
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
|
|
|
|
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
|
2012-03-02 17:47:14 -06:00
|
|
|
}
|
|
|
|
}
|
2012-03-30 00:28:26 -05:00
|
|
|
|
2014-02-16 16:57:56 -06:00
|
|
|
#[test]
fn test_utf16_invalid() {
    // Only malformed input is checked here; well-formed input is
    // covered by `test_utf16`.

    // lead + eof
    let lone_lead = [0xD800];
    assert_eq!(from_utf16(lone_lead), None);

    // lead + lead
    let two_leads = [0xD800, 0xD800];
    assert_eq!(from_utf16(two_leads), None);

    // isolated trail
    let stray_trail = [0x0061, 0xDC00];
    assert_eq!(from_utf16(stray_trail), None);

    // general
    let mixed = [0xD800, 0xd801, 0xdc8b, 0xD800];
    assert_eq!(from_utf16(mixed), None);
}
|
|
|
|
|
2014-02-16 07:52:58 -06:00
|
|
|
#[test]
fn test_utf16_lossy() {
    // Only malformed input is checked here; well-formed input is
    // covered by `test_utf16`. Each expected string shows where the
    // lossy decoder emits U+FFFD.

    // lead + eof
    let lone_lead = [0xD800];
    assert_eq!(from_utf16_lossy(lone_lead), "\uFFFD".to_owned());

    // lead + lead
    let two_leads = [0xD800, 0xD800];
    assert_eq!(from_utf16_lossy(two_leads), "\uFFFD\uFFFD".to_owned());

    // isolated trail
    let stray_trail = [0x0061, 0xDC00];
    assert_eq!(from_utf16_lossy(stray_trail), "a\uFFFD".to_owned());

    // general
    let mixed = [0xD800, 0xd801, 0xdc8b, 0xD800];
    assert_eq!(from_utf16_lossy(mixed), "\uFFFD𐒋\uFFFD".to_owned());
}
|
|
|
|
|
2014-02-18 05:25:32 -06:00
|
|
|
#[test]
fn test_truncate_utf16_at_nul() {
    // Empty input yields an empty result.
    let empty = [];
    assert_eq!(truncate_utf16_at_nul(empty), &[]);

    // A zero in first, middle and last position.
    let nul_first = [0, 2, 3];
    assert_eq!(truncate_utf16_at_nul(nul_first), &[]);

    let nul_middle = [1, 0, 3];
    assert_eq!(truncate_utf16_at_nul(nul_middle), &[1]);

    let nul_last = [1, 2, 0];
    assert_eq!(truncate_utf16_at_nul(nul_last), &[1, 2]);

    // No zero at all: the input comes back whole.
    let no_nul = [1, 2, 3];
    assert_eq!(truncate_utf16_at_nul(no_nul), &[1, 2, 3]);
}
|
|
|
|
|
2013-03-15 02:32:11 -05:00
|
|
|
#[test]
|
|
|
|
fn test_char_at() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2014-04-25 03:08:02 -05:00
|
|
|
let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
2013-03-15 02:32:11 -05:00
|
|
|
let mut pos = 0;
|
2013-08-03 11:45:23 -05:00
|
|
|
for ch in v.iter() {
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(s.char_at(pos) == *ch);
|
2013-03-15 02:32:11 -05:00
|
|
|
pos += from_char(*ch).len();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_char_at_reverse() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2014-04-25 03:08:02 -05:00
|
|
|
let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
2013-03-15 02:32:11 -05:00
|
|
|
let mut pos = s.len();
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
for ch in v.iter().rev() {
|
2013-03-28 20:39:09 -05:00
|
|
|
assert!(s.char_at_reverse(pos) == *ch);
|
2013-03-15 02:32:11 -05:00
|
|
|
pos -= from_char(*ch).len();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-05-31 17:31:13 -05:00
|
|
|
#[test]
fn test_escape_unicode() {
    // Asserts that `escape_unicode` turns the first string into the
    // second.
    macro_rules! escapes_to (
        ($raw:expr, $escaped:expr) => { {
            assert_eq!($raw.escape_unicode(), $escaped.to_owned());
        } }
    );

    // Inputs whose expected output uses only \x escapes.
    escapes_to!("abc", "\\x61\\x62\\x63");
    escapes_to!("a c", "\\x61\\x20\\x63");
    escapes_to!("\r\n\t", "\\x0d\\x0a\\x09");
    escapes_to!("'\"\\", "\\x27\\x22\\x5c");
    escapes_to!("\x00\x01\xfe\xff", "\\x00\\x01\\xfe\\xff");

    // Inputs whose expected output uses \u and \U escapes.
    escapes_to!("\u0100\uffff", "\\u0100\\uffff");
    escapes_to!("\U00010000\U0010ffff", "\\U00010000\\U0010ffff");

    // Mixed escape widths within one string.
    escapes_to!("ab\ufb00", "\\x61\\x62\\ufb00");
    escapes_to!("\U0001d4ea\r", "\\U0001d4ea\\x0d");
}
|
|
|
|
|
|
|
|
#[test]
fn test_escape_default() {
    // Asserts that `escape_default` turns the first string into the
    // second.
    macro_rules! escapes_to (
        ($raw:expr, $escaped:expr) => { {
            assert_eq!($raw.escape_default(), $escaped.to_owned());
        } }
    );

    // Inputs expected to come back unchanged.
    escapes_to!("abc", "abc");
    escapes_to!("a c", "a c");

    // Control characters, quotes and backslash.
    escapes_to!("\r\n\t", "\\r\\n\\t");
    escapes_to!("'\"\\", "\\'\\\"\\\\");

    // Inputs whose expected output uses \u and \U escapes.
    escapes_to!("\u0100\uffff", "\\u0100\\uffff");
    escapes_to!("\U00010000\U0010ffff", "\\U00010000\\U0010ffff");

    // Mixed forms within one string.
    escapes_to!("ab\ufb00", "ab\\ufb00");
    escapes_to!("\U0001d4ea\r", "\\U0001d4ea\\r");
}
|
|
|
|
|
2013-03-01 21:07:12 -06:00
|
|
|
#[test]
fn test_total_ord() {
    // Each comparison is wrapped in `assert!`. Previously the results
    // of the `==` expressions were discarded, so this test could never
    // fail regardless of what `cmp` returned.

    // One operand is a strict prefix of the other: the longer is greater.
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);

    // Identical contents.
    assert!("1234".cmp(&("1234")) == Equal);

    // The first differing byte decides, not the length.
    assert!("12345555".cmp(&("123456")) == Less);
    assert!("22".cmp(&("1234")) == Greater);
}
|
2013-03-28 18:37:12 -05:00
|
|
|
|
2013-07-10 13:32:59 -05:00
|
|
|
#[test]
fn test_char_range_at() {
    let data = "b¢€𤭢𤭢€¢b".to_owned();

    // Parallel tables: the byte offset at which each character starts,
    // and the character expected there.
    let starts = [0, 1, 3, 6, 10, 14, 17, 19];
    let chars = ['b', '¢', '€', '𤭢', '𤭢', '€', '¢', 'b'];

    let mut i = 0;
    while i < starts.len() {
        assert_eq!(chars[i], data.char_range_at(starts[i]).ch);
        i += 1;
    }
}
|
|
|
|
|
2013-03-28 18:37:12 -05:00
|
|
|
#[test]
fn test_char_range_at_reverse_underflow() {
    // Stepping backwards from offset 0 must report 0 as the next
    // offset.
    let range = "abc".char_range_at_reverse(0);
    assert_eq!(range.next, 0);
}
|
|
|
|
|
2013-06-15 08:17:53 -05:00
|
|
|
#[test]
fn test_add() {
    #![allow(unnecessary_allocation)]
    // Checks that `$lhs + $rhs` equals `$want`, once with the left
    // operand as given and once with it converted to an owned string.
    macro_rules! concat_is (
        ($lhs:expr, $rhs:expr, $want:expr) => { {
            let lhs = $lhs;
            let rhs = $rhs;
            let want = $want;
            assert_eq!(lhs + rhs, want.to_owned());
            assert_eq!(lhs.to_owned() + rhs, want.to_owned());
        } }
    );

    // ASCII operands, with the right-hand side borrowed and then owned.
    concat_is!("foo", "bar", "foobar");
    concat_is!("foo", "bar".to_owned(), "foobar");

    // Multi-byte operands, with the same two right-hand-side forms.
    concat_is!("ศไทย中", "华Việt Nam", "ศไทย中华Việt Nam");
    concat_is!("ศไทย中", "华Việt Nam".to_owned(), "ศไทย中华Việt Nam");
}
|
|
|
|
|
2013-04-18 07:50:55 -05:00
|
|
|
#[test]
|
|
|
|
fn test_iterator() {
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::*;
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2014-04-25 03:08:02 -05:00
|
|
|
let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
|
2013-04-18 07:50:55 -05:00
|
|
|
|
|
|
|
let mut pos = 0;
|
2013-11-23 04:18:51 -06:00
|
|
|
let mut it = s.chars();
|
2013-06-08 07:04:46 -05:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for c in it {
|
2013-06-08 07:04:46 -05:00
|
|
|
assert_eq!(c, v[pos]);
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
assert_eq!(pos, v.len());
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_rev_iterator() {
|
2013-09-08 10:01:16 -05:00
|
|
|
use iter::*;
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2014-04-25 03:08:02 -05:00
|
|
|
let v = box ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
|
2013-06-08 07:04:46 -05:00
|
|
|
|
|
|
|
let mut pos = 0;
|
Deprecate the rev_iter pattern in all places where a DoubleEndedIterator is provided (everywhere but treemap)
This commit deprecates rev_iter, mut_rev_iter, move_rev_iter everywhere (except treemap) and also
deprecates related functions like rsplit, rev_components, and rev_str_components. In every case,
these functions can be replaced with the non-reversed form followed by a call to .rev(). To make this
more concrete, a translation table for all functional changes necessary follows:
* container.rev_iter() -> container.iter().rev()
* container.mut_rev_iter() -> container.mut_iter().rev()
* container.move_rev_iter() -> container.move_iter().rev()
* sliceorstr.rsplit(sep) -> sliceorstr.split(sep).rev()
* path.rev_components() -> path.components().rev()
* path.rev_str_components() -> path.str_components().rev()
In terms of the type system, this change also deprecates any specialized reversed iterator types (except
in treemap), opting instead to use Rev directly if any type annotations are needed. However, since
methods directly returning reversed iterators are now discouraged, the need for such annotations should
be small. However, in those cases, the general pattern for conversion is to take whatever follows Rev in
the original reversed name and surround it with Rev<>:
* RevComponents<'a> -> Rev<Components<'a>>
* RevStrComponents<'a> -> Rev<StrComponents<'a>>
* RevItems<'a, T> -> Rev<Items<'a, T>>
* etc.
The reasoning behind this change is that it makes the standard API much simpler without reducing readability,
performance, or power. The presence of functions such as rev_iter adds more boilerplate code to libraries
(all of which simply call .iter().rev()), clutters up the documentation, and only helps code by saving two
characters. Additionally, the numerous type synonyms that were used to make the type signatures look nice
like RevItems add even more boilerplate and clutter up the docs even more. With this change, all that cruft
goes away.
[breaking-change]
2014-04-20 23:59:12 -05:00
|
|
|
let mut it = s.chars().rev();
|
2013-04-18 07:50:55 -05:00
|
|
|
|
2013-08-03 11:45:23 -05:00
|
|
|
for c in it {
|
2013-04-18 07:50:55 -05:00
|
|
|
assert_eq!(c, v[pos]);
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
assert_eq!(pos, v.len());
|
|
|
|
}
|
2013-06-08 09:38:58 -05:00
|
|
|
|
2013-08-21 17:35:16 -05:00
|
|
|
#[test]
fn test_iterator_clone() {
    let text = "ศไทย中华Việt Nam";
    let mut chars = text.chars();
    // Advance once so the clone is taken from a partly consumed
    // iterator.
    chars.next();
    let twin = chars.clone();
    // The clone must yield exactly what the source iterator yields.
    assert!(chars.zip(twin).all(|(a, b)| a == b));
}
|
|
|
|
|
2013-06-08 09:38:58 -05:00
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_bytesator() {
|
2014-04-15 20:17:48 -05:00
|
|
|
let s = "ศไทย中华Việt Nam".to_owned();
|
2013-06-08 09:38:58 -05:00
|
|
|
let v = [
|
|
|
|
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
|
|
|
|
184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
|
|
|
|
109
|
|
|
|
];
|
|
|
|
let mut pos = 0;
|
|
|
|
|
2013-11-23 04:18:51 -06:00
|
|
|
for b in s.bytes() {
|
2013-06-08 09:38:58 -05:00
|
|
|
assert_eq!(b, v[pos]);
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
fn test_bytes_revator() {
    let s = "ศไทย中华Việt Nam".to_owned();
    // Expected bytes of `s`, in forward order; the loop below indexes
    // this table from the end.
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = v.len();

    // Every yielded byte must match the table entry at the mirrored
    // position.
    for b in s.bytes().rev() {
        pos -= 1;
        assert_eq!(b, v[pos]);
    }

    // Without this check a reversed iterator that stopped early (or
    // yielded nothing at all) would pass.
    assert_eq!(pos, 0);
}
|
2013-06-09 08:10:50 -05:00
|
|
|
|
2013-07-27 16:38:38 -05:00
|
|
|
#[test]
fn test_char_indicesator() {
    use iter::*;
    let text = "ศไทย中华Việt Nam";
    // Parallel tables: the byte offset of each character, and the
    // character itself.
    let offsets = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
    let chars = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    // Every yielded pair must match the tables at the same position.
    let mut seen = 0;
    for pair in text.char_indices() {
        assert_eq!(pair, (offsets[seen], chars[seen]));
        seen += 1;
    }

    // The iterator must have produced both tables in full.
    assert_eq!(seen, chars.len());
    assert_eq!(seen, offsets.len());
}
|
|
|
|
|
|
|
|
#[test]
fn test_char_indices_revator() {
    use iter::*;
    let text = "ศไทย中华Việt Nam";
    // Parallel tables, both listed last character first: the byte
    // offset of each character, and the character itself.
    let offsets = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
    let chars = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    // Every yielded pair must match the tables at the same position.
    let mut seen = 0;
    for pair in text.char_indices().rev() {
        assert_eq!(pair, (offsets[seen], chars[seen]));
        seen += 1;
    }

    // The iterator must have produced both tables in full.
    assert_eq!(seen, chars.len());
    assert_eq!(seen, offsets.len());
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Expected pieces for each separator. Every variant below (char or
    // closure separator, forward or reversed-then-flipped) must
    // produce the same list.
    let by_space = vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];
    let by_umlaut = vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator given as a char.
    let forward: Vec<&str> = data.split(' ').collect();
    assert_eq!(forward, by_space);

    let mut backward: Vec<&str> = data.split(' ').rev().collect();
    backward.reverse();
    assert_eq!(backward, by_space);

    // ASCII separator given as a predicate.
    let forward: Vec<&str> = data.split(|c: char| c == ' ').collect();
    assert_eq!(forward, by_space);

    let mut backward: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
    backward.reverse();
    assert_eq!(backward, by_space);

    // Unicode separator given as a char.
    let forward: Vec<&str> = data.split('ä').collect();
    assert_eq!(forward, by_umlaut);

    let mut backward: Vec<&str> = data.split('ä').rev().collect();
    backward.reverse();
    assert_eq!(backward, by_umlaut);

    // Unicode separator given as a predicate.
    let forward: Vec<&str> = data.split(|c: char| c == 'ä').collect();
    assert_eq!(forward, by_umlaut);

    let mut backward: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
    backward.reverse();
    assert_eq!(backward, by_umlaut);
}
|
2013-07-27 16:38:38 -05:00
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
fn test_splitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Expected pieces for a count of 3; the final piece holds the
    // unsplit remainder. The char and closure forms of a separator
    // must agree.
    let by_space = vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"];
    let by_umlaut = vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"];

    // ASCII separator.
    let pieces: Vec<&str> = data.splitn(' ', 3).collect();
    assert_eq!(pieces, by_space);

    let pieces: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
    assert_eq!(pieces, by_space);

    // Unicode separator.
    let pieces: Vec<&str> = data.splitn('ä', 3).collect();
    assert_eq!(pieces, by_umlaut);

    let pieces: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
    assert_eq!(pieces, by_umlaut);
}
|
|
|
|
|
2013-08-25 01:54:47 -05:00
|
|
|
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Expected pieces for a count of 3, in left-to-right order; the
    // first piece holds the unsplit remainder. Results are reversed
    // before comparing.
    let by_space = vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"];
    let by_umlaut = vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator.
    let mut pieces: Vec<&str> = data.rsplitn(' ', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, by_space);

    let mut pieces: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, by_space);

    // Unicode separator.
    let mut pieces: Vec<&str> = data.rsplitn('ä', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, by_umlaut);

    let mut pieces: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
    pieces.reverse();
    assert_eq!(pieces, by_umlaut);
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
fn test_split_char_iterator_no_trailing() {
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";

    // `split` is expected to report the empty piece after the final '\n' ...
    let with_trailing = vec!["", "Märy häd ä little lämb", "Little lämb", ""];
    let pieces: Vec<&str> = text.split('\n').collect();
    assert_eq!(pieces, with_trailing);

    // ... whereas `split_terminator` is expected to leave it out.
    let without_trailing = vec!["", "Märy häd ä little lämb", "Little lämb"];
    let pieces: Vec<&str> = text.split_terminator('\n').collect();
    assert_eq!(pieces, without_trailing);
}
|
|
|
|
|
|
|
|
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    // The iterators are consumed through `.rev()` and the collected vector is
    // reversed again, so the expectations are in front-to-back order.
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = vec!["", "Märy häd ä little lämb", "Little lämb", ""];
    let mut pieces: Vec<&str> = text.split('\n').rev().collect();
    pieces.reverse();
    assert_eq!(pieces, with_trailing);

    let without_trailing = vec!["", "Märy häd ä little lämb", "Little lämb"];
    let mut pieces: Vec<&str> = text.split_terminator('\n').rev().collect();
    pieces.reverse();
    assert_eq!(pieces, without_trailing);
}
|
|
|
|
|
|
|
|
#[test]
fn test_words() {
    // The input mixes spaces, tabs and newlines (including leading and
    // trailing ones); only the words themselves are expected back.
    let text = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
    let expected = vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"];

    let found: Vec<&str> = text.words().collect();
    assert_eq!(found, expected);
}
|
|
|
|
|
2013-08-10 20:36:38 -05:00
|
|
|
#[test]
fn test_nfd_chars() {
    // Collects the `nfd_chars` output for `input` into an owned string and
    // compares it with `expected`.
    fn check(input: &str, expected: &str) {
        let decomposed = input.nfd_chars().collect::<~str>();
        assert_eq!(decomposed, expected.to_owned());
    }

    check("abc", "abc");
    check("\u1e0b\u01c4", "d\u0307\u01c4");
    check("\u2026", "\u2026");
    check("\u2126", "\u03a9");
    check("\u1e0b\u0323", "d\u0323\u0307");
    check("\u1e0d\u0307", "d\u0323\u0307");
    check("a\u0301", "a\u0301");
    check("\u0301a", "\u0301a");
    check("\ud4db", "\u1111\u1171\u11b6");
    check("\uac1c", "\u1100\u1162");
}
|
|
|
|
|
|
|
|
#[test]
|
2013-11-23 04:18:51 -06:00
|
|
|
fn test_nfkd_chars() {
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
|
|
|
|
assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
|
|
|
|
assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
|
|
|
|
assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
|
|
|
|
assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
|
|
|
|
assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
|
|
|
|
assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
|
|
|
|
assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
|
|
|
|
assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
|
|
|
|
assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
|
2013-08-10 20:36:38 -05:00
|
|
|
}
|
|
|
|
|
2013-06-09 08:10:50 -05:00
|
|
|
#[test]
fn test_lines() {
    // Both inputs are expected to yield the same lines: the final '\n' of the
    // first input must not add an empty line at the end.
    let expected = vec!["", "Märy häd ä little lämb", "", "Little lämb"];

    let with_final_newline = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let lines: Vec<&str> = with_final_newline.lines().collect();
    assert_eq!(lines, expected);

    let without_final_newline = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
    let lines: Vec<&str> = without_final_newline.lines().collect();
    assert_eq!(lines, expected);
}
|
2013-06-09 21:46:35 -05:00
|
|
|
|
|
|
|
#[test]
fn test_split_strator() {
    // Splits `haystack` on the string `needle` and compares the pieces with
    // `expected`.
    fn check(haystack: &str, needle: &str, expected: &[&str]) {
        let pieces: Vec<&str> = haystack.split_str(needle).collect();
        assert_eq!(pieces.as_slice(), expected.as_slice());
    }

    // Separator absent, in the middle, and at either end of the input.
    check("--1233345--", "12345", ["--1233345--"]);
    check("abc::hello::there", "::", ["abc", "hello", "there"]);
    check("::hello::there", "::", ["", "hello", "there"]);
    check("hello::there::", "::", ["hello", "there", ""]);
    check("::hello::there::", "::", ["", "hello", "there", ""]);

    // Multi-byte separator inside multi-byte text.
    check("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);

    check("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
    check("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
    check(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);

    // Empty input, input equal to the separator, and repeated separators.
    check("", ".", [""]);
    check("zz", "zz", ["",""]);
    check("ok", "z", ["ok"]);
    check("zzz", "zz", ["","z"]);
    check("zzzzz", "zz", ["","","z"]);
}
|
2013-06-17 02:05:51 -05:00
|
|
|
|
|
|
|
#[test]
|
2013-08-10 08:38:00 -05:00
|
|
|
fn test_str_default() {
|
|
|
|
use default::Default;
|
|
|
|
fn t<S: Default + Str>() {
|
|
|
|
let s: S = Default::default();
|
2013-06-17 02:05:51 -05:00
|
|
|
assert_eq!(s.as_slice(), "");
|
|
|
|
}
|
|
|
|
|
|
|
|
t::<&str>();
|
|
|
|
t::<~str>();
|
|
|
|
}
|
2013-07-20 12:28:38 -05:00
|
|
|
|
|
|
|
#[test]
fn test_str_container() {
    // Adds up `len()` over every element of `items`.
    fn sum_len<S: Container>(items: &[S]) -> uint {
        items.iter().fold(0u, |total, item| total + item.len())
    }

    let owned = "01234".to_owned();

    // Borrowed strings, owned strings, and a slice of an owned string all
    // go through the same `Container` bound.
    assert_eq!(5, sum_len(["012", "", "34"]));
    assert_eq!(5, sum_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
    assert_eq!(5, sum_len([owned.as_slice()]));
}
|
2013-08-24 00:05:35 -05:00
|
|
|
|
2013-08-25 19:07:29 -05:00
|
|
|
#[test]
fn test_str_from_utf8() {
    // Well-formed ASCII input.
    let ascii = bytes!("hello");
    assert_eq!(from_utf8(ascii), Some("hello"));

    // Well-formed multi-byte input.
    let multibyte = bytes!("ศไทย中华Việt Nam");
    assert_eq!(from_utf8(multibyte), Some("ศไทย中华Việt Nam"));

    // A trailing 0xff byte must make the conversion fail.
    let invalid = bytes!("hello", 0xff);
    assert_eq!(from_utf8(invalid), None);
}
|
|
|
|
|
|
|
|
#[test]
fn test_str_from_utf8_owned() {
    // Well-formed ASCII input.
    let ascii = bytes!("hello").to_owned();
    assert_eq!(from_utf8_owned(ascii), Some("hello".to_owned()));

    // Well-formed multi-byte input.
    let multibyte = bytes!("ศไทย中华Việt Nam").to_owned();
    assert_eq!(from_utf8_owned(multibyte), Some("ศไทย中华Việt Nam".to_owned()));

    // A trailing 0xff byte must make the conversion fail.
    let invalid = bytes!("hello", 0xff).to_owned();
    assert_eq!(from_utf8_owned(invalid), None);
}
|
2013-09-14 12:37:45 -05:00
|
|
|
|
2014-02-06 01:56:27 -06:00
|
|
|
#[test]
|
|
|
|
fn test_str_from_utf8_lossy() {
|
|
|
|
let xs = bytes!("hello");
|
2014-02-07 16:58:37 -06:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Slice("hello"));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!("ศไทย中华Việt Nam");
|
2014-02-07 16:58:37 -06:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
|
|
|
|
foo\U00010000bar".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
|
|
|
|
// surrogates
|
|
|
|
let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
|
|
|
|
\uFFFD\uFFFD\uFFFDbar".to_owned()));
|
2014-02-06 01:56:27 -06:00
|
|
|
}
|
|
|
|
|
2013-10-02 08:37:59 -05:00
|
|
|
#[test]
|
|
|
|
fn test_from_str() {
|
2014-05-01 00:32:13 -05:00
|
|
|
let owned: Option<~str> = from_str("string");
|
2014-04-15 20:17:48 -05:00
|
|
|
assert_eq!(owned, Some("string".to_owned()));
|
2013-10-02 08:37:59 -05:00
|
|
|
}
|
2014-02-07 18:36:59 -06:00
|
|
|
|
|
|
|
#[test]
fn test_maybe_owned_traits() {
    // Borrowed variant: length, slicing, formatting, ordering and default.
    let borrowed = Slice("abcde");
    assert_eq!(borrowed.len(), 5);
    assert_eq!(borrowed.as_slice(), "abcde");
    assert_eq!(borrowed.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", borrowed), "abcde".to_owned());
    assert!(borrowed.lt(&Owned("bcdef".to_owned())));
    assert_eq!(Slice(""), Default::default());

    // Owned variant: the same set of checks.
    let owned = Owned("abcde".to_owned());
    assert_eq!(owned.len(), 5);
    assert_eq!(owned.as_slice(), "abcde");
    assert_eq!(owned.to_str(), "abcde".to_owned());
    assert_eq!(format!("{}", owned), "abcde".to_owned());
    assert!(owned.lt(&Slice("bcdef")));
    assert_eq!(Owned("".to_owned()), Default::default());

    // The two variants holding the same text compare as equal, in both
    // directions.
    assert!(borrowed.cmp(&owned) == Equal);
    assert!(borrowed.equiv(&owned));

    assert!(owned.cmp(&borrowed) == Equal);
    assert!(owned.equiv(&borrowed));
}
|
|
|
|
|
|
|
|
#[test]
fn test_maybe_owned_methods() {
    // A `Slice` reports itself as a slice and not as owned ...
    let borrowed = Slice("abcde");
    assert!(borrowed.is_slice());
    assert!(!borrowed.is_owned());

    // ... and an `Owned` reports the opposite.
    let owned = Owned("abcde".to_owned());
    assert!(!owned.is_slice());
    assert!(owned.is_owned());
}
|
|
|
|
|
|
|
|
#[test]
fn test_maybe_owned_clone() {
    // A clone of either variant must compare equal to both variants holding
    // the same text.
    let borrowed = Slice("abcde");
    let owned = Owned("abcde".to_owned());

    assert_eq!(Owned("abcde".to_owned()), borrowed.clone());
    assert_eq!(Owned("abcde".to_owned()), owned.clone());
    assert_eq!(Slice("abcde"), borrowed.clone());
    assert_eq!(Slice("abcde"), owned.clone());
}
|
|
|
|
|
|
|
|
#[test]
fn test_maybe_owned_into_owned() {
    // Both variants must convert into the same owned string.
    let from_slice = Slice("abcde").into_owned();
    assert_eq!(from_slice, "abcde".to_owned());

    let from_owned = Owned("abcde".to_owned()).into_owned();
    assert_eq!(from_owned, "abcde".to_owned());
}
|
|
|
|
|
|
|
|
#[test]
fn test_into_maybe_owned() {
    // Conversions from a borrowed and from an owned string must each compare
    // equal to both the `Slice` and the `Owned` form of the same text.
    let from_slice = "abcde".into_maybe_owned();
    let from_owned = "abcde".to_owned().into_maybe_owned();

    assert_eq!(from_slice, Slice("abcde"));
    assert_eq!(from_owned, Slice("abcde"));
    assert_eq!(from_slice, Owned("abcde".to_owned()));
    assert_eq!(from_owned, Owned("abcde".to_owned()));
}
|
2012-01-23 02:36:58 -06:00
|
|
|
}
|
2013-07-22 12:52:38 -05:00
|
|
|
|
|
|
|
// Benchmarks for character iteration, splitting, UTF-8 validation, lossy
// decoding and joining.
#[cfg(test)]
mod bench {
    extern crate test;
    use self::test::Bencher;
    use super::*;
    use prelude::*;

    // Forward character iteration over mixed multi-byte and ASCII text.
    #[bench]
    fn char_iterator(b: &mut Bencher) {
        let text = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let expected = text.char_len();

        b.iter(|| {
            assert_eq!(text.chars().len(), expected);
        });
    }

    // Forward character iteration over ASCII-only text.
    #[bench]
    fn char_iterator_ascii(b: &mut Bencher) {
        // The line breaks below are part of the literal.
        let text = "Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb";
        let expected = text.char_len();

        b.iter(|| {
            assert_eq!(text.chars().len(), expected);
        });
    }

    // Reverse character iteration over mixed multi-byte and ASCII text.
    #[bench]
    fn char_iterator_rev(b: &mut Bencher) {
        let text = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let expected = text.char_len();

        b.iter(|| {
            assert_eq!(text.chars().rev().len(), expected);
        });
    }

    // Forward iteration over (offset, char) pairs.
    #[bench]
    fn char_indicesator(b: &mut Bencher) {
        let text = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let expected = text.char_len();

        b.iter(|| {
            assert_eq!(text.char_indices().len(), expected);
        });
    }

    // Reverse iteration over (offset, char) pairs.
    #[bench]
    fn char_indicesator_rev(b: &mut Bencher) {
        let text = "ศไทย中华Việt Namประเทศไทย中华Việt Nam";
        let text = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let expected = text.char_len();

        b.iter(|| {
            assert_eq!(text.char_indices().rev().len(), expected);
        });
    }

    // Splitting multi-byte text on an ASCII char separator.
    #[bench]
    fn split_unicode_ascii(b: &mut Bencher) {
        let text = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| {
            assert_eq!(text.split('V').len(), 3);
        });
    }

    // Same split as above, but through a `CharEq` that reports it is not
    // ASCII-only.
    #[bench]
    fn split_unicode_not_ascii(b: &mut Bencher) {
        struct NotAscii(char);
        impl CharEq for NotAscii {
            fn matches(&mut self, c: char) -> bool {
                match *self {
                    NotAscii(wanted) => wanted == c
                }
            }
            fn only_ascii(&self) -> bool { false }
        }
        let text = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| {
            assert_eq!(text.split(NotAscii('V')).len(), 3);
        });
    }


    // Splitting ASCII text on a char separator.
    #[bench]
    fn split_ascii(b: &mut Bencher) {
        let text = "Mary had a little lamb, Little lamb, little-lamb.";
        let expected = text.split(' ').len();

        b.iter(|| {
            assert_eq!(text.split(' ').len(), expected);
        });
    }

    // Same split as above, but through a `CharEq` that reports it is not
    // ASCII-only.
    #[bench]
    fn split_not_ascii(b: &mut Bencher) {
        struct NotAscii(char);
        impl CharEq for NotAscii {
            #[inline]
            fn matches(&mut self, c: char) -> bool {
                match *self {
                    NotAscii(wanted) => wanted == c
                }
            }
            fn only_ascii(&self) -> bool { false }
        }
        let text = "Mary had a little lamb, Little lamb, little-lamb.";
        let expected = text.split(' ').len();

        b.iter(|| {
            assert_eq!(text.split(NotAscii(' ')).len(), expected);
        });
    }

    // Splitting with a plain function as the predicate.
    #[bench]
    fn split_extern_fn(b: &mut Bencher) {
        fn is_space(c: char) -> bool { c == ' ' }

        let text = "Mary had a little lamb, Little lamb, little-lamb.";
        let expected = text.split(' ').len();

        b.iter(|| {
            assert_eq!(text.split(is_space).len(), expected);
        });
    }

    // Splitting with a closure as the predicate.
    #[bench]
    fn split_closure(b: &mut Bencher) {
        let text = "Mary had a little lamb, Little lamb, little-lamb.";
        let expected = text.split(' ').len();

        b.iter(|| {
            assert_eq!(text.split(|ch: char| ch == ' ').len(), expected);
        });
    }

    // Splitting with a slice of chars as the separator set.
    #[bench]
    fn split_slice(b: &mut Bencher) {
        let text = "Mary had a little lamb, Little lamb, little-lamb.";
        let expected = text.split(' ').len();

        b.iter(|| {
            assert_eq!(text.split(&[' ']).len(), expected);
        });
    }

    // UTF-8 validation of 100 bytes of ASCII.
    #[bench]
    fn is_utf8_100_ascii(b: &mut Bencher) {

        let input = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
                            Lorem ipsum dolor sit amet, consectetur. ");

        assert_eq!(100, input.len());
        b.iter(|| {
            is_utf8(input)
        });
    }

    // UTF-8 validation of 100 bytes of multi-byte text.
    #[bench]
    fn is_utf8_100_multibyte(b: &mut Bencher) {
        let input = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
        assert_eq!(100, input.len());
        b.iter(|| {
            is_utf8(input)
        });
    }

    // Lossy decoding of 100 bytes of ASCII.
    #[bench]
    fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
        let input = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
                            Lorem ipsum dolor sit amet, consectetur. ");

        assert_eq!(100, input.len());
        b.iter(|| {
            let _ = from_utf8_lossy(input);
        });
    }

    // Lossy decoding of 100 bytes of multi-byte text.
    #[bench]
    fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
        let input = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
        assert_eq!(100, input.len());
        b.iter(|| {
            let _ = from_utf8_lossy(input);
        });
    }

    // Lossy decoding of text with a few malformed sequences in it.
    #[bench]
    fn from_utf8_lossy_invalid(b: &mut Bencher) {
        let input = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
        b.iter(|| {
            let _ = from_utf8_lossy(input);
        });
    }

    // Lossy decoding of 100 copies of the byte 0xF5.
    #[bench]
    fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
        let input = Vec::from_elem(100, 0xF5u8);
        b.iter(|| {
            let _ = from_utf8_lossy(input.as_slice());
        });
    }

    // Joining ten copies of a string with a multi-byte separator.
    #[bench]
    fn bench_connect(b: &mut Bencher) {
        let text = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let sep = "→";
        let parts = [text, text, text, text, text, text, text, text, text, text];
        b.iter(|| {
            let joined = parts.connect(sep);
            assert_eq!(joined.len(), text.len() * 10 + sep.len() * 9);
        });
    }
}
|