2014-04-02 18:54:22 -05:00
|
|
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
2014-07-14 22:46:04 -05:00
|
|
|
//
|
|
|
|
// ignore-lexer-test FIXME #15679
|
2014-04-02 18:54:22 -05:00
|
|
|
|
|
|
|
//! An owned, growable string that enforces that its contents are valid UTF-8.
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#![stable(feature = "rust1", since = "1.0.0")]
|
2014-12-28 12:29:56 -06:00
|
|
|
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
use core::prelude::*;
|
|
|
|
|
|
|
|
use core::default::Default;
|
2015-01-20 17:45:07 -06:00
|
|
|
use core::error::Error;
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
use core::fmt;
|
2014-12-12 20:43:07 -06:00
|
|
|
use core::hash;
|
2015-02-18 09:04:30 -06:00
|
|
|
use core::iter::{IntoIterator, FromIterator};
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
use core::mem;
|
2015-01-03 22:43:24 -06:00
|
|
|
use core::ops::{self, Deref, Add, Index};
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
use core::ptr;
|
2014-08-18 10:29:44 -05:00
|
|
|
use core::raw::Slice as RawSlice;
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
use unicode::str as unicode_str;
|
|
|
|
use unicode::str::Utf16Item;
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
|
2015-02-12 01:16:32 -06:00
|
|
|
use borrow::{Cow, IntoCow};
|
2015-01-03 21:42:21 -06:00
|
|
|
use str::{self, CharRange, FromStr, Utf8Error};
|
2014-08-23 19:26:53 -05:00
|
|
|
use vec::{DerefVec, Vec, as_vec};
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-04-10 05:55:34 -05:00
|
|
|
/// A growable string stored as a UTF-8 encoded buffer.
|
2015-01-03 21:54:18 -06:00
|
|
|
#[derive(Clone, PartialOrd, Eq, Ord)]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-05-22 18:57:53 -05:00
|
|
|
pub struct String {
|
2014-04-02 18:54:22 -05:00
|
|
|
vec: Vec<u8>,
|
|
|
|
}
|
|
|
|
|
2014-12-28 12:29:56 -06:00
|
|
|
/// A possible error value from the `String::from_utf8` function.
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-28 07:34:18 -06:00
|
|
|
#[derive(Debug)]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub struct FromUtf8Error {
|
|
|
|
bytes: Vec<u8>,
|
|
|
|
error: Utf8Error,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A possible error value from the `String::from_utf16` function.
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-28 07:34:18 -06:00
|
|
|
#[derive(Debug)]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub struct FromUtf16Error(());
|
|
|
|
|
2014-05-22 18:57:53 -05:00
|
|
|
impl String {
|
2014-04-20 23:49:39 -05:00
|
|
|
/// Creates a new string buffer initialized with the empty string.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::new();
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-05-22 18:57:53 -05:00
|
|
|
pub fn new() -> String {
|
|
|
|
String {
|
2014-04-02 18:54:22 -05:00
|
|
|
vec: Vec::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates a new string buffer with the given capacity.
|
2014-07-27 05:40:39 -05:00
|
|
|
/// The string will be able to hold exactly `capacity` bytes without
|
|
|
|
/// reallocating. If `capacity` is 0, the string will not allocate.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::with_capacity(10);
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn with_capacity(capacity: usize) -> String {
|
2014-05-22 18:57:53 -05:00
|
|
|
String {
|
2014-04-02 18:54:22 -05:00
|
|
|
vec: Vec::with_capacity(capacity),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates a new string buffer from the given string.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let s = String::from_str("hello");
|
|
|
|
/// assert_eq!(s.as_slice(), "hello");
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections",
|
2015-01-12 20:40:19 -06:00
|
|
|
reason = "needs investigation to see if to_string() can match perf")]
|
2014-05-22 18:57:53 -05:00
|
|
|
pub fn from_str(string: &str) -> String {
|
2015-01-02 08:12:27 -06:00
|
|
|
String { vec: ::slice::SliceExt::to_vec(string.as_bytes()) }
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2014-05-14 18:55:24 -05:00
|
|
|
/// Returns the vector as a string buffer, if possible, taking care not to
|
|
|
|
/// copy it.
|
|
|
|
///
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
/// # Failure
|
|
|
|
///
|
|
|
|
/// If the given vector is not valid UTF-8, then the original vector and the
|
|
|
|
/// corresponding error is returned.
|
2014-06-30 09:41:30 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-06-30 09:41:30 -05:00
|
|
|
///
|
|
|
|
/// ```rust
|
2014-12-10 21:46:38 -06:00
|
|
|
/// use std::str::Utf8Error;
|
|
|
|
///
|
2014-06-30 09:41:30 -05:00
|
|
|
/// let hello_vec = vec![104, 101, 108, 108, 111];
|
2014-12-28 12:29:56 -06:00
|
|
|
/// let s = String::from_utf8(hello_vec).unwrap();
|
|
|
|
/// assert_eq!(s, "hello");
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// let invalid_vec = vec![240, 144, 128];
|
2014-12-28 12:29:56 -06:00
|
|
|
/// let s = String::from_utf8(invalid_vec).err().unwrap();
|
|
|
|
/// assert_eq!(s.utf8_error(), Utf8Error::TooShort);
|
2015-02-24 12:15:45 -06:00
|
|
|
/// assert_eq!(s.into_bytes(), [240, 144, 128]);
|
2014-06-30 09:41:30 -05:00
|
|
|
/// ```
|
2014-04-10 05:55:34 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
|
2015-02-01 20:53:25 -06:00
|
|
|
match str::from_utf8(&vec) {
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
Ok(..) => Ok(String { vec: vec }),
|
2014-12-28 12:29:56 -06:00
|
|
|
Err(e) => Err(FromUtf8Error { bytes: vec, error: e })
|
2014-04-10 05:55:34 -05:00
|
|
|
}
|
|
|
|
}
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-08-04 05:48:39 -05:00
|
|
|
/// Converts a vector of bytes to a new UTF-8 string.
|
|
|
|
/// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
2014-07-10 11:21:16 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-10 11:21:16 -05:00
|
|
|
///
|
|
|
|
/// ```rust
|
|
|
|
/// let input = b"Hello \xF0\x90\x80World";
|
2014-07-04 15:38:13 -05:00
|
|
|
/// let output = String::from_utf8_lossy(input);
|
2014-12-09 16:08:10 -06:00
|
|
|
/// assert_eq!(output.as_slice(), "Hello \u{FFFD}World");
|
2014-07-10 11:21:16 -05:00
|
|
|
/// ```
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-12 01:16:32 -06:00
|
|
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
|
2015-01-01 00:22:43 -06:00
|
|
|
let mut i = 0;
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
match str::from_utf8(v) {
|
|
|
|
Ok(s) => return Cow::Borrowed(s),
|
2015-01-01 00:22:43 -06:00
|
|
|
Err(e) => {
|
|
|
|
if let Utf8Error::InvalidByte(firstbad) = e {
|
|
|
|
i = firstbad;
|
|
|
|
}
|
|
|
|
}
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
static TAG_CONT_U8: u8 = 128u8;
|
|
|
|
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
|
|
|
|
let total = v.len();
|
2015-02-04 20:17:19 -06:00
|
|
|
fn unsafe_get(xs: &[u8], i: usize) -> u8 {
|
2014-12-30 12:51:18 -06:00
|
|
|
unsafe { *xs.get_unchecked(i) }
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
2015-02-04 20:17:19 -06:00
|
|
|
fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
|
2014-07-10 11:21:16 -05:00
|
|
|
if i >= total {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
unsafe_get(xs, i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut res = String::with_capacity(total);
|
|
|
|
|
|
|
|
if i > 0 {
|
|
|
|
unsafe {
|
2015-01-12 15:59:18 -06:00
|
|
|
res.as_mut_vec().push_all(&v[..i])
|
2014-07-10 11:21:16 -05:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
|
|
|
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
|
|
|
// them one by one.
|
2015-01-01 00:22:43 -06:00
|
|
|
let mut subseqidx = i;
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
while i < total {
|
|
|
|
let i_ = i;
|
|
|
|
let byte = unsafe_get(v, i);
|
|
|
|
i += 1;
|
|
|
|
|
2015-01-02 16:44:21 -06:00
|
|
|
macro_rules! error { () => ({
|
2014-07-10 11:21:16 -05:00
|
|
|
unsafe {
|
|
|
|
if subseqidx != i_ {
|
2015-01-07 10:58:31 -06:00
|
|
|
res.as_mut_vec().push_all(&v[subseqidx..i_]);
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
|
|
|
subseqidx = i;
|
2014-09-22 10:28:35 -05:00
|
|
|
res.as_mut_vec().push_all(REPLACEMENT);
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
2015-01-02 16:44:21 -06:00
|
|
|
})}
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
if byte < 128u8 {
|
|
|
|
// subseqidx handles this
|
|
|
|
} else {
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
let w = unicode_str::utf8_char_width(byte);
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
match w {
|
|
|
|
2 => {
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
3 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
2014-09-26 23:13:20 -05:00
|
|
|
(0xE0 , 0xA0 ... 0xBF) => (),
|
|
|
|
(0xE1 ... 0xEC, 0x80 ... 0xBF) => (),
|
|
|
|
(0xED , 0x80 ... 0x9F) => (),
|
|
|
|
(0xEE ... 0xEF, 0x80 ... 0xBF) => (),
|
2014-07-10 11:21:16 -05:00
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
4 => {
|
|
|
|
match (byte, safe_get(v, i, total)) {
|
2014-09-26 23:13:20 -05:00
|
|
|
(0xF0 , 0x90 ... 0xBF) => (),
|
|
|
|
(0xF1 ... 0xF3, 0x80 ... 0xBF) => (),
|
|
|
|
(0xF4 , 0x80 ... 0x8F) => (),
|
2014-07-10 11:21:16 -05:00
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
error!();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if subseqidx < total {
|
|
|
|
unsafe {
|
2015-01-07 10:58:31 -06:00
|
|
|
res.as_mut_vec().push_all(&v[subseqidx..total])
|
2014-07-10 11:21:16 -05:00
|
|
|
};
|
|
|
|
}
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
Cow::Owned(res)
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
|
|
|
|
2014-07-04 15:38:13 -05:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a `String`, returning `None`
|
2014-07-10 10:43:03 -05:00
|
|
|
/// if `v` contains any invalid data.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-10 10:43:03 -05:00
|
|
|
///
|
|
|
|
/// ```rust
|
2014-07-04 15:38:13 -05:00
|
|
|
/// // 𝄞music
|
2014-11-17 02:39:01 -06:00
|
|
|
/// let mut v = &mut [0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0x0069, 0x0063];
|
2014-12-28 12:29:56 -06:00
|
|
|
/// assert_eq!(String::from_utf16(v).unwrap(),
|
|
|
|
/// "𝄞music".to_string());
|
2014-07-10 10:43:03 -05:00
|
|
|
///
|
2014-07-04 15:38:13 -05:00
|
|
|
/// // 𝄞mu<invalid>ic
|
2014-07-10 10:43:03 -05:00
|
|
|
/// v[4] = 0xD800;
|
2014-12-28 12:29:56 -06:00
|
|
|
/// assert!(String::from_utf16(v).is_err());
|
2014-07-10 10:43:03 -05:00
|
|
|
/// ```
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
|
2014-10-03 15:20:04 -05:00
|
|
|
let mut s = String::with_capacity(v.len());
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
for c in unicode_str::utf16_items(v) {
|
2014-07-10 10:43:03 -05:00
|
|
|
match c {
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
Utf16Item::ScalarValue(c) => s.push(c),
|
2014-12-28 12:29:56 -06:00
|
|
|
Utf16Item::LoneSurrogate(_) => return Err(FromUtf16Error(())),
|
2014-07-10 10:43:03 -05:00
|
|
|
}
|
|
|
|
}
|
2014-12-28 12:29:56 -06:00
|
|
|
Ok(s)
|
2014-07-10 10:43:03 -05:00
|
|
|
}
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-07-10 10:53:51 -05:00
|
|
|
/// Decode a UTF-16 encoded vector `v` into a string, replacing
|
|
|
|
/// invalid data with the replacement character (U+FFFD).
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
|
|
|
///
|
2014-07-10 10:53:51 -05:00
|
|
|
/// ```rust
|
2014-07-04 15:38:13 -05:00
|
|
|
/// // 𝄞mus<invalid>ic<invalid>
|
2014-11-17 02:39:01 -06:00
|
|
|
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
|
|
|
|
/// 0x0073, 0xDD1E, 0x0069, 0x0063,
|
|
|
|
/// 0xD834];
|
2014-07-10 10:53:51 -05:00
|
|
|
///
|
|
|
|
/// assert_eq!(String::from_utf16_lossy(v),
|
2014-12-09 16:08:10 -06:00
|
|
|
/// "𝄞mus\u{FFFD}ic\u{FFFD}".to_string());
|
2014-07-10 10:53:51 -05:00
|
|
|
/// ```
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-07-10 10:53:51 -05:00
|
|
|
pub fn from_utf16_lossy(v: &[u16]) -> String {
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
|
2014-07-10 10:53:51 -05:00
|
|
|
}
|
2014-07-04 15:18:11 -05:00
|
|
|
|
2014-11-20 12:11:15 -06:00
|
|
|
/// Creates a new `String` from a length, capacity, and pointer.
|
|
|
|
///
|
|
|
|
/// This is unsafe because:
|
|
|
|
/// * We call `Vec::from_raw_parts` to get a `Vec<u8>`;
|
|
|
|
/// * We assume that the `Vec` contains valid UTF-8.
|
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String {
|
2014-11-20 12:11:15 -06:00
|
|
|
String {
|
|
|
|
vec: Vec::from_raw_parts(buf, length, capacity),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Converts a vector of bytes to a new `String` without checking if
|
|
|
|
/// it contains valid UTF-8. This is unsafe because it assumes that
|
|
|
|
/// the UTF-8-ness of the vector has already been validated.
|
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-20 12:11:15 -06:00
|
|
|
pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
|
|
|
|
String { vec: bytes }
|
|
|
|
}
|
|
|
|
|
2014-04-10 05:55:34 -05:00
|
|
|
/// Return the underlying byte buffer, encoded as UTF-8.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let s = String::from_str("hello");
|
|
|
|
/// let bytes = s.into_bytes();
|
2015-02-24 12:15:45 -06:00
|
|
|
/// assert_eq!(bytes, [104, 101, 108, 108, 111]);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-04-10 05:55:34 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-04-10 05:55:34 -05:00
|
|
|
pub fn into_bytes(self) -> Vec<u8> {
|
|
|
|
self.vec
|
|
|
|
}
|
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
/// Pushes the given string onto this string buffer.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("foo");
|
|
|
|
/// s.push_str("bar");
|
|
|
|
/// assert_eq!(s.as_slice(), "foobar");
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-04-02 18:54:22 -05:00
|
|
|
pub fn push_str(&mut self, string: &str) {
|
|
|
|
self.vec.push_all(string.as_bytes())
|
|
|
|
}
|
|
|
|
|
2014-12-28 12:29:56 -06:00
|
|
|
/// Returns the number of bytes that this string buffer can hold without
|
|
|
|
/// reallocating.
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let s = String::with_capacity(10);
|
2014-09-22 10:28:35 -05:00
|
|
|
/// assert!(s.capacity() >= 10);
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
/// ```
|
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn capacity(&self) -> usize {
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
self.vec.capacity()
|
|
|
|
}
|
|
|
|
|
2014-12-28 12:29:56 -06:00
|
|
|
/// Reserves capacity for at least `additional` more bytes to be inserted
|
|
|
|
/// in the given `String`. The collection may reserve more space to avoid
|
|
|
|
/// frequent reallocations.
|
2014-11-06 11:24:47 -06:00
|
|
|
///
|
|
|
|
/// # Panics
|
|
|
|
///
|
2015-02-04 20:17:19 -06:00
|
|
|
/// Panics if the new capacity overflows `usize`.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::new();
|
|
|
|
/// s.reserve(10);
|
2014-09-22 10:28:35 -05:00
|
|
|
/// assert!(s.capacity() >= 10);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn reserve(&mut self, additional: usize) {
|
2014-11-06 11:24:47 -06:00
|
|
|
self.vec.reserve(additional)
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2014-12-28 12:29:56 -06:00
|
|
|
/// Reserves the minimum capacity for exactly `additional` more bytes to be
|
|
|
|
/// inserted in the given `String`. Does nothing if the capacity is already
|
|
|
|
/// sufficient.
|
2014-11-06 11:24:47 -06:00
|
|
|
///
|
2014-12-28 12:29:56 -06:00
|
|
|
/// Note that the allocator may give the collection more space than it
|
|
|
|
/// requests. Therefore capacity can not be relied upon to be precisely
|
|
|
|
/// minimal. Prefer `reserve` if future insertions are expected.
|
2014-11-06 11:24:47 -06:00
|
|
|
///
|
|
|
|
/// # Panics
|
|
|
|
///
|
2015-02-04 20:17:19 -06:00
|
|
|
/// Panics if the new capacity overflows `usize`.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::new();
|
2014-11-06 11:24:47 -06:00
|
|
|
/// s.reserve(10);
|
|
|
|
/// assert!(s.capacity() >= 10);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn reserve_exact(&mut self, additional: usize) {
|
2014-11-06 11:24:47 -06:00
|
|
|
self.vec.reserve_exact(additional)
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Shrinks the capacity of this string buffer to match its length.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("foo");
|
|
|
|
/// s.reserve(100);
|
2014-09-22 10:28:35 -05:00
|
|
|
/// assert!(s.capacity() >= 100);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// s.shrink_to_fit();
|
2014-09-22 10:28:35 -05:00
|
|
|
/// assert_eq!(s.capacity(), 3);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-04-02 18:54:22 -05:00
|
|
|
pub fn shrink_to_fit(&mut self) {
|
|
|
|
self.vec.shrink_to_fit()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Adds the given character to the end of the string.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("abc");
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
/// s.push('1');
|
|
|
|
/// s.push('2');
|
|
|
|
/// s.push('3');
|
2014-07-27 05:40:39 -05:00
|
|
|
/// assert_eq!(s.as_slice(), "abc123");
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
pub fn push(&mut self, ch: char) {
|
2014-12-20 11:17:58 -06:00
|
|
|
if (ch as u32) < 0x80 {
|
|
|
|
self.vec.push(ch as u8);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
let cur_len = self.len();
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
// This may use up to 4 bytes.
|
2014-11-06 11:24:47 -06:00
|
|
|
self.vec.reserve(4);
|
2014-04-02 18:54:22 -05:00
|
|
|
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
unsafe {
|
2014-04-02 18:54:22 -05:00
|
|
|
// Attempt to not use an intermediate buffer by just pushing bytes
|
|
|
|
// directly onto this string.
|
2014-08-06 22:03:55 -05:00
|
|
|
let slice = RawSlice {
|
2015-02-05 14:08:33 -06:00
|
|
|
data: self.vec.as_ptr().offset(cur_len as isize),
|
std: Recreate a `collections` module
As with the previous commit with `librand`, this commit shuffles around some
`collections` code. The new state of the world is similar to that of librand:
* The libcollections crate now only depends on libcore and liballoc.
* The standard library has a new module, `std::collections`. All functionality
of libcollections is reexported through this module.
I would like to stress that this change is purely cosmetic. There are very few
alterations to these primitives.
There are a number of notable points about the new organization:
* std::{str, slice, string, vec} all moved to libcollections. There is no reason
that these primitives shouldn't be necessarily usable in a freestanding
context that has allocation. These are all reexported in their usual places in
the standard library.
* The `hashmap`, and transitively the `lru_cache`, modules no longer reside in
`libcollections`, but rather in libstd. The reason for this is because the
`HashMap::new` contructor requires access to the OSRng for initially seeding
the hash map. Beyond this requirement, there is no reason that the hashmap
could not move to libcollections.
I do, however, have a plan to move the hash map to the collections module. The
`HashMap::new` function could be altered to require that the `H` hasher
parameter ascribe to the `Default` trait, allowing the entire `hashmap` module
to live in libcollections. The key idea would be that the default hasher would
be different in libstd. Something along the lines of:
// src/libstd/collections/mod.rs
pub type HashMap<K, V, H = RandomizedSipHasher> =
core_collections::HashMap<K, V, H>;
This is not possible today because you cannot invoke static methods through
type aliases. If we modified the compiler, however, to allow invocation of
static methods through type aliases, then this type definition would
essentially be switching the default hasher from `SipHasher` in libcollections
to a libstd-defined `RandomizedSipHasher` type. This type's `Default`
implementation would randomly seed the `SipHasher` instance, and otherwise
perform the same as `SipHasher`.
This future state doesn't seem incredibly far off, but until that time comes,
the hashmap module will live in libstd to not compromise on functionality.
* In preparation for the hashmap moving to libcollections, the `hash` module has
moved from libstd to libcollections. A previously snapshotted commit enables a
distinct `Writer` trait to live in the `hash` module which `Hash`
implementations are now parameterized over.
Due to using a custom trait, the `SipHasher` implementation has lost its
specialized methods for writing integers. These can be re-added
backwards-compatibly in the future via default methods if necessary, but the
FNV hashing should satisfy much of the need for speedier hashing.
A list of breaking changes:
* HashMap::{get, get_mut} no longer fails with the key formatted into the error
message with `{:?}`, instead, a generic message is printed. With backtraces,
it should still be not-too-hard to track down errors.
* The HashMap, HashSet, and LruCache types are now available through
std::collections instead of the collections crate.
* Manual implementations of hash should be parameterized over `hash::Writer`
instead of just `Writer`.
[breaking-change]
2014-05-29 20:50:12 -05:00
|
|
|
len: 4,
|
|
|
|
};
|
2014-08-13 18:02:31 -05:00
|
|
|
let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0);
|
2014-04-02 18:54:22 -05:00
|
|
|
self.vec.set_len(cur_len + used);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Works with the underlying buffer as a byte slice.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let s = String::from_str("hello");
|
2014-08-06 04:59:40 -05:00
|
|
|
/// let b: &[_] = &[104, 101, 108, 108, 111];
|
|
|
|
/// assert_eq!(s.as_bytes(), b);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-05 12:48:20 -06:00
|
|
|
pub fn as_bytes(&self) -> &[u8] {
|
2015-02-01 20:53:25 -06:00
|
|
|
&self.vec
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2014-08-04 05:48:39 -05:00
|
|
|
/// Shortens a string to the specified length.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-11-11 12:36:09 -06:00
|
|
|
/// # Panics
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-11-11 12:36:09 -06:00
|
|
|
/// Panics if `new_len` > current length,
|
2014-10-05 06:15:59 -05:00
|
|
|
/// or if `new_len` is not a character boundary.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("hello");
|
|
|
|
/// s.truncate(2);
|
|
|
|
/// assert_eq!(s.as_slice(), "he");
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn truncate(&mut self, new_len: usize) {
|
2014-11-27 10:45:50 -06:00
|
|
|
assert!(self.is_char_boundary(new_len));
|
2014-10-05 06:15:59 -05:00
|
|
|
self.vec.truncate(new_len)
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2014-07-27 05:40:39 -05:00
|
|
|
/// Removes the last character from the string buffer and returns it.
|
|
|
|
/// Returns `None` if this string buffer is empty.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("foo");
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
/// assert_eq!(s.pop(), Some('o'));
|
|
|
|
/// assert_eq!(s.pop(), Some('o'));
|
|
|
|
/// assert_eq!(s.pop(), Some('f'));
|
|
|
|
/// assert_eq!(s.pop(), None);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2014-05-08 15:42:40 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
collections: Stabilize String
# Rationale
When dealing with strings, many functions deal with either a `char` (unicode
codepoint) or a byte (utf-8 encoding related). There is often an inconsistent
way in which methods are referred to as to whether they contain "byte", "char",
or nothing in their name. There are also issues open to rename *all* methods to
reflect that they operate on utf8 encodings or bytes (e.g. utf8_len() or
byte_len()).
The current state of String seems to largely be what is desired, so this PR
proposes the following rationale for methods dealing with bytes or characters:
> When constructing a string, the input encoding *must* be mentioned (e.g.
> from_utf8). This makes it clear what exactly the input type is expected to be
> in terms of encoding.
>
> When a method operates on anything related to an *index* within the string
> such as length, capacity, position, etc, the method *implicitly* operates on
> bytes. It is an understood fact that String is a utf-8 encoded string, and
> burdening all methods with "bytes" would be redundant.
>
> When a method operates on the *contents* of a string, such as push() or pop(),
> then "char" is the default type. A String can loosely be thought of as being a
> collection of unicode codepoints, but not all collection-related operations
> make sense because some can be woefully inefficient.
# Method stabilization
The following methods have been marked #[stable]
* The String type itself
* String::new
* String::with_capacity
* String::from_utf16_lossy
* String::into_bytes
* String::as_bytes
* String::len
* String::clear
* String::as_slice
The following methods have been marked #[unstable]
* String::from_utf8 - The error type in the returned `Result` may change to
provide a nicer message when it's `unwrap()`'d
* String::from_utf8_lossy - The returned `MaybeOwned` type still needs
stabilization
* String::from_utf16 - The return type may change to become a `Result` which
includes more contextual information like where the error
occurred.
* String::from_chars - This is equivalent to iter().collect(), but currently not
as ergonomic.
* String::from_char - This method is the equivalent of Vec::from_elem, and has
been marked #[unstable] becuase it can be seen as a
duplicate of iterator-based functionality as well as
possibly being renamed.
* String::push_str - This *can* be emulated with .extend(foo.chars()), but is
less efficient because of decoding/encoding. Due to the
desire to minimize API surface this may be able to be
removed in the future for something possibly generic with
no loss in performance.
* String::grow - This is a duplicate of iterator-based functionality, which may
become more ergonomic in the future.
* String::capacity - This function was just added.
* String::push - This function was just added.
* String::pop - This function was just added.
* String::truncate - The failure conventions around String methods and byte
indices isn't totally clear at this time, so the failure
semantics and return value of this method are subject to
change.
* String::as_mut_vec - the naming of this method may change.
* string::raw::* - these functions are all waiting on [an RFC][2]
[2]: https://github.com/rust-lang/rfcs/pull/240
The following method have been marked #[experimental]
* String::from_str - This function only exists as it's more efficient than
to_string(), but having a less ergonomic function for
performance reasons isn't the greatest reason to keep it
around. Like Vec::push_all, this has been marked
experimental for now.
The following methods have been #[deprecated]
* String::append - This method has been deprecated to remain consistent with the
deprecation of Vec::append. While convenient, it is one of
the only functional-style apis on String, and requires more
though as to whether it belongs as a first-class method or
now (and how it relates to other collections).
* String::from_byte - This is fairly rare functionality and can be emulated with
str::from_utf8 plus an assert plus a call to to_string().
Additionally, String::from_char could possibly be used.
* String::byte_capacity - Renamed to String::capacity due to the rationale
above.
* String::push_char - Renamed to String::push due to the rationale above.
* String::pop_char - Renamed to String::pop due to the rationale above.
* String::push_bytes - There are a number of `unsafe` functions on the `String`
type which allow bypassing utf-8 checks. These have all
been deprecated in favor of calling `.as_mut_vec()` and
then operating directly on the vector returned. These
methods were deprecated because naming them with relation
to other methods was difficult to rationalize and it's
arguably more composable to call .as_mut_vec().
* String::as_mut_bytes - See push_bytes
* String::push_byte - See push_bytes
* String::pop_byte - See push_bytes
* String::shift_byte - See push_bytes
# Reservation methods
This commit does not yet touch the methods for reserving bytes. The methods on
Vec have also not yet been modified. These methods are discussed in the upcoming
[Collections reform RFC][1]
[1]: https://github.com/aturon/rfcs/blob/collections-conventions/active/0000-collections-conventions.md#implicit-growth
2014-09-22 09:12:10 -05:00
|
|
|
pub fn pop(&mut self) -> Option<char> {
|
2014-05-08 15:42:40 -05:00
|
|
|
let len = self.len();
|
|
|
|
if len == 0 {
|
|
|
|
return None
|
|
|
|
}
|
|
|
|
|
2014-11-27 10:45:50 -06:00
|
|
|
let CharRange {ch, next} = self.char_range_at_reverse(len);
|
2014-05-08 15:42:40 -05:00
|
|
|
unsafe {
|
|
|
|
self.vec.set_len(next);
|
|
|
|
}
|
|
|
|
Some(ch)
|
|
|
|
}
|
|
|
|
|
2014-09-22 10:24:14 -05:00
|
|
|
/// Removes the character from the string buffer at byte position `idx` and
|
2014-12-28 12:29:56 -06:00
|
|
|
/// returns it.
|
2014-05-08 15:42:40 -05:00
|
|
|
///
|
|
|
|
/// # Warning
|
|
|
|
///
|
2014-11-16 10:28:13 -06:00
|
|
|
/// This is an O(n) operation as it requires copying every element in the
|
2014-09-22 10:24:14 -05:00
|
|
|
/// buffer.
|
|
|
|
///
|
2014-10-09 14:17:22 -05:00
|
|
|
/// # Panics
|
2014-09-22 10:24:14 -05:00
|
|
|
///
|
2014-12-28 12:29:56 -06:00
|
|
|
/// If `idx` does not lie on a character boundary, or if it is out of
|
|
|
|
/// bounds, then this function will panic.
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("foo");
|
2014-12-28 12:29:56 -06:00
|
|
|
/// assert_eq!(s.remove(0), 'f');
|
|
|
|
/// assert_eq!(s.remove(1), 'o');
|
|
|
|
/// assert_eq!(s.remove(0), 'o');
|
2014-07-27 05:40:39 -05:00
|
|
|
/// ```
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn remove(&mut self, idx: usize) -> char {
|
2014-04-02 18:54:22 -05:00
|
|
|
let len = self.len();
|
2014-12-28 12:29:56 -06:00
|
|
|
assert!(idx <= len);
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-11-27 10:45:50 -06:00
|
|
|
let CharRange { ch, next } = self.char_range_at(idx);
|
2014-05-08 15:42:40 -05:00
|
|
|
unsafe {
|
2015-02-23 13:39:16 -06:00
|
|
|
ptr::copy(self.vec.as_mut_ptr().offset(idx as isize),
|
|
|
|
self.vec.as_ptr().offset(next as isize),
|
|
|
|
len - next);
|
2014-09-22 10:24:14 -05:00
|
|
|
self.vec.set_len(len - (next - idx));
|
2014-05-08 15:42:40 -05:00
|
|
|
}
|
2014-12-28 12:29:56 -06:00
|
|
|
ch
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
2014-04-12 07:44:31 -05:00
|
|
|
|
2014-09-22 10:24:14 -05:00
|
|
|
/// Insert a character into the string buffer at byte position `idx`.
|
|
|
|
///
|
|
|
|
/// # Warning
|
|
|
|
///
|
2014-11-16 10:28:13 -06:00
|
|
|
/// This is an O(n) operation as it requires copying every element in the
|
2014-09-22 10:24:14 -05:00
|
|
|
/// buffer.
|
|
|
|
///
|
2014-10-09 14:17:22 -05:00
|
|
|
/// # Panics
|
2014-09-22 10:24:14 -05:00
|
|
|
///
|
|
|
|
/// If `idx` does not lie on a character boundary or is out of bounds, then
|
2014-10-09 14:17:22 -05:00
|
|
|
/// this function will panic.
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn insert(&mut self, idx: usize, ch: char) {
|
2014-09-22 10:24:14 -05:00
|
|
|
let len = self.len();
|
|
|
|
assert!(idx <= len);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert!(self.is_char_boundary(idx));
|
2014-11-06 11:24:47 -06:00
|
|
|
self.vec.reserve(4);
|
2014-12-30 02:19:41 -06:00
|
|
|
let mut bits = [0; 4];
|
2014-11-17 02:39:01 -06:00
|
|
|
let amt = ch.encode_utf8(&mut bits).unwrap();
|
2014-09-22 10:24:14 -05:00
|
|
|
|
|
|
|
unsafe {
|
2015-02-23 13:39:16 -06:00
|
|
|
ptr::copy(self.vec.as_mut_ptr().offset((idx + amt) as isize),
|
|
|
|
self.vec.as_ptr().offset(idx as isize),
|
|
|
|
len - idx);
|
|
|
|
ptr::copy(self.vec.as_mut_ptr().offset(idx as isize),
|
|
|
|
bits.as_ptr(),
|
|
|
|
amt);
|
2014-09-22 10:24:14 -05:00
|
|
|
self.vec.set_len(len + amt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-12 07:44:31 -05:00
|
|
|
/// Views the string buffer as a mutable sequence of bytes.
|
|
|
|
///
|
2014-07-27 05:40:39 -05:00
|
|
|
/// This is unsafe because it does not check
|
|
|
|
/// to ensure that the resulting string will be valid UTF-8.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-07-27 05:40:39 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = String::from_str("hello");
|
|
|
|
/// unsafe {
|
|
|
|
/// let vec = s.as_mut_vec();
|
2015-02-24 12:15:45 -06:00
|
|
|
/// assert!(vec == &[104, 101, 108, 108, 111]);
|
2014-07-27 05:40:39 -05:00
|
|
|
/// vec.reverse();
|
|
|
|
/// }
|
|
|
|
/// assert_eq!(s.as_slice(), "olleh");
|
|
|
|
/// ```
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-05 12:48:20 -06:00
|
|
|
pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
|
2014-04-12 07:44:31 -05:00
|
|
|
&mut self.vec
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-10-30 15:43:24 -05:00
|
|
|
/// Return the number of bytes in this string.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-10-30 15:43:24 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let a = "foo".to_string();
|
|
|
|
/// assert_eq!(a.len(), 3);
|
|
|
|
/// ```
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
pub fn len(&self) -> usize { self.vec.len() }
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-10-30 15:43:24 -05:00
|
|
|
/// Returns true if the string contains no bytes
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-10-30 15:43:24 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut v = String::new();
|
|
|
|
/// assert!(v.is_empty());
|
|
|
|
/// v.push('a');
|
|
|
|
/// assert!(!v.is_empty());
|
|
|
|
/// ```
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-10-30 15:43:24 -05:00
|
|
|
pub fn is_empty(&self) -> bool { self.len() == 0 }
|
|
|
|
|
|
|
|
/// Truncates the string, returning it to 0 length.
|
|
|
|
///
|
2014-12-08 23:28:07 -06:00
|
|
|
/// # Examples
|
2014-10-30 15:43:24 -05:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// let mut s = "foo".to_string();
|
|
|
|
/// s.clear();
|
|
|
|
/// assert!(s.is_empty());
|
|
|
|
/// ```
|
2014-05-11 05:49:09 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-10-30 15:43:24 -05:00
|
|
|
pub fn clear(&mut self) {
|
2014-05-11 05:49:09 -05:00
|
|
|
self.vec.clear()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-28 12:29:56 -06:00
|
|
|
impl FromUtf8Error {
|
|
|
|
/// Consume this error, returning the bytes that were attempted to make a
|
|
|
|
/// `String` with.
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub fn into_bytes(self) -> Vec<u8> { self.bytes }
|
|
|
|
|
|
|
|
/// Access the underlying UTF8-error that was the cause of this error.
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-12-28 12:29:56 -06:00
|
|
|
pub fn utf8_error(&self) -> Utf8Error { self.error }
|
|
|
|
}
|
|
|
|
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl fmt::Display for FromUtf8Error {
|
2014-12-28 12:29:56 -06:00
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
2015-01-20 17:45:07 -06:00
|
|
|
fmt::Display::fmt(&self.error, f)
|
2014-12-20 02:09:35 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl Error for FromUtf8Error {
|
|
|
|
fn description(&self) -> &str { "invalid utf-8" }
|
2014-12-28 12:29:56 -06:00
|
|
|
}
|
|
|
|
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl fmt::Display for FromUtf16Error {
|
2014-12-28 12:29:56 -06:00
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
2015-01-20 17:45:07 -06:00
|
|
|
fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
|
2014-12-20 02:09:35 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl Error for FromUtf16Error {
|
|
|
|
fn description(&self) -> &str { "invalid utf-16" }
|
2014-12-28 12:29:56 -06:00
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-05-22 18:57:53 -05:00
|
|
|
impl FromIterator<char> for String {
|
2015-02-18 12:06:21 -06:00
|
|
|
fn from_iter<I: IntoIterator<Item=char>>(iter: I) -> String {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut buf = String::new();
|
2015-02-18 12:06:21 -06:00
|
|
|
buf.extend(iter);
|
2014-04-02 18:54:22 -05:00
|
|
|
buf
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-12-07 14:32:00 -06:00
|
|
|
impl<'a> FromIterator<&'a str> for String {
|
2015-02-18 12:06:21 -06:00
|
|
|
fn from_iter<I: IntoIterator<Item=&'a str>>(iter: I) -> String {
|
2014-12-07 14:32:00 -06:00
|
|
|
let mut buf = String::new();
|
2015-02-18 12:06:21 -06:00
|
|
|
buf.extend(iter);
|
2014-12-07 14:32:00 -06:00
|
|
|
buf
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections",
|
2015-01-12 20:40:19 -06:00
|
|
|
reason = "waiting on Extend stabilization")]
|
2014-11-07 18:39:39 -06:00
|
|
|
impl Extend<char> for String {
|
2015-02-18 09:04:30 -06:00
|
|
|
fn extend<I: IntoIterator<Item=char>>(&mut self, iterable: I) {
|
|
|
|
let iterator = iterable.into_iter();
|
2014-12-07 14:31:24 -06:00
|
|
|
let (lower_bound, _) = iterator.size_hint();
|
|
|
|
self.reserve(lower_bound);
|
2014-04-02 18:54:22 -05:00
|
|
|
for ch in iterator {
|
2014-09-22 10:28:35 -05:00
|
|
|
self.push(ch)
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections",
|
2015-01-12 20:40:19 -06:00
|
|
|
reason = "waiting on Extend stabilization")]
|
2014-12-07 14:32:00 -06:00
|
|
|
impl<'a> Extend<&'a str> for String {
|
2015-02-18 09:04:30 -06:00
|
|
|
fn extend<I: IntoIterator<Item=&'a str>>(&mut self, iterable: I) {
|
|
|
|
let iterator = iterable.into_iter();
|
2014-12-07 14:32:00 -06:00
|
|
|
// A guess that at least one byte per iterator element will be needed.
|
|
|
|
let (lower_bound, _) = iterator.size_hint();
|
|
|
|
self.reserve(lower_bound);
|
|
|
|
for s in iterator {
|
|
|
|
self.push_str(s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-20 23:14:05 -06:00
|
|
|
impl PartialEq for String {
|
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &String) -> bool { PartialEq::eq(&**self, &**other) }
|
|
|
|
#[inline]
|
|
|
|
fn ne(&self, other: &String) -> bool { PartialEq::ne(&**self, &**other) }
|
|
|
|
}
|
|
|
|
|
|
|
|
macro_rules! impl_eq {
|
|
|
|
($lhs:ty, $rhs: ty) => {
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-20 23:14:05 -06:00
|
|
|
impl<'a> PartialEq<$rhs> for $lhs {
|
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&**self, &**other) }
|
|
|
|
#[inline]
|
|
|
|
fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&**self, &**other) }
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-20 23:14:05 -06:00
|
|
|
impl<'a> PartialEq<$lhs> for $rhs {
|
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&**self, &**other) }
|
|
|
|
#[inline]
|
|
|
|
fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&**self, &**other) }
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-14 11:18:10 -06:00
|
|
|
impl_eq! { String, &'a str }
|
2015-02-12 01:16:32 -06:00
|
|
|
impl_eq! { Cow<'a, str>, String }
|
2014-11-20 23:14:05 -06:00
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-12 01:16:32 -06:00
|
|
|
impl<'a, 'b> PartialEq<&'b str> for Cow<'a, str> {
|
2014-11-20 23:14:05 -06:00
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &&'b str) -> bool { PartialEq::eq(&**self, &**other) }
|
|
|
|
#[inline]
|
|
|
|
fn ne(&self, other: &&'b str) -> bool { PartialEq::ne(&**self, &**other) }
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-12 01:16:32 -06:00
|
|
|
impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b str {
|
2014-11-20 23:14:05 -06:00
|
|
|
#[inline]
|
2015-02-12 01:16:32 -06:00
|
|
|
fn eq(&self, other: &Cow<'a, str>) -> bool { PartialEq::eq(&**self, &**other) }
|
2014-11-20 23:14:05 -06:00
|
|
|
#[inline]
|
2015-02-12 01:16:32 -06:00
|
|
|
fn ne(&self, other: &Cow<'a, str>) -> bool { PartialEq::ne(&**self, &**other) }
|
2014-11-20 23:14:05 -06:00
|
|
|
}
|
|
|
|
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections", reason = "waiting on Str stabilization")]
|
2014-05-22 18:57:53 -05:00
|
|
|
impl Str for String {
|
2014-04-02 18:54:22 -05:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-05 12:48:20 -06:00
|
|
|
fn as_slice(&self) -> &str {
|
2015-02-01 20:53:25 -06:00
|
|
|
unsafe { mem::transmute(&*self.vec) }
|
2014-05-19 19:23:26 -05:00
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-05-22 18:57:53 -05:00
|
|
|
impl Default for String {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-05-22 18:57:53 -05:00
|
|
|
fn default() -> String {
|
|
|
|
String::new()
|
2014-05-20 01:19:56 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl fmt::Display for String {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-12-20 02:09:35 -06:00
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
2015-01-20 17:45:07 -06:00
|
|
|
fmt::Display::fmt(&**self, f)
|
2014-12-20 02:09:35 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl fmt::Debug for String {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-04-02 18:54:22 -05:00
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
2015-01-20 17:45:07 -06:00
|
|
|
fmt::Debug::fmt(&**self, f)
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-17 22:48:07 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
|
|
|
impl hash::Hash for String {
|
|
|
|
#[inline]
|
|
|
|
fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
|
|
|
|
(**self).hash(hasher)
|
|
|
|
}
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections",
|
2015-01-12 20:40:19 -06:00
|
|
|
reason = "recent addition, needs more experience")]
|
2014-12-31 14:45:13 -06:00
|
|
|
impl<'a> Add<&'a str> for String {
|
|
|
|
type Output = String;
|
|
|
|
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-12-01 13:08:56 -06:00
|
|
|
fn add(mut self, other: &str) -> String {
|
|
|
|
self.push_str(other);
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
impl ops::Index<ops::Range<usize>> for String {
|
2015-01-03 22:43:24 -06:00
|
|
|
type Output = str;
|
2014-10-04 17:59:10 -05:00
|
|
|
#[inline]
|
2015-02-04 20:17:19 -06:00
|
|
|
fn index(&self, index: &ops::Range<usize>) -> &str {
|
2015-02-18 13:48:57 -06:00
|
|
|
&self[..][*index]
|
2014-10-04 17:59:10 -05:00
|
|
|
}
|
2014-12-31 01:20:40 -06:00
|
|
|
}
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
impl ops::Index<ops::RangeTo<usize>> for String {
|
2015-01-03 22:43:24 -06:00
|
|
|
type Output = str;
|
2014-10-04 17:59:10 -05:00
|
|
|
#[inline]
|
2015-02-04 20:17:19 -06:00
|
|
|
fn index(&self, index: &ops::RangeTo<usize>) -> &str {
|
2015-02-18 13:48:57 -06:00
|
|
|
&self[..][*index]
|
2014-10-04 17:59:10 -05:00
|
|
|
}
|
2014-12-31 01:20:40 -06:00
|
|
|
}
|
2015-01-24 11:15:42 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-04 20:17:19 -06:00
|
|
|
impl ops::Index<ops::RangeFrom<usize>> for String {
|
2015-01-03 22:43:24 -06:00
|
|
|
type Output = str;
|
2014-10-04 17:59:10 -05:00
|
|
|
#[inline]
|
2015-02-04 20:17:19 -06:00
|
|
|
fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
|
2015-02-18 13:48:57 -06:00
|
|
|
&self[..][*index]
|
2014-10-04 17:59:10 -05:00
|
|
|
}
|
2014-12-31 01:20:40 -06:00
|
|
|
}
|
2015-01-27 22:06:46 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
|
|
|
impl ops::Index<ops::RangeFull> for String {
|
|
|
|
type Output = str;
|
|
|
|
#[inline]
|
|
|
|
fn index(&self, _index: &ops::RangeFull) -> &str {
|
2015-02-01 20:53:25 -06:00
|
|
|
unsafe { mem::transmute(&*self.vec) }
|
2015-01-27 22:06:46 -06:00
|
|
|
}
|
|
|
|
}
|
2014-09-26 23:46:22 -05:00
|
|
|
|
2015-01-23 23:48:20 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-01 13:53:20 -06:00
|
|
|
impl ops::Deref for String {
|
|
|
|
type Target = str;
|
|
|
|
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-02-05 12:48:20 -06:00
|
|
|
fn deref(&self) -> &str {
|
2015-02-18 13:48:57 -06:00
|
|
|
unsafe { mem::transmute(&self.vec[..]) }
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
}
|
2014-10-29 17:26:29 -05:00
|
|
|
}
|
|
|
|
|
2014-08-23 19:26:53 -05:00
|
|
|
/// Wrapper type providing a `&String` reference via `Deref`.
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections")]
|
2014-08-23 19:26:53 -05:00
|
|
|
pub struct DerefString<'a> {
|
|
|
|
x: DerefVec<'a, u8>
|
|
|
|
}
|
|
|
|
|
2015-01-01 13:53:20 -06:00
|
|
|
impl<'a> Deref for DerefString<'a> {
|
|
|
|
type Target = String;
|
|
|
|
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-08-23 19:26:53 -05:00
|
|
|
fn deref<'b>(&'b self) -> &'b String {
|
|
|
|
unsafe { mem::transmute(&*self.x) }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Convert a string slice to a wrapper type providing a `&String` reference.
|
2014-12-07 17:47:00 -06:00
|
|
|
///
|
|
|
|
/// # Examples
|
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// use std::string::as_string;
|
|
|
|
///
|
|
|
|
/// fn string_consumer(s: String) {
|
|
|
|
/// assert_eq!(s, "foo".to_string());
|
|
|
|
/// }
|
|
|
|
///
|
|
|
|
/// let string = as_string("foo").clone();
|
|
|
|
/// string_consumer(string);
|
|
|
|
/// ```
|
2015-01-22 20:22:03 -06:00
|
|
|
#[unstable(feature = "collections")]
|
2014-08-23 19:26:53 -05:00
|
|
|
pub fn as_string<'a>(x: &'a str) -> DerefString<'a> {
|
|
|
|
DerefString { x: as_vec(x.as_bytes()) }
|
|
|
|
}
|
|
|
|
|
2015-01-28 00:52:32 -06:00
|
|
|
#[unstable(feature = "collections", reason = "associated error type may change")]
|
2014-11-14 22:52:00 -06:00
|
|
|
impl FromStr for String {
|
2015-01-28 00:52:32 -06:00
|
|
|
type Err = ();
|
2014-11-14 22:52:00 -06:00
|
|
|
#[inline]
|
2015-01-28 00:52:32 -06:00
|
|
|
fn from_str(s: &str) -> Result<String, ()> {
|
|
|
|
Ok(String::from_str(s))
|
2014-11-14 22:52:00 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-15 19:38:03 -06:00
|
|
|
/// A generic trait for converting a value to a string
|
std: Stabilize the std::fmt module
This commit performs a final stabilization pass over the std::fmt module,
marking all necessary APIs as stable. One of the more interesting aspects of
this module is that it exposes a good deal of its runtime representation to the
outside world in order for `format_args!` to be able to construct the format
strings. Instead of hacking the compiler to assume that these items are stable,
this commit instead lays out a story for the stabilization and evolution of
these APIs.
There are three primary details used by the `format_args!` macro:
1. `Arguments` - an opaque package of a "compiled format string". This structure
is passed around and the `write` function is the source of truth for
transforming a compiled format string into a string at runtime. This must be
able to be constructed in stable code.
2. `Argument` - an opaque structure representing an argument to a format string.
This is *almost* a trait object as it's just a pointer/function pair, but due
to the function originating from one of many traits, it's not actually a
trait object. Like `Arguments`, this must be constructed from stable code.
3. `fmt::rt` - this module contains the runtime type definitions primarily for
the `rt::Argument` structure. Whenever an argument is formatted with
nonstandard flags, a corresponding `rt::Argument` is generated describing how
the argument is being formatted. This can be used to construct an
`Arguments`.
The primary interface to `std::fmt` is the `Arguments` structure, and as such
this type name is stabilize as-is today. It is expected for libraries to pass
around an `Arguments` structure to represent a pending formatted computation.
The remaining portions are largely "cruft" which would rather not be stabilized,
but due to the stability checks they must be. As a result, almost all pieces
have been renamed to represent that they are "version 1" of the formatting
representation. The theory is that at a later date if we change the
representation of these types we can add new definitions called "version 2" and
corresponding constructors for `Arguments`.
One of the other remaining large questions about the fmt module were how the
pending I/O reform would affect the signatures of methods in the module. Due to
[RFC 526][rfc], however, the writers of fmt are now incompatible with the
writers of io, so this question has largely been solved. As a result the
interfaces are largely stabilized as-is today.
[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0526-fmt-text-writer.md
Specifically, the following changes were made:
* The contents of `fmt::rt` were all moved under `fmt::rt::v1`
* `fmt::rt` is stable
* `fmt::rt::v1` is stable
* `Error` is stable
* `Writer` is stable
* `Writer::write_str` is stable
* `Writer::write_fmt` is stable
* `Formatter` is stable
* `Argument` has been renamed to `ArgumentV1` and is stable
* `ArgumentV1::new` is stable
* `ArgumentV1::from_uint` is stable
* `Arguments::new_v1` is stable (renamed from `new`)
* `Arguments::new_v1_formatted` is stable (renamed from `with_placeholders`)
* All formatting traits are now stable, as well as the `fmt` method.
* `fmt::write` is stable
* `fmt::format` is stable
* `Formatter::pad_integral` is stable
* `Formatter::pad` is stable
* `Formatter::write_str` is stable
* `Formatter::write_fmt` is stable
* Some assorted top level items which were only used by `format_args!` were
removed in favor of static functions on `ArgumentV1` as well.
* The formatting-flag-accessing methods remain unstable
Within the contents of the `fmt::rt::v1` module, the following actions were
taken:
* Reexports of all enum variants were removed
* All prefixes on enum variants were removed
* A few miscellaneous enum variants were renamed
* Otherwise all structs, fields, and variants were marked stable.
In addition to these actions in the `std::fmt` module, many implementations of
`Show` and `String` were stabilized as well.
In some other modules:
* `ToString` is now stable
* `ToString::to_string` is now stable
* `Vec` no longer implements `fmt::Writer` (this has moved to `String`)
This is a breaking change due to all of the changes to the `fmt::rt` module, but
this likely will not have much impact on existing programs.
Closes #20661
[breaking-change]
2015-01-13 17:42:53 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-15 19:38:03 -06:00
|
|
|
pub trait ToString {
|
|
|
|
/// Converts the value of `self` to an owned string
|
std: Stabilize the std::fmt module
This commit performs a final stabilization pass over the std::fmt module,
marking all necessary APIs as stable. One of the more interesting aspects of
this module is that it exposes a good deal of its runtime representation to the
outside world in order for `format_args!` to be able to construct the format
strings. Instead of hacking the compiler to assume that these items are stable,
this commit instead lays out a story for the stabilization and evolution of
these APIs.
There are three primary details used by the `format_args!` macro:
1. `Arguments` - an opaque package of a "compiled format string". This structure
is passed around and the `write` function is the source of truth for
transforming a compiled format string into a string at runtime. This must be
able to be constructed in stable code.
2. `Argument` - an opaque structure representing an argument to a format string.
This is *almost* a trait object as it's just a pointer/function pair, but due
to the function originating from one of many traits, it's not actually a
trait object. Like `Arguments`, this must be constructed from stable code.
3. `fmt::rt` - this module contains the runtime type definitions primarily for
the `rt::Argument` structure. Whenever an argument is formatted with
nonstandard flags, a corresponding `rt::Argument` is generated describing how
the argument is being formatted. This can be used to construct an
`Arguments`.
The primary interface to `std::fmt` is the `Arguments` structure, and as such
this type name is stabilize as-is today. It is expected for libraries to pass
around an `Arguments` structure to represent a pending formatted computation.
The remaining portions are largely "cruft" which would rather not be stabilized,
but due to the stability checks they must be. As a result, almost all pieces
have been renamed to represent that they are "version 1" of the formatting
representation. The theory is that at a later date if we change the
representation of these types we can add new definitions called "version 2" and
corresponding constructors for `Arguments`.
One of the other remaining large questions about the fmt module were how the
pending I/O reform would affect the signatures of methods in the module. Due to
[RFC 526][rfc], however, the writers of fmt are now incompatible with the
writers of io, so this question has largely been solved. As a result the
interfaces are largely stabilized as-is today.
[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0526-fmt-text-writer.md
Specifically, the following changes were made:
* The contents of `fmt::rt` were all moved under `fmt::rt::v1`
* `fmt::rt` is stable
* `fmt::rt::v1` is stable
* `Error` is stable
* `Writer` is stable
* `Writer::write_str` is stable
* `Writer::write_fmt` is stable
* `Formatter` is stable
* `Argument` has been renamed to `ArgumentV1` and is stable
* `ArgumentV1::new` is stable
* `ArgumentV1::from_uint` is stable
* `Arguments::new_v1` is stable (renamed from `new`)
* `Arguments::new_v1_formatted` is stable (renamed from `with_placeholders`)
* All formatting traits are now stable, as well as the `fmt` method.
* `fmt::write` is stable
* `fmt::format` is stable
* `Formatter::pad_integral` is stable
* `Formatter::pad` is stable
* `Formatter::write_str` is stable
* `Formatter::write_fmt` is stable
* Some assorted top level items which were only used by `format_args!` were
removed in favor of static functions on `ArgumentV1` as well.
* The formatting-flag-accessing methods remain unstable
Within the contents of the `fmt::rt::v1` module, the following actions were
taken:
* Reexports of all enum variants were removed
* All prefixes on enum variants were removed
* A few miscellaneous enum variants were renamed
* Otherwise all structs, fields, and variants were marked stable.
In addition to these actions in the `std::fmt` module, many implementations of
`Show` and `String` were stabilized as well.
In some other modules:
* `ToString` is now stable
* `ToString::to_string` is now stable
* `Vec` no longer implements `fmt::Writer` (this has moved to `String`)
This is a breaking change due to all of the changes to the `fmt::rt` module, but
this likely will not have much impact on existing programs.
Closes #20661
[breaking-change]
2015-01-13 17:42:53 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2014-11-15 19:38:03 -06:00
|
|
|
fn to_string(&self) -> String;
|
|
|
|
}
|
|
|
|
|
std: Stabilize the std::fmt module
This commit performs a final stabilization pass over the std::fmt module,
marking all necessary APIs as stable. One of the more interesting aspects of
this module is that it exposes a good deal of its runtime representation to the
outside world in order for `format_args!` to be able to construct the format
strings. Instead of hacking the compiler to assume that these items are stable,
this commit instead lays out a story for the stabilization and evolution of
these APIs.
There are three primary details used by the `format_args!` macro:
1. `Arguments` - an opaque package of a "compiled format string". This structure
is passed around and the `write` function is the source of truth for
transforming a compiled format string into a string at runtime. This must be
able to be constructed in stable code.
2. `Argument` - an opaque structure representing an argument to a format string.
This is *almost* a trait object as it's just a pointer/function pair, but due
to the function originating from one of many traits, it's not actually a
trait object. Like `Arguments`, this must be constructed from stable code.
3. `fmt::rt` - this module contains the runtime type definitions primarily for
the `rt::Argument` structure. Whenever an argument is formatted with
nonstandard flags, a corresponding `rt::Argument` is generated describing how
the argument is being formatted. This can be used to construct an
`Arguments`.
The primary interface to `std::fmt` is the `Arguments` structure, and as such
this type name is stabilize as-is today. It is expected for libraries to pass
around an `Arguments` structure to represent a pending formatted computation.
The remaining portions are largely "cruft" which would rather not be stabilized,
but due to the stability checks they must be. As a result, almost all pieces
have been renamed to represent that they are "version 1" of the formatting
representation. The theory is that at a later date if we change the
representation of these types we can add new definitions called "version 2" and
corresponding constructors for `Arguments`.
One of the other remaining large questions about the fmt module were how the
pending I/O reform would affect the signatures of methods in the module. Due to
[RFC 526][rfc], however, the writers of fmt are now incompatible with the
writers of io, so this question has largely been solved. As a result the
interfaces are largely stabilized as-is today.
[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0526-fmt-text-writer.md
Specifically, the following changes were made:
* The contents of `fmt::rt` were all moved under `fmt::rt::v1`
* `fmt::rt` is stable
* `fmt::rt::v1` is stable
* `Error` is stable
* `Writer` is stable
* `Writer::write_str` is stable
* `Writer::write_fmt` is stable
* `Formatter` is stable
* `Argument` has been renamed to `ArgumentV1` and is stable
* `ArgumentV1::new` is stable
* `ArgumentV1::from_uint` is stable
* `Arguments::new_v1` is stable (renamed from `new`)
* `Arguments::new_v1_formatted` is stable (renamed from `with_placeholders`)
* All formatting traits are now stable, as well as the `fmt` method.
* `fmt::write` is stable
* `fmt::format` is stable
* `Formatter::pad_integral` is stable
* `Formatter::pad` is stable
* `Formatter::write_str` is stable
* `Formatter::write_fmt` is stable
* Some assorted top level items which were only used by `format_args!` were
removed in favor of static functions on `ArgumentV1` as well.
* The formatting-flag-accessing methods remain unstable
Within the contents of the `fmt::rt::v1` module, the following actions were
taken:
* Reexports of all enum variants were removed
* All prefixes on enum variants were removed
* A few miscellaneous enum variants were renamed
* Otherwise all structs, fields, and variants were marked stable.
In addition to these actions in the `std::fmt` module, many implementations of
`Show` and `String` were stabilized as well.
In some other modules:
* `ToString` is now stable
* `ToString::to_string` is now stable
* `Vec` no longer implements `fmt::Writer` (this has moved to `String`)
This is a breaking change due to all of the changes to the `fmt::rt` module, but
this likely will not have much impact on existing programs.
Closes #20661
[breaking-change]
2015-01-13 17:42:53 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-01-20 17:45:07 -06:00
|
|
|
impl<T: fmt::Display + ?Sized> ToString for T {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-12-20 02:09:35 -06:00
|
|
|
fn to_string(&self) -> String {
|
2015-02-13 17:56:32 -06:00
|
|
|
use core::fmt::Write;
|
2014-12-20 02:09:35 -06:00
|
|
|
let mut buf = String::new();
|
|
|
|
let _ = buf.write_fmt(format_args!("{}", self));
|
|
|
|
buf.shrink_to_fit();
|
|
|
|
buf
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-12 01:16:32 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
|
|
|
impl IntoCow<'static, str> for String {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-02-12 01:16:32 -06:00
|
|
|
fn into_cow(self) -> Cow<'static, str> {
|
2014-11-21 16:10:42 -06:00
|
|
|
Cow::Owned(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-12 01:16:32 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
|
|
|
impl<'a> IntoCow<'a, str> for &'a str {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2015-02-12 01:16:32 -06:00
|
|
|
fn into_cow(self) -> Cow<'a, str> {
|
2014-11-21 16:10:42 -06:00
|
|
|
Cow::Borrowed(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-12 01:16:32 -06:00
|
|
|
impl<'a> Str for Cow<'a, str> {
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
#[inline]
|
|
|
|
fn as_slice<'b>(&'b self) -> &'b str {
|
2015-02-01 20:53:25 -06:00
|
|
|
&**self
|
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one
`StrExt` trait to be included in the prelude. This means that
`UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into
one `StrExt` exported by the standard library. Some functionality is currently
duplicated with the `StrExt` present in libcore.
This commit also currently avoids any methods which require any form of pattern
to operate. These functions will be stabilized via a separate RFC.
Next, stability of methods and structures are as follows:
Stable
* from_utf8_unchecked
* CowString - after moving to std::string
* StrExt::as_bytes
* StrExt::as_ptr
* StrExt::bytes/Bytes - also made a struct instead of a typedef
* StrExt::char_indices/CharIndices - CharOffsets was renamed
* StrExt::chars/Chars
* StrExt::is_empty
* StrExt::len
* StrExt::lines/Lines
* StrExt::lines_any/LinesAny
* StrExt::slice_unchecked
* StrExt::trim
* StrExt::trim_left
* StrExt::trim_right
* StrExt::words/Words - also made a struct instead of a typedef
Unstable
* from_utf8 - the error type was changed to a `Result`, but the error type has
yet to prove itself
* from_c_str - this function will be handled by the c_str RFC
* FromStr - this trait will have an associated error type eventually
* StrExt::escape_default - needs iterators at least, unsure if it should make
the cut
* StrExt::escape_unicode - needs iterators at least, unsure if it should make
the cut
* StrExt::slice_chars - this function has yet to prove itself
* StrExt::slice_shift_char - awaiting conventions about slicing and shifting
* StrExt::graphemes/Graphemes - this functionality may only be in libunicode
* StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in
libunicode
* StrExt::width - this functionality may only be in libunicode
* StrExt::utf16_units - this functionality may only be in libunicode
* StrExt::nfd_chars - this functionality may only be in libunicode
* StrExt::nfkd_chars - this functionality may only be in libunicode
* StrExt::nfc_chars - this functionality may only be in libunicode
* StrExt::nfkc_chars - this functionality may only be in libunicode
* StrExt::is_char_boundary - naming is uncertain with container conventions
* StrExt::char_range_at - naming is uncertain with container conventions
* StrExt::char_range_at_reverse - naming is uncertain with container conventions
* StrExt::char_at - naming is uncertain with container conventions
* StrExt::char_at_reverse - naming is uncertain with container conventions
* StrVector::concat - this functionality may be replaced with iterators, but
it's not certain at this time
* StrVector::connect - as with concat, may be deprecated in favor of iterators
Deprecated
* StrAllocating and UnicodeStrPrelude have been merged into StrExit
* eq_slice - compiler implementation detail
* from_str - use the inherent parse() method
* is_utf8 - call from_utf8 instead
* replace - call the method instead
* truncate_utf16_at_nul - this is an implementation detail of windows and does
not need to be exposed.
* utf8_char_width - moved to libunicode
* utf16_items - moved to libunicode
* is_utf16 - moved to libunicode
* Utf16Items - moved to libunicode
* Utf16Item - moved to libunicode
* Utf16Encoder - moved to libunicode
* AnyLines - renamed to LinesAny and made a struct
* SendStr - use CowString<'static> instead
* str::raw - all functionality is deprecated
* StrExt::into_string - call to_string() instead
* StrExt::repeat - use iterators instead
* StrExt::char_len - use .chars().count() instead
* StrExt::is_alphanumeric - use .chars().all(..)
* StrExt::is_whitespace - use .chars().all(..)
Pending deprecation -- while slicing syntax is being worked out, these methods
are all #[unstable]
* Str - while currently used for generic programming, this trait will be
replaced with one of [], deref coercions, or a generic conversion trait.
* StrExt::slice - use slicing syntax instead
* StrExt::slice_to - use slicing syntax instead
* StrExt::slice_from - use slicing syntax instead
* StrExt::lev_distance - deprecated with no replacement
Awaiting stabilization due to patterns and/or matching
* StrExt::contains
* StrExt::contains_char
* StrExt::split
* StrExt::splitn
* StrExt::split_terminator
* StrExt::rsplitn
* StrExt::match_indices
* StrExt::split_str
* StrExt::starts_with
* StrExt::ends_with
* StrExt::trim_chars
* StrExt::trim_left_chars
* StrExt::trim_right_chars
* StrExt::find
* StrExt::rfind
* StrExt::find_str
* StrExt::subslice_offset
2014-12-10 11:02:31 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-12 01:16:32 -06:00
|
|
|
/// A clone-on-write string
|
|
|
|
#[deprecated(since = "1.0.0", reason = "use Cow<'a, str> instead")]
|
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
|
|
|
pub type CowString<'a> = Cow<'a, str>;
|
|
|
|
|
std: Stabilize the std::fmt module
This commit performs a final stabilization pass over the std::fmt module,
marking all necessary APIs as stable. One of the more interesting aspects of
this module is that it exposes a good deal of its runtime representation to the
outside world in order for `format_args!` to be able to construct the format
strings. Instead of hacking the compiler to assume that these items are stable,
this commit instead lays out a story for the stabilization and evolution of
these APIs.
There are three primary details used by the `format_args!` macro:
1. `Arguments` - an opaque package of a "compiled format string". This structure
is passed around and the `write` function is the source of truth for
transforming a compiled format string into a string at runtime. This must be
able to be constructed in stable code.
2. `Argument` - an opaque structure representing an argument to a format string.
This is *almost* a trait object as it's just a pointer/function pair, but due
to the function originating from one of many traits, it's not actually a
trait object. Like `Arguments`, this must be constructed from stable code.
3. `fmt::rt` - this module contains the runtime type definitions primarily for
the `rt::Argument` structure. Whenever an argument is formatted with
nonstandard flags, a corresponding `rt::Argument` is generated describing how
the argument is being formatted. This can be used to construct an
`Arguments`.
The primary interface to `std::fmt` is the `Arguments` structure, and as such
this type name is stabilize as-is today. It is expected for libraries to pass
around an `Arguments` structure to represent a pending formatted computation.
The remaining portions are largely "cruft" which would rather not be stabilized,
but due to the stability checks they must be. As a result, almost all pieces
have been renamed to represent that they are "version 1" of the formatting
representation. The theory is that at a later date if we change the
representation of these types we can add new definitions called "version 2" and
corresponding constructors for `Arguments`.
One of the other remaining large questions about the fmt module were how the
pending I/O reform would affect the signatures of methods in the module. Due to
[RFC 526][rfc], however, the writers of fmt are now incompatible with the
writers of io, so this question has largely been solved. As a result the
interfaces are largely stabilized as-is today.
[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0526-fmt-text-writer.md
Specifically, the following changes were made:
* The contents of `fmt::rt` were all moved under `fmt::rt::v1`
* `fmt::rt` is stable
* `fmt::rt::v1` is stable
* `Error` is stable
* `Writer` is stable
* `Writer::write_str` is stable
* `Writer::write_fmt` is stable
* `Formatter` is stable
* `Argument` has been renamed to `ArgumentV1` and is stable
* `ArgumentV1::new` is stable
* `ArgumentV1::from_uint` is stable
* `Arguments::new_v1` is stable (renamed from `new`)
* `Arguments::new_v1_formatted` is stable (renamed from `with_placeholders`)
* All formatting traits are now stable, as well as the `fmt` method.
* `fmt::write` is stable
* `fmt::format` is stable
* `Formatter::pad_integral` is stable
* `Formatter::pad` is stable
* `Formatter::write_str` is stable
* `Formatter::write_fmt` is stable
* Some assorted top level items which were only used by `format_args!` were
removed in favor of static functions on `ArgumentV1` as well.
* The formatting-flag-accessing methods remain unstable
Within the contents of the `fmt::rt::v1` module, the following actions were
taken:
* Reexports of all enum variants were removed
* All prefixes on enum variants were removed
* A few miscellaneous enum variants were renamed
* Otherwise all structs, fields, and variants were marked stable.
In addition to these actions in the `std::fmt` module, many implementations of
`Show` and `String` were stabilized as well.
In some other modules:
* `ToString` is now stable
* `ToString::to_string` is now stable
* `Vec` no longer implements `fmt::Writer` (this has moved to `String`)
This is a breaking change due to all of the changes to the `fmt::rt` module, but
this likely will not have much impact on existing programs.
Closes #20661
[breaking-change]
2015-01-13 17:42:53 -06:00
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
|
2015-02-13 17:56:32 -06:00
|
|
|
impl fmt::Write for String {
|
2015-01-10 04:07:14 -06:00
|
|
|
#[inline]
|
2014-12-12 12:59:41 -06:00
|
|
|
fn write_str(&mut self, s: &str) -> fmt::Result {
|
|
|
|
self.push_str(s);
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2014-12-19 06:02:22 -06:00
|
|
|
use prelude::*;
|
2014-05-29 21:03:06 -05:00
|
|
|
use test::Bencher;
|
|
|
|
|
2014-12-29 18:38:07 -06:00
|
|
|
use str::Utf8Error;
|
2015-01-02 01:53:35 -06:00
|
|
|
use core::iter::repeat;
|
|
|
|
use super::{as_string, CowString};
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-08-23 19:26:53 -05:00
|
|
|
#[test]
|
|
|
|
fn test_as_string() {
|
|
|
|
let x = "foo";
|
2015-02-01 20:53:25 -06:00
|
|
|
assert_eq!(x, &**as_string(x));
|
2014-08-23 19:26:53 -05:00
|
|
|
}
|
|
|
|
|
2014-06-21 05:39:03 -05:00
|
|
|
#[test]
|
|
|
|
fn test_from_str() {
|
2015-01-28 00:52:32 -06:00
|
|
|
let owned: Option<::std::string::String> = "string".parse().ok();
|
2015-02-01 20:53:25 -06:00
|
|
|
assert_eq!(owned.as_ref().map(|s| &**s), Some("string"));
|
2014-06-21 05:39:03 -05:00
|
|
|
}
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2015-01-08 05:48:32 -06:00
|
|
|
#[test]
|
|
|
|
fn test_unsized_to_string() {
|
|
|
|
let s: &str = "abc";
|
|
|
|
let _: String = (*s).to_string();
|
|
|
|
}
|
|
|
|
|
2014-07-10 11:21:16 -05:00
|
|
|
#[test]
|
|
|
|
fn test_from_utf8() {
|
2014-10-05 05:11:17 -05:00
|
|
|
let xs = b"hello".to_vec();
|
2014-12-28 12:29:56 -06:00
|
|
|
assert_eq!(String::from_utf8(xs).unwrap(),
|
|
|
|
String::from_str("hello"));
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-10-05 05:11:17 -05:00
|
|
|
let xs = "ศไทย中华Việt Nam".as_bytes().to_vec();
|
2014-12-28 12:29:56 -06:00
|
|
|
assert_eq!(String::from_utf8(xs).unwrap(),
|
|
|
|
String::from_str("ศไทย中华Việt Nam"));
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-10-05 05:11:17 -05:00
|
|
|
let xs = b"hello\xFF".to_vec();
|
2014-12-28 12:29:56 -06:00
|
|
|
let err = String::from_utf8(xs).err().unwrap();
|
|
|
|
assert_eq!(err.utf8_error(), Utf8Error::TooShort);
|
|
|
|
assert_eq!(err.into_bytes(), b"hello\xff".to_vec());
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_from_utf8_lossy() {
|
|
|
|
let xs = b"hello";
|
2015-01-02 01:53:35 -06:00
|
|
|
let ys: CowString = "hello".into_cow();
|
2014-11-21 00:20:04 -06:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs), ys);
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-07-04 15:38:13 -05:00
|
|
|
let xs = "ศไทย中华Việt Nam".as_bytes();
|
2015-01-02 01:53:35 -06:00
|
|
|
let ys: CowString = "ศไทย中华Việt Nam".into_cow();
|
2014-11-21 00:20:04 -06:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs), ys);
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"Hello\xC2 There\xFF Goodbye";
|
2014-07-04 15:38:13 -05:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("Hello\u{FFFD} There\u{FFFD} Goodbye").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
|
|
|
assert_eq!(String::from_utf8_lossy(xs),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"\xF5foo\xF5\x80bar";
|
2014-07-04 15:38:13 -05:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("\u{FFFD}foo\u{FFFD}\u{FFFD}bar").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
|
2014-07-04 15:38:13 -05:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
|
|
|
|
assert_eq!(String::from_utf8_lossy(xs),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
|
2014-12-09 16:08:10 -06:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\
|
|
|
|
foo\u{10000}bar").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
|
|
|
|
// surrogates
|
|
|
|
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
|
2014-12-09 16:08:10 -06:00
|
|
|
assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}foo\
|
|
|
|
\u{FFFD}\u{FFFD}\u{FFFD}bar").into_cow());
|
2014-07-10 11:21:16 -05:00
|
|
|
}
|
|
|
|
|
2014-07-10 10:53:51 -05:00
|
|
|
#[test]
|
|
|
|
fn test_from_utf16() {
|
|
|
|
let pairs =
|
2014-07-04 15:38:13 -05:00
|
|
|
[(String::from_str("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n"),
|
2014-07-10 10:53:51 -05:00
|
|
|
vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
|
|
|
|
0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
|
|
|
|
0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
|
|
|
|
0xd800_u16, 0xdf30_u16, 0x000a_u16]),
|
|
|
|
|
2014-07-04 15:38:13 -05:00
|
|
|
(String::from_str("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n"),
|
2014-07-10 10:53:51 -05:00
|
|
|
vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
|
|
|
|
0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
|
|
|
|
0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
|
|
|
|
0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
|
|
|
|
0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
|
|
|
|
0x000a_u16]),
|
|
|
|
|
2014-07-04 15:38:13 -05:00
|
|
|
(String::from_str("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n"),
|
2014-07-10 10:53:51 -05:00
|
|
|
vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
|
|
|
|
0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
|
|
|
|
0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
|
|
|
|
0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
|
|
|
|
0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
|
|
|
|
0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
|
|
|
|
|
2014-07-04 15:38:13 -05:00
|
|
|
(String::from_str("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n"),
|
2014-07-10 10:53:51 -05:00
|
|
|
vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
|
|
|
|
0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
|
|
|
|
0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
|
|
|
|
0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
|
|
|
|
0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
|
|
|
|
0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
|
|
|
|
0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
|
|
|
|
0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
|
|
|
|
0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
|
|
|
|
0x000a_u16 ]),
|
|
|
|
// Issue #12318, even-numbered non-BMP planes
|
2014-12-09 16:08:10 -06:00
|
|
|
(String::from_str("\u{20000}"),
|
2014-07-10 10:53:51 -05:00
|
|
|
vec![0xD840, 0xDC00])];
|
|
|
|
|
2015-01-31 11:20:46 -06:00
|
|
|
for p in &pairs {
|
2014-07-10 10:53:51 -05:00
|
|
|
let (s, u) = (*p).clone();
|
2014-11-27 10:45:50 -06:00
|
|
|
let s_as_utf16 = s.utf16_units().collect::<Vec<u16>>();
|
2015-02-01 20:53:25 -06:00
|
|
|
let u_as_string = String::from_utf16(&u).unwrap();
|
2014-07-10 10:53:51 -05:00
|
|
|
|
2015-02-01 20:53:25 -06:00
|
|
|
assert!(::unicode::str::is_utf16(&u));
|
2014-07-10 10:53:51 -05:00
|
|
|
assert_eq!(s_as_utf16, u);
|
|
|
|
|
|
|
|
assert_eq!(u_as_string, s);
|
2015-02-01 20:53:25 -06:00
|
|
|
assert_eq!(String::from_utf16_lossy(&u), s);
|
2014-07-10 10:53:51 -05:00
|
|
|
|
2015-02-01 20:53:25 -06:00
|
|
|
assert_eq!(String::from_utf16(&s_as_utf16).unwrap(), s);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(u_as_string.utf16_units().collect::<Vec<u16>>(), u);
|
2014-07-10 10:53:51 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_utf16_invalid() {
|
|
|
|
// completely positive cases tested above.
|
|
|
|
// lead + eof
|
2014-12-28 12:29:56 -06:00
|
|
|
assert!(String::from_utf16(&[0xD800]).is_err());
|
2014-07-10 10:53:51 -05:00
|
|
|
// lead + lead
|
2014-12-28 12:29:56 -06:00
|
|
|
assert!(String::from_utf16(&[0xD800, 0xD800]).is_err());
|
2014-07-10 10:53:51 -05:00
|
|
|
|
|
|
|
// isolated trail
|
2014-12-28 12:29:56 -06:00
|
|
|
assert!(String::from_utf16(&[0x0061, 0xDC00]).is_err());
|
2014-07-10 10:53:51 -05:00
|
|
|
|
|
|
|
// general
|
2014-12-28 12:29:56 -06:00
|
|
|
assert!(String::from_utf16(&[0xD800, 0xd801, 0xdc8b, 0xD800]).is_err());
|
2014-07-10 10:53:51 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_from_utf16_lossy() {
|
|
|
|
// completely positive cases tested above.
|
|
|
|
// lead + eof
|
2014-12-09 16:08:10 -06:00
|
|
|
assert_eq!(String::from_utf16_lossy(&[0xD800]), String::from_str("\u{FFFD}"));
|
2014-07-10 10:53:51 -05:00
|
|
|
// lead + lead
|
2014-12-09 16:08:10 -06:00
|
|
|
assert_eq!(String::from_utf16_lossy(&[0xD800, 0xD800]),
|
|
|
|
String::from_str("\u{FFFD}\u{FFFD}"));
|
2014-07-10 10:53:51 -05:00
|
|
|
|
|
|
|
// isolated trail
|
2014-12-09 16:08:10 -06:00
|
|
|
assert_eq!(String::from_utf16_lossy(&[0x0061, 0xDC00]), String::from_str("a\u{FFFD}"));
|
2014-07-10 10:53:51 -05:00
|
|
|
|
|
|
|
// general
|
2014-11-17 02:39:01 -06:00
|
|
|
assert_eq!(String::from_utf16_lossy(&[0xD800, 0xd801, 0xdc8b, 0xD800]),
|
2014-12-09 16:08:10 -06:00
|
|
|
String::from_str("\u{FFFD}𐒋\u{FFFD}"));
|
2014-07-10 10:53:51 -05:00
|
|
|
}
|
2014-06-21 05:39:03 -05:00
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
#[test]
|
|
|
|
fn test_push_bytes() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::from_str("ABC");
|
2014-04-02 18:54:22 -05:00
|
|
|
unsafe {
|
2014-10-05 05:11:17 -05:00
|
|
|
let mv = s.as_mut_vec();
|
2014-11-17 02:39:01 -06:00
|
|
|
mv.push_all(&[b'D']);
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "ABCD");
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_push_str() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::new();
|
2014-04-02 18:54:22 -05:00
|
|
|
s.push_str("");
|
2015-01-26 20:21:15 -06:00
|
|
|
assert_eq!(&s[0..], "");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.push_str("abc");
|
2015-01-26 20:21:15 -06:00
|
|
|
assert_eq!(&s[0..], "abc");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.push_str("ประเทศไทย中华Việt Nam");
|
2015-01-26 20:21:15 -06:00
|
|
|
assert_eq!(&s[0..], "abcประเทศไทย中华Việt Nam");
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
2014-09-22 10:28:35 -05:00
|
|
|
fn test_push() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut data = String::from_str("ประเทศไทย中");
|
2014-09-22 10:28:35 -05:00
|
|
|
data.push('华');
|
|
|
|
data.push('b'); // 1 byte
|
|
|
|
data.push('¢'); // 2 byte
|
|
|
|
data.push('€'); // 3 byte
|
|
|
|
data.push('𤭢'); // 4 byte
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(data, "ประเทศไทย中华b¢€𤭢");
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|
|
|
|
|
2014-05-08 15:42:40 -05:00
|
|
|
#[test]
|
2014-10-05 05:11:17 -05:00
|
|
|
fn test_pop() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut data = String::from_str("ประเทศไทย中华b¢€𤭢");
|
2014-10-05 05:11:17 -05:00
|
|
|
assert_eq!(data.pop().unwrap(), '𤭢'); // 4 bytes
|
|
|
|
assert_eq!(data.pop().unwrap(), '€'); // 3 bytes
|
|
|
|
assert_eq!(data.pop().unwrap(), '¢'); // 2 bytes
|
|
|
|
assert_eq!(data.pop().unwrap(), 'b'); // 1 bytes
|
|
|
|
assert_eq!(data.pop().unwrap(), '华');
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(data, "ประเทศไทย中");
|
2014-05-08 15:42:40 -05:00
|
|
|
}
|
|
|
|
|
2014-04-02 18:54:22 -05:00
|
|
|
#[test]
|
|
|
|
fn test_str_truncate() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::from_str("12345");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(5);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "12345");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(3);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "123");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(0);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "");
|
2014-04-02 18:54:22 -05:00
|
|
|
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::from_str("12345");
|
2014-11-27 10:45:50 -06:00
|
|
|
let p = s.as_ptr();
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(3);
|
|
|
|
s.push_str("6");
|
2014-11-27 10:45:50 -06:00
|
|
|
let p_ = s.as_ptr();
|
2014-04-02 18:54:22 -05:00
|
|
|
assert_eq!(p_, p);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[should_fail]
|
|
|
|
fn test_str_truncate_invalid_len() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::from_str("12345");
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(6);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[should_fail]
|
|
|
|
fn test_str_truncate_split_codepoint() {
|
2014-12-09 16:08:10 -06:00
|
|
|
let mut s = String::from_str("\u{FC}"); // ü
|
2014-04-02 18:54:22 -05:00
|
|
|
s.truncate(1);
|
|
|
|
}
|
2014-05-11 05:49:09 -05:00
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_str_clear() {
|
2014-05-22 18:57:53 -05:00
|
|
|
let mut s = String::from_str("12345");
|
2014-05-11 05:49:09 -05:00
|
|
|
s.clear();
|
|
|
|
assert_eq!(s.len(), 0);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "");
|
2014-05-11 05:49:09 -05:00
|
|
|
}
|
2014-05-27 23:34:00 -05:00
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_str_add() {
|
|
|
|
let a = String::from_str("12345");
|
|
|
|
let b = a + "2";
|
2014-12-01 17:02:39 -06:00
|
|
|
let b = b + "2";
|
2014-05-27 23:34:00 -05:00
|
|
|
assert_eq!(b.len(), 7);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(b, "1234522");
|
2014-05-27 23:34:00 -05:00
|
|
|
}
|
2014-07-10 11:21:16 -05:00
|
|
|
|
2014-09-22 10:24:14 -05:00
|
|
|
#[test]
|
|
|
|
fn remove() {
|
|
|
|
let mut s = "ศไทย中华Việt Nam; foobar".to_string();;
|
2014-12-28 12:29:56 -06:00
|
|
|
assert_eq!(s.remove(0), 'ศ');
|
2014-09-22 10:24:14 -05:00
|
|
|
assert_eq!(s.len(), 33);
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "ไทย中华Việt Nam; foobar");
|
2014-12-28 12:29:56 -06:00
|
|
|
assert_eq!(s.remove(17), 'ệ');
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "ไทย中华Vit Nam; foobar");
|
2014-09-22 10:24:14 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test] #[should_fail]
|
|
|
|
fn remove_bad() {
|
|
|
|
"ศ".to_string().remove(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn insert() {
|
|
|
|
let mut s = "foobar".to_string();
|
|
|
|
s.insert(0, 'ệ');
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "ệfoobar");
|
2014-09-22 10:24:14 -05:00
|
|
|
s.insert(6, 'ย');
|
2014-11-27 10:45:50 -06:00
|
|
|
assert_eq!(s, "ệfooยbar");
|
2014-09-22 10:24:14 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test] #[should_fail] fn insert_bad1() { "".to_string().insert(1, 't'); }
|
|
|
|
#[test] #[should_fail] fn insert_bad2() { "ệ".to_string().insert(1, 't'); }
|
|
|
|
|
2014-09-26 23:46:22 -05:00
|
|
|
#[test]
|
|
|
|
fn test_slicing() {
|
|
|
|
let s = "foobar".to_string();
|
2015-02-18 13:48:57 -06:00
|
|
|
assert_eq!("foobar", &s[..]);
|
2015-01-03 22:43:24 -06:00
|
|
|
assert_eq!("foo", &s[..3]);
|
|
|
|
assert_eq!("bar", &s[3..]);
|
|
|
|
assert_eq!("oob", &s[1..4]);
|
2014-09-26 23:46:22 -05:00
|
|
|
}
|
|
|
|
|
2014-11-15 19:38:03 -06:00
|
|
|
#[test]
|
|
|
|
fn test_simple_types() {
|
2015-01-25 15:05:03 -06:00
|
|
|
assert_eq!(1.to_string(), "1");
|
|
|
|
assert_eq!((-1).to_string(), "-1");
|
2015-02-04 20:17:19 -06:00
|
|
|
assert_eq!(200.to_string(), "200");
|
2014-11-27 18:09:59 -06:00
|
|
|
assert_eq!(2u8.to_string(), "2");
|
|
|
|
assert_eq!(true.to_string(), "true");
|
|
|
|
assert_eq!(false.to_string(), "false");
|
|
|
|
assert_eq!(("hi".to_string()).to_string(), "hi");
|
2014-11-15 19:38:03 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_vectors() {
|
2015-02-04 20:17:19 -06:00
|
|
|
let x: Vec<i32> = vec![];
|
2015-01-07 16:58:31 -06:00
|
|
|
assert_eq!(format!("{:?}", x), "[]");
|
2015-01-25 15:05:03 -06:00
|
|
|
assert_eq!(format!("{:?}", vec![1]), "[1]");
|
|
|
|
assert_eq!(format!("{:?}", vec![1, 2, 3]), "[1, 2, 3]");
|
|
|
|
assert!(format!("{:?}", vec![vec![], vec![1], vec![1, 1]]) ==
|
2015-01-20 17:45:07 -06:00
|
|
|
"[[], [1], [1, 1]]");
|
2014-11-15 19:38:03 -06:00
|
|
|
}
|
|
|
|
|
2014-12-07 14:43:11 -06:00
|
|
|
#[test]
|
|
|
|
fn test_from_iterator() {
|
|
|
|
let s = "ศไทย中华Việt Nam".to_string();
|
|
|
|
let t = "ศไทย中华";
|
|
|
|
let u = "Việt Nam";
|
|
|
|
|
|
|
|
let a: String = s.chars().collect();
|
2014-12-07 14:45:47 -06:00
|
|
|
assert_eq!(s, a);
|
2014-12-07 14:43:11 -06:00
|
|
|
|
|
|
|
let mut b = t.to_string();
|
|
|
|
b.extend(u.chars());
|
2014-12-07 14:45:47 -06:00
|
|
|
assert_eq!(s, b);
|
|
|
|
|
|
|
|
let c: String = vec![t, u].into_iter().collect();
|
|
|
|
assert_eq!(s, c);
|
|
|
|
|
|
|
|
let mut d = t.to_string();
|
|
|
|
d.extend(vec![u].into_iter());
|
|
|
|
assert_eq!(s, d);
|
2014-12-07 14:43:11 -06:00
|
|
|
}
|
|
|
|
|
2014-07-10 11:21:16 -05:00
|
|
|
#[bench]
|
|
|
|
fn bench_with_capacity(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
String::with_capacity(100)
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_push_str(b: &mut Bencher) {
|
|
|
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
|
|
b.iter(|| {
|
|
|
|
let mut r = String::new();
|
|
|
|
r.push_str(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2014-12-20 11:17:58 -06:00
|
|
|
const REPETITIONS: u64 = 10_000;
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_push_str_one_byte(b: &mut Bencher) {
|
|
|
|
b.bytes = REPETITIONS;
|
|
|
|
b.iter(|| {
|
|
|
|
let mut r = String::new();
|
2015-01-26 14:46:12 -06:00
|
|
|
for _ in 0..REPETITIONS {
|
2014-12-20 11:17:58 -06:00
|
|
|
r.push_str("a")
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_push_char_one_byte(b: &mut Bencher) {
|
|
|
|
b.bytes = REPETITIONS;
|
|
|
|
b.iter(|| {
|
|
|
|
let mut r = String::new();
|
2015-01-26 14:46:12 -06:00
|
|
|
for _ in 0..REPETITIONS {
|
2014-12-20 11:17:58 -06:00
|
|
|
r.push('a')
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_push_char_two_bytes(b: &mut Bencher) {
|
|
|
|
b.bytes = REPETITIONS * 2;
|
|
|
|
b.iter(|| {
|
|
|
|
let mut r = String::new();
|
2015-01-26 14:46:12 -06:00
|
|
|
for _ in 0..REPETITIONS {
|
2014-12-20 11:17:58 -06:00
|
|
|
r.push('â')
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2014-07-10 11:21:16 -05:00
|
|
|
#[bench]
|
|
|
|
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
|
|
|
|
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
|
|
|
|
Lorem ipsum dolor sit amet, consectetur. ";
|
|
|
|
|
|
|
|
assert_eq!(100, s.len());
|
|
|
|
b.iter(|| {
|
|
|
|
let _ = String::from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
|
2014-07-21 00:43:08 -05:00
|
|
|
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
|
2014-07-10 11:21:16 -05:00
|
|
|
assert_eq!(100, s.len());
|
|
|
|
b.iter(|| {
|
|
|
|
let _ = String::from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn from_utf8_lossy_invalid(b: &mut Bencher) {
|
|
|
|
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
|
|
|
b.iter(|| {
|
|
|
|
let _ = String::from_utf8_lossy(s);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
|
2015-01-02 01:53:35 -06:00
|
|
|
let s = repeat(0xf5u8).take(100).collect::<Vec<_>>();
|
2014-07-10 11:21:16 -05:00
|
|
|
b.iter(|| {
|
2015-02-01 20:53:25 -06:00
|
|
|
let _ = String::from_utf8_lossy(&s);
|
2014-07-10 11:21:16 -05:00
|
|
|
});
|
|
|
|
}
|
2015-01-19 12:21:58 -06:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_exact_size_shrink_to_fit(b: &mut Bencher) {
|
|
|
|
let s = "Hello there, the quick brown fox jumped over the lazy dog! \
|
|
|
|
Lorem ipsum dolor sit amet, consectetur. ";
|
|
|
|
// ensure our operation produces an exact-size string before we benchmark it
|
|
|
|
let mut r = String::with_capacity(s.len());
|
|
|
|
r.push_str(s);
|
|
|
|
assert_eq!(r.len(), r.capacity());
|
|
|
|
b.iter(|| {
|
|
|
|
let mut r = String::with_capacity(s.len());
|
|
|
|
r.push_str(s);
|
|
|
|
r.shrink_to_fit();
|
|
|
|
r
|
|
|
|
});
|
|
|
|
}
|
2014-04-02 18:54:22 -05:00
|
|
|
}
|