1900 lines
54 KiB
Rust
1900 lines
54 KiB
Rust
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||
// file at the top-level directory of this distribution and at
|
||
// http://rust-lang.org/COPYRIGHT.
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||
// option. This file may not be copied, modified, or distributed
|
||
// except according to those terms.
|
||
|
||
//! A UTF-8 encoded, growable string.
|
||
//!
|
||
//! This module contains the [`String`] type, the [`ToString`] trait for
//! converting to strings, and several error types that may result from
//! working with [`String`]s.
|
||
//!
|
||
//! [`String`]: struct.String.html
|
||
//! [`ToString`]: trait.ToString.html
|
||
//!
|
||
//! # Examples
|
||
//!
|
||
//! There are multiple ways to create a new `String` from a string literal:
|
||
//!
|
||
//! ```rust
|
||
//! let s = "Hello".to_string();
|
||
//!
|
||
//! let s = String::from("world");
|
||
//! let s: String = "also this".into();
|
||
//! ```
|
||
//!
|
||
//! You can create a new `String` from an existing one by concatenating with
|
||
//! `+`:
|
||
//!
|
||
//! ```rust
|
||
//! let s = "Hello".to_string();
|
||
//!
|
||
//! let message = s + " world!";
|
||
//! ```
|
||
//!
|
||
//! If you have a vector of valid UTF-8 bytes, you can make a `String` out of
|
||
//! it. You can do the reverse too.
|
||
//!
|
||
//! ```rust
|
||
//! let sparkle_heart = vec![240, 159, 146, 150];
|
||
//!
|
||
//! // We know these bytes are valid, so we'll use `unwrap()`.
|
||
//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
|
||
//!
|
||
//! assert_eq!("💖", sparkle_heart);
|
||
//!
|
||
//! let bytes = sparkle_heart.into_bytes();
|
||
//!
|
||
//! assert_eq!(bytes, [240, 159, 146, 150]);
|
||
//! ```
|
||
|
||
#![stable(feature = "rust1", since = "1.0.0")]
|
||
|
||
use core::fmt;
|
||
use core::hash;
|
||
use core::iter::FromIterator;
|
||
use core::mem;
|
||
use core::ops::{self, Add, Index, IndexMut};
|
||
use core::ptr;
|
||
use core::str::pattern::Pattern;
|
||
use rustc_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
|
||
use rustc_unicode::str as unicode_str;
|
||
|
||
use borrow::{Cow, ToOwned};
|
||
use range::RangeArgument;
|
||
use str::{self, FromStr, Utf8Error, Chars};
|
||
use vec::Vec;
|
||
use boxed::Box;
|
||
|
||
/// A UTF-8 encoded, growable string.
|
||
///
|
||
/// The `String` type is the most common string type that has ownership over the
|
||
/// contents of the string. It has a close relationship with its borrowed
|
||
/// counterpart, the primitive [`str`].
|
||
///
|
||
/// [`str`]: ../../std/primitive.str.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// You can create a `String` from a literal string with `String::from`:
|
||
///
|
||
/// ```
|
||
/// let hello = String::from("Hello, world!");
|
||
/// ```
|
||
///
|
||
/// You can append a [`char`] to a `String` with the [`push()`] method, and
|
||
/// append a [`&str`] with the [`push_str()`] method:
|
||
///
|
||
/// ```
|
||
/// let mut hello = String::from("Hello, ");
|
||
///
|
||
/// hello.push('w');
|
||
/// hello.push_str("orld!");
|
||
/// ```
|
||
///
|
||
/// [`char`]: ../../std/primitive.char.html
|
||
/// [`push()`]: #method.push
|
||
/// [`push_str()`]: #method.push_str
|
||
///
|
||
/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
|
||
/// the [`from_utf8()`] method:
|
||
///
|
||
/// ```
|
||
/// // some bytes, in a vector
|
||
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||
///
|
||
/// // We know these bytes are valid, so we'll use `unwrap()`.
|
||
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// [`from_utf8()`]: #method.from_utf8
|
||
///
|
||
/// # UTF-8
|
||
///
|
||
/// `String`s are always valid UTF-8. This has a few implications, the first of
|
||
/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
|
||
/// similar, but without the UTF-8 constraint. The second implication is that
|
||
/// you cannot index into a `String`:
|
||
///
|
||
/// ```ignore
|
||
/// let s = "hello";
|
||
///
|
||
/// println!("The first letter of s is {}", s[0]); // ERROR!!!
|
||
/// ```
|
||
///
|
||
/// [`OsString`]: ../../std/ffi/struct.OsString.html
|
||
///
|
||
/// Indexing is intended to be a constant-time operation, but UTF-8 encoding
|
||
/// does not allow us to do this. Furthermore, it's not clear what sort of
|
||
/// thing the index should return: a byte, a codepoint, or a grapheme cluster.
|
||
/// The [`bytes()`] and [`chars()`] methods return iterators over the first
/// two, respectively.
///
/// [`bytes()`]: #method.bytes
/// [`chars()`]: #method.chars
|
||
///
|
||
/// # Deref
|
||
///
|
||
/// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
|
||
/// methods. In addition, this means that you can pass a `String` to any
|
||
/// function which takes a [`&str`] by using an ampersand (`&`):
|
||
///
|
||
/// ```
|
||
/// fn takes_str(s: &str) { }
|
||
///
|
||
/// let s = String::from("Hello");
|
||
///
|
||
/// takes_str(&s);
|
||
/// ```
|
||
///
|
||
/// [`&str`]: ../../std/primitive.str.html
|
||
/// [`Deref`]: ../../std/ops/trait.Deref.html
|
||
///
|
||
/// This will create a [`&str`] from the `String` and pass it in. This
|
||
/// conversion is very inexpensive, and so generally, functions will accept
|
||
/// [`&str`]s as arguments unless they need a `String` for some specific reason.
|
||
///
|
||
///
|
||
/// # Representation
|
||
///
|
||
/// A `String` is made up of three components: a pointer to some bytes, a
|
||
/// length, and a capacity. The pointer points to an internal buffer `String`
|
||
/// uses to store its data. The length is the number of bytes currently stored
|
||
/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
|
||
/// the length will always be less than or equal to the capacity.
|
||
///
|
||
/// This buffer is always stored on the heap.
|
||
///
|
||
/// You can look at these with the [`as_ptr()`], [`len()`], and [`capacity()`]
|
||
/// methods:
|
||
///
|
||
/// ```
|
||
/// use std::mem;
|
||
///
|
||
/// let story = String::from("Once upon a time...");
|
||
///
|
||
/// let ptr = story.as_ptr();
|
||
/// let len = story.len();
|
||
/// let capacity = story.capacity();
|
||
///
|
||
/// // story has nineteen bytes
|
||
/// assert_eq!(19, len);
|
||
///
|
||
/// // Now that we have our parts, we throw the story away.
|
||
/// mem::forget(story);
|
||
///
|
||
/// // We can re-build a String out of ptr, len, and capacity. This is all
|
||
/// // unsafe because we are responsible for making sure the components are
|
||
/// // valid:
|
||
/// let s = unsafe { String::from_raw_parts(ptr as *mut _, len, capacity) } ;
|
||
///
|
||
/// assert_eq!(String::from("Once upon a time..."), s);
|
||
/// ```
|
||
///
|
||
/// [`as_ptr()`]: #method.as_ptr
|
||
/// [`len()`]: #method.len
|
||
/// [`capacity()`]: #method.capacity
|
||
///
|
||
/// If a `String` has enough capacity, adding elements to it will not
|
||
/// re-allocate. For example, consider this program:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::new();
|
||
///
|
||
/// println!("{}", s.capacity());
|
||
///
|
||
/// for _ in 0..5 {
|
||
/// s.push_str("hello");
|
||
/// println!("{}", s.capacity());
|
||
/// }
|
||
/// ```
|
||
///
|
||
/// This will output the following:
|
||
///
|
||
/// ```text
|
||
/// 0
|
||
/// 5
|
||
/// 10
|
||
/// 20
|
||
/// 20
|
||
/// 40
|
||
/// ```
|
||
///
|
||
/// At first, we have no memory allocated at all, but as we append to the
|
||
/// string, it increases its capacity appropriately. If we instead use the
|
||
/// [`with_capacity()`] method to allocate the correct capacity initially:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::with_capacity(25);
|
||
///
|
||
/// println!("{}", s.capacity());
|
||
///
|
||
/// for _ in 0..5 {
|
||
/// s.push_str("hello");
|
||
/// println!("{}", s.capacity());
|
||
/// }
|
||
/// ```
|
||
///
|
||
/// [`with_capacity()`]: #method.with_capacity
|
||
///
|
||
/// We end up with a different output:
|
||
///
|
||
/// ```text
|
||
/// 25
|
||
/// 25
|
||
/// 25
|
||
/// 25
|
||
/// 25
|
||
/// 25
|
||
/// ```
|
||
///
|
||
/// Here, there's no need to allocate more memory inside the loop.
|
||
#[derive(PartialOrd, Eq, Ord)]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub struct String {
|
||
vec: Vec<u8>,
|
||
}
|
||
|
||
/// A possible error value when converting a `String` from a UTF-8 byte vector.
|
||
///
|
||
/// This type is the error type for the [`from_utf8()`] method on [`String`]. It
|
||
/// is designed in such a way to carefully avoid reallocations: the
|
||
/// [`into_bytes()`] method will give back the byte vector that was used in the
|
||
/// conversion attempt.
|
||
///
|
||
/// [`from_utf8()`]: struct.String.html#method.from_utf8
|
||
/// [`String`]: struct.String.html
|
||
/// [`into_bytes()`]: struct.FromUtf8Error.html#method.into_bytes
|
||
///
|
||
/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
|
||
/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
|
||
/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
|
||
/// through the [`utf8_error()`] method.
|
||
///
|
||
/// [`Utf8Error`]: ../../std/str/struct.Utf8Error.html
|
||
/// [`std::str`]: ../../std/str/index.html
|
||
/// [`u8`]: ../../std/primitive.u8.html
|
||
/// [`&str`]: ../../std/primitive.str.html
|
||
/// [`utf8_error()`]: #method.utf8_error
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some invalid bytes, in a vector
|
||
/// let bytes = vec![0, 159];
|
||
///
|
||
/// let value = String::from_utf8(bytes);
|
||
///
|
||
/// assert!(value.is_err());
|
||
/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
#[derive(Debug)]
|
||
pub struct FromUtf8Error {
|
||
bytes: Vec<u8>,
|
||
error: Utf8Error,
|
||
}
|
||
|
||
/// A possible error value when converting a `String` from a UTF-16 byte slice.
|
||
///
|
||
/// This type is the error type for the [`from_utf16()`] method on [`String`].
|
||
///
|
||
/// [`from_utf16()`]: struct.String.html#method.from_utf16
|
||
/// [`String`]: struct.String.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // 𝄞mu<invalid>ic
|
||
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
|
||
/// 0xD800, 0x0069, 0x0063];
|
||
///
|
||
/// assert!(String::from_utf16(v).is_err());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
#[derive(Debug)]
|
||
pub struct FromUtf16Error(());
|
||
|
||
impl String {
|
||
/// Creates a new empty `String`.
|
||
///
|
||
/// Given that the `String` is empty, this will not allocate any initial
|
||
/// buffer. While that means that this initial operation is very
|
||
/// inexpensive, it may cause excessive allocation later, when you add
|
||
/// data. If you have an idea of how much data the `String` will hold,
|
||
/// consider the [`with_capacity()`] method to prevent excessive
|
||
/// re-allocation.
|
||
///
|
||
/// [`with_capacity()`]: #method.with_capacity
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let s = String::new();
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn new() -> String {
|
||
String { vec: Vec::new() }
|
||
}
|
||
|
||
/// Creates a new empty `String` with a particular capacity.
|
||
///
|
||
/// `String`s have an internal buffer to hold their data. The capacity is
|
||
/// the length of that buffer, and can be queried with the [`capacity()`]
|
||
/// method. This method creates an empty `String`, but one with an initial
|
||
/// buffer that can hold `capacity` bytes. This is useful when you may be
|
||
/// appending a bunch of data to the `String`, reducing the number of
|
||
/// reallocations it needs to do.
|
||
///
|
||
/// [`capacity()`]: #method.capacity
|
||
///
|
||
/// If the given capacity is `0`, no allocation will occur, and this method
|
||
/// is identical to the [`new()`] method.
|
||
///
|
||
/// [`new()`]: #method.new
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::with_capacity(10);
|
||
///
|
||
/// // The String contains no chars, even though it has capacity for more
|
||
/// assert_eq!(s.len(), 0);
|
||
///
|
||
/// // These are all done without reallocating...
|
||
/// let cap = s.capacity();
|
||
/// for i in 0..10 {
|
||
/// s.push('a');
|
||
/// }
|
||
///
|
||
/// assert_eq!(s.capacity(), cap);
|
||
///
|
||
/// // ...but this may make the vector reallocate
|
||
/// s.push('a');
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn with_capacity(capacity: usize) -> String {
|
||
String { vec: Vec::with_capacity(capacity) }
|
||
}
|
||
|
||
// HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is
|
||
// required for this method definition, is not available. Since we don't
|
||
// require this method for testing purposes, I'll just stub it
|
||
// NB see the slice::hack module in slice.rs for more information
|
||
#[inline]
#[cfg(test)]
pub fn from_str(_: &str) -> String {
    // Test-only stub: it exists so callers still compile, and always panics.
    panic!("not available with cfg(test)")
}
|
||
|
||
/// Converts a vector of bytes to a `String`.
|
||
///
|
||
/// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes
|
||
/// ([`Vec<u8>`]) is made of bytes, so this function converts between the
|
||
/// two. Not all byte slices are valid `String`s, however: `String`
|
||
/// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
|
||
/// the bytes are valid UTF-8, and then does the conversion.
|
||
///
|
||
/// [`&str`]: ../../std/primitive.str.html
|
||
/// [`u8`]: ../../std/primitive.u8.html
|
||
/// [`Vec<u8>`]: ../../std/vec/struct.Vec.html
|
||
///
|
||
/// If you are sure that the byte slice is valid UTF-8, and you don't want
|
||
/// to incur the overhead of the validity check, there is an unsafe version
|
||
/// of this function, [`from_utf8_unchecked()`], which has the same behavior
|
||
/// but skips the check.
|
||
///
|
||
/// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked
|
||
///
|
||
/// This method will take care to not copy the vector, for efficiency's
|
||
/// sake.
|
||
///
|
||
/// If you need a `&str` instead of a `String`, consider
|
||
/// [`str::from_utf8()`].
|
||
///
|
||
/// [`str::from_utf8()`]: ../../std/str/fn.from_utf8.html
|
||
///
|
||
/// # Errors
|
||
///
|
||
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
|
||
/// provided bytes are not UTF-8. The vector you moved in is also included.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some bytes, in a vector
|
||
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||
///
|
||
/// // We know these bytes are valid, so we'll use `unwrap()`.
|
||
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// Incorrect bytes:
|
||
///
|
||
/// ```
|
||
/// // some invalid bytes, in a vector
|
||
/// let sparkle_heart = vec![0, 159, 146, 150];
|
||
///
|
||
/// assert!(String::from_utf8(sparkle_heart).is_err());
|
||
/// ```
|
||
///
|
||
/// See the docs for [`FromUtf8Error`] for more details on what you can do
|
||
/// with this error.
|
||
///
|
||
/// [`FromUtf8Error`]: struct.FromUtf8Error.html
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
|
||
match str::from_utf8(&vec) {
|
||
Ok(..) => Ok(String { vec: vec }),
|
||
Err(e) => {
|
||
Err(FromUtf8Error {
|
||
bytes: vec,
|
||
error: e,
|
||
})
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Converts a slice of bytes to a string, including invalid characters.
|
||
///
|
||
/// Strings are made of bytes ([`u8`]), and a slice of bytes
|
||
/// ([`&[u8]`][byteslice]) is made of bytes, so this function converts
|
||
/// between the two. Not all byte slices are valid strings, however: strings
|
||
/// are required to be valid UTF-8. During this conversion,
|
||
/// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with
|
||
/// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: <20>
|
||
///
|
||
/// [`u8`]: ../../std/primitive.u8.html
|
||
/// [byteslice]: ../../std/primitive.slice.html
|
||
///
|
||
/// If you are sure that the byte slice is valid UTF-8, and you don't want
|
||
/// to incur the overhead of the conversion, there is an unsafe version
|
||
/// of this function, [`from_utf8_unchecked()`], which has the same behavior
|
||
/// but skips the checks.
|
||
///
|
||
/// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked
|
||
///
|
||
/// This function returns a [`Cow<'a, str>`]. If our byte slice is invalid
|
||
/// UTF-8, then we need to insert the replacement characters, which will
|
||
/// change the size of the string, and hence, require a `String`. But if
|
||
/// it's already valid UTF-8, we don't need a new allocation. This return
|
||
/// type allows us to handle both cases.
|
||
///
|
||
/// [`Cow<'a, str>`]: ../../std/borrow/enum.Cow.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some bytes, in a vector
|
||
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||
///
|
||
/// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart);
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
///
|
||
/// Incorrect bytes:
|
||
///
|
||
/// ```
|
||
/// // some invalid bytes
|
||
/// let input = b"Hello \xF0\x90\x80World";
|
||
/// let output = String::from_utf8_lossy(input);
|
||
///
|
||
/// assert_eq!("Hello <20>World", output);
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
|
||
let mut i;
|
||
match str::from_utf8(v) {
|
||
Ok(s) => return Cow::Borrowed(s),
|
||
Err(e) => i = e.valid_up_to(),
|
||
}
|
||
|
||
const TAG_CONT_U8: u8 = 128;
|
||
const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
|
||
let total = v.len();
|
||
fn unsafe_get(xs: &[u8], i: usize) -> u8 {
|
||
unsafe { *xs.get_unchecked(i) }
|
||
}
|
||
fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
|
||
if i >= total {
|
||
0
|
||
} else {
|
||
unsafe_get(xs, i)
|
||
}
|
||
}
|
||
|
||
let mut res = String::with_capacity(total);
|
||
|
||
if i > 0 {
|
||
unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) };
|
||
}
|
||
|
||
// subseqidx is the index of the first byte of the subsequence we're
|
||
// looking at. It's used to copy a bunch of contiguous good codepoints
|
||
// at once instead of copying them one by one.
|
||
let mut subseqidx = i;
|
||
|
||
while i < total {
|
||
let i_ = i;
|
||
let byte = unsafe_get(v, i);
|
||
i += 1;
|
||
|
||
macro_rules! error { () => ({
|
||
unsafe {
|
||
if subseqidx != i_ {
|
||
res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]);
|
||
}
|
||
subseqidx = i;
|
||
res.as_mut_vec().extend_from_slice(REPLACEMENT);
|
||
}
|
||
})}
|
||
|
||
if byte < 128 {
|
||
// subseqidx handles this
|
||
} else {
|
||
let w = unicode_str::utf8_char_width(byte);
|
||
|
||
match w {
|
||
2 => {
|
||
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
|
||
error!();
|
||
continue;
|
||
}
|
||
i += 1;
|
||
}
|
||
3 => {
|
||
match (byte, safe_get(v, i, total)) {
|
||
(0xE0, 0xA0...0xBF) => (),
|
||
(0xE1...0xEC, 0x80...0xBF) => (),
|
||
(0xED, 0x80...0x9F) => (),
|
||
(0xEE...0xEF, 0x80...0xBF) => (),
|
||
_ => {
|
||
error!();
|
||
continue;
|
||
}
|
||
}
|
||
i += 1;
|
||
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
|
||
error!();
|
||
continue;
|
||
}
|
||
i += 1;
|
||
}
|
||
4 => {
|
||
match (byte, safe_get(v, i, total)) {
|
||
(0xF0, 0x90...0xBF) => (),
|
||
(0xF1...0xF3, 0x80...0xBF) => (),
|
||
(0xF4, 0x80...0x8F) => (),
|
||
_ => {
|
||
error!();
|
||
continue;
|
||
}
|
||
}
|
||
i += 1;
|
||
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
|
||
error!();
|
||
continue;
|
||
}
|
||
i += 1;
|
||
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
|
||
error!();
|
||
continue;
|
||
}
|
||
i += 1;
|
||
}
|
||
_ => {
|
||
error!();
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if subseqidx < total {
|
||
unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) };
|
||
}
|
||
Cow::Owned(res)
|
||
}
|
||
|
||
/// Decode a UTF-16 encoded vector `v` into a `String`, returning `Err`
|
||
/// if `v` contains any invalid data.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // 𝄞music
|
||
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
|
||
/// 0x0073, 0x0069, 0x0063];
|
||
/// assert_eq!(String::from("𝄞music"),
|
||
/// String::from_utf16(v).unwrap());
|
||
///
|
||
/// // 𝄞mu<invalid>ic
|
||
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
|
||
/// 0xD800, 0x0069, 0x0063];
|
||
/// assert!(String::from_utf16(v).is_err());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
|
||
decode_utf16(v.iter().cloned()).collect::<Result<_, _>>().map_err(|_| FromUtf16Error(()))
|
||
}
|
||
|
||
/// Decode a UTF-16 encoded vector `v` into a string, replacing
|
||
/// invalid data with the replacement character (U+FFFD).
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // 𝄞mus<invalid>ic<invalid>
|
||
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
|
||
/// 0x0073, 0xDD1E, 0x0069, 0x0063,
|
||
/// 0xD834];
|
||
///
|
||
/// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
|
||
/// String::from_utf16_lossy(v));
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn from_utf16_lossy(v: &[u16]) -> String {
|
||
decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect()
|
||
}
|
||
|
||
/// Creates a new `String` from a length, capacity, and pointer.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This is highly unsafe, due to the number of invariants that aren't
|
||
/// checked:
|
||
///
|
||
/// * The memory at `ptr` needs to have been previously allocated by the
|
||
/// same allocator the standard library uses.
|
||
/// * `length` needs to be less than or equal to `capacity`.
|
||
/// * `capacity` needs to be the correct value.
/// * The first `length` bytes at `ptr` need to be valid UTF-8.
|
||
///
|
||
/// Violating these may cause problems like corrupting the allocator's
|
||
/// internal datastructures.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// use std::mem;
|
||
///
|
||
/// unsafe {
|
||
/// let s = String::from("hello");
|
||
/// let ptr = s.as_ptr();
|
||
/// let len = s.len();
|
||
/// let capacity = s.capacity();
|
||
///
|
||
/// mem::forget(s);
|
||
///
|
||
/// let s = String::from_raw_parts(ptr as *mut _, len, capacity);
|
||
///
|
||
/// assert_eq!(String::from("hello"), s);
|
||
/// }
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String {
|
||
String { vec: Vec::from_raw_parts(buf, length, capacity) }
|
||
}
|
||
|
||
/// Converts a vector of bytes to a `String` without checking that the
|
||
/// string contains valid UTF-8.
|
||
///
|
||
/// See the safe version, [`from_utf8()`], for more details.
|
||
///
|
||
/// [`from_utf8()`]: struct.String.html#method.from_utf8
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the bytes passed
|
||
/// to it are valid UTF-8. If this constraint is violated, it may cause
|
||
/// memory unsafety issues with future users of the `String`, as the rest of
|
||
/// the standard library assumes that `String`s are valid UTF-8.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some bytes, in a vector
|
||
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||
///
|
||
/// let sparkle_heart = unsafe {
|
||
/// String::from_utf8_unchecked(sparkle_heart)
|
||
/// };
|
||
///
|
||
/// assert_eq!("💖", sparkle_heart);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
|
||
String { vec: bytes }
|
||
}
|
||
|
||
/// Converts a `String` into a byte vector.
|
||
///
|
||
/// This consumes the `String`, so we do not need to copy its contents.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let s = String::from("hello");
|
||
/// let bytes = s.into_bytes();
|
||
///
|
||
/// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn into_bytes(self) -> Vec<u8> {
|
||
self.vec
|
||
}
|
||
|
||
/// Extracts a string slice containing the entire string.
|
||
#[inline]
|
||
#[stable(feature = "string_as_str", since = "1.7.0")]
|
||
pub fn as_str(&self) -> &str {
|
||
self
|
||
}
|
||
|
||
/// Extracts a mutable string slice containing the entire string.
|
||
#[inline]
|
||
#[stable(feature = "string_as_str", since = "1.7.0")]
|
||
pub fn as_mut_str(&mut self) -> &mut str {
|
||
self
|
||
}
|
||
|
||
/// Appends a given string slice onto the end of this `String`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("foo");
|
||
///
|
||
/// s.push_str("bar");
|
||
///
|
||
/// assert_eq!("foobar", s);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn push_str(&mut self, string: &str) {
|
||
self.vec.extend_from_slice(string.as_bytes())
|
||
}
|
||
|
||
/// Returns this `String`'s capacity, in bytes.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let s = String::with_capacity(10);
|
||
///
|
||
/// assert!(s.capacity() >= 10);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn capacity(&self) -> usize {
|
||
self.vec.capacity()
|
||
}
|
||
|
||
/// Ensures that this `String`'s capacity is at least `additional` bytes
|
||
/// larger than its length.
|
||
///
|
||
/// The capacity may be increased by more than `additional` bytes if it
|
||
/// chooses, to prevent frequent reallocations.
|
||
///
|
||
/// If you do not want this "at least" behavior, see the [`reserve_exact()`]
|
||
/// method.
|
||
///
|
||
/// [`reserve_exact()`]: #method.reserve_exact
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if the new capacity overflows `usize`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::new();
|
||
///
|
||
/// s.reserve(10);
|
||
///
|
||
/// assert!(s.capacity() >= 10);
|
||
/// ```
|
||
///
|
||
/// This may not actually increase the capacity:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::with_capacity(10);
|
||
/// s.push('a');
|
||
/// s.push('b');
|
||
///
|
||
/// // s now has a length of 2 and a capacity of 10
|
||
/// assert_eq!(2, s.len());
|
||
/// assert_eq!(10, s.capacity());
|
||
///
|
||
/// // Since we already have an extra 8 capacity, calling this...
|
||
/// s.reserve(8);
|
||
///
|
||
/// // ... doesn't actually increase.
|
||
/// assert_eq!(10, s.capacity());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn reserve(&mut self, additional: usize) {
|
||
self.vec.reserve(additional)
|
||
}
|
||
|
||
/// Ensures that this `String`'s capacity is `additional` bytes
|
||
/// larger than its length.
|
||
///
|
||
/// Consider using the [`reserve()`] method unless you absolutely know
|
||
/// better than the allocator.
|
||
///
|
||
/// [`reserve()`]: #method.reserve
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if the new capacity overflows `usize`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::new();
|
||
///
|
||
/// s.reserve_exact(10);
|
||
///
|
||
/// assert!(s.capacity() >= 10);
|
||
/// ```
|
||
///
|
||
/// This may not actually increase the capacity:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::with_capacity(10);
|
||
/// s.push('a');
|
||
/// s.push('b');
|
||
///
|
||
/// // s now has a length of 2 and a capacity of 10
|
||
/// assert_eq!(2, s.len());
|
||
/// assert_eq!(10, s.capacity());
|
||
///
|
||
/// // Since we already have an extra 8 capacity, calling this...
|
||
/// s.reserve_exact(8);
|
||
///
|
||
/// // ... doesn't actually increase.
|
||
/// assert_eq!(10, s.capacity());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn reserve_exact(&mut self, additional: usize) {
|
||
self.vec.reserve_exact(additional)
|
||
}
|
||
|
||
/// Shrinks the capacity of this `String` to match its length.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("foo");
|
||
///
|
||
/// s.reserve(100);
|
||
/// assert!(s.capacity() >= 100);
|
||
///
|
||
/// s.shrink_to_fit();
|
||
/// assert_eq!(3, s.capacity());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn shrink_to_fit(&mut self) {
|
||
self.vec.shrink_to_fit()
|
||
}
|
||
|
||
/// Appends the given `char` to the end of this `String`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("abc");
|
||
///
|
||
/// s.push('1');
|
||
/// s.push('2');
|
||
/// s.push('3');
|
||
///
|
||
/// assert_eq!("abc123", s);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn push(&mut self, ch: char) {
|
||
match ch.len_utf8() {
|
||
1 => self.vec.push(ch as u8),
|
||
_ => self.vec.extend_from_slice(ch.encode_utf8().as_slice()),
|
||
}
|
||
}
|
||
|
||
/// Returns a byte slice of this `String`'s contents.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let s = String::from("hello");
|
||
///
|
||
/// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn as_bytes(&self) -> &[u8] {
|
||
&self.vec
|
||
}
|
||
|
||
/// Shortens this `String` to the specified length.
|
||
///
|
||
/// If `new_len` is greater than the string's current length, this has no
|
||
/// effect.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `new_len` does not lie on a [`char`] boundary.
|
||
///
|
||
/// [`char`]: ../../std/primitive.char.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("hello");
|
||
///
|
||
/// s.truncate(2);
|
||
///
|
||
/// assert_eq!("he", s);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn truncate(&mut self, new_len: usize) {
|
||
if new_len <= self.len() {
|
||
assert!(self.is_char_boundary(new_len));
|
||
self.vec.truncate(new_len)
|
||
}
|
||
}
|
||
|
||
/// Removes the last character from the string buffer and returns it.
|
||
///
|
||
/// Returns `None` if this `String` is empty.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("foo");
|
||
///
|
||
/// assert_eq!(s.pop(), Some('o'));
|
||
/// assert_eq!(s.pop(), Some('o'));
|
||
/// assert_eq!(s.pop(), Some('f'));
|
||
///
|
||
/// assert_eq!(s.pop(), None);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn pop(&mut self) -> Option<char> {
|
||
let ch = match self.chars().rev().next() {
|
||
Some(ch) => ch,
|
||
None => return None,
|
||
};
|
||
let newlen = self.len() - ch.len_utf8();
|
||
unsafe {
|
||
self.vec.set_len(newlen);
|
||
}
|
||
Some(ch)
|
||
}
|
||
|
||
/// Removes a `char` from this `String` at a byte position and returns it.
|
||
///
|
||
/// This is an `O(n)` operation, as it requires copying every element in the
|
||
/// buffer.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `idx` is larger than or equal to the `String`'s length,
|
||
/// or if it does not lie on a [`char`] boundary.
|
||
///
|
||
/// [`char`]: ../../std/primitive.char.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("foo");
|
||
///
|
||
/// assert_eq!(s.remove(0), 'f');
|
||
/// assert_eq!(s.remove(1), 'o');
|
||
/// assert_eq!(s.remove(0), 'o');
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn remove(&mut self, idx: usize) -> char {
|
||
let ch = match self[idx..].chars().next() {
|
||
Some(ch) => ch,
|
||
None => panic!("cannot remove a char from the end of a string"),
|
||
};
|
||
|
||
let next = idx + ch.len_utf8();
|
||
let len = self.len();
|
||
unsafe {
|
||
ptr::copy(self.vec.as_ptr().offset(next as isize),
|
||
self.vec.as_mut_ptr().offset(idx as isize),
|
||
len - next);
|
||
self.vec.set_len(len - (next - idx));
|
||
}
|
||
ch
|
||
}
|
||
|
||
/// Inserts a character into this `String` at a byte position.
|
||
///
|
||
/// This is an `O(n)` operation as it requires copying every element in the
|
||
/// buffer.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if `idx` is larger than the `String`'s length, or if it does not
|
||
/// lie on a [`char`] boundary.
|
||
///
|
||
/// [`char`]: ../../std/primitive.char.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::with_capacity(3);
|
||
///
|
||
/// s.insert(0, 'f');
|
||
/// s.insert(1, 'o');
|
||
/// s.insert(2, 'o');
|
||
///
|
||
/// assert_eq!("foo", s);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn insert(&mut self, idx: usize, ch: char) {
|
||
let len = self.len();
|
||
assert!(idx <= len);
|
||
assert!(self.is_char_boundary(idx));
|
||
let bits = ch.encode_utf8();
|
||
let bits = bits.as_slice();
|
||
let amt = bits.len();
|
||
self.vec.reserve(amt);
|
||
|
||
unsafe {
|
||
ptr::copy(self.vec.as_ptr().offset(idx as isize),
|
||
self.vec.as_mut_ptr().offset((idx + amt) as isize),
|
||
len - idx);
|
||
ptr::copy(bits.as_ptr(),
|
||
self.vec.as_mut_ptr().offset(idx as isize),
|
||
amt);
|
||
self.vec.set_len(len + amt);
|
||
}
|
||
}
|
||
|
||
/// Returns a mutable reference to the contents of this `String`.
|
||
///
|
||
/// # Safety
|
||
///
|
||
/// This function is unsafe because it does not check that the bytes passed
|
||
/// to it are valid UTF-8. If this constraint is violated, it may cause
|
||
/// memory unsafety issues with future users of the `String`, as the rest of
|
||
/// the standard library assumes that `String`s are valid UTF-8.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("hello");
|
||
///
|
||
/// unsafe {
|
||
/// let vec = s.as_mut_vec();
|
||
/// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
|
||
///
|
||
/// vec.reverse();
|
||
/// }
|
||
/// assert_eq!(s, "olleh");
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
|
||
&mut self.vec
|
||
}
|
||
|
||
/// Returns the length of this `String`, in bytes.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let a = String::from("foo");
|
||
///
|
||
/// assert_eq!(a.len(), 3);
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn len(&self) -> usize {
|
||
self.vec.len()
|
||
}
|
||
|
||
/// Returns `true` if this `String` has a length of zero.
|
||
///
|
||
/// Returns `false` otherwise.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut v = String::new();
|
||
/// assert!(v.is_empty());
|
||
///
|
||
/// v.push('a');
|
||
/// assert!(!v.is_empty());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn is_empty(&self) -> bool {
|
||
self.len() == 0
|
||
}
|
||
|
||
/// Truncates this `String`, removing all contents.
|
||
///
|
||
/// While this means the `String` will have a length of zero, it does not
|
||
/// touch its capacity.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("foo");
|
||
///
|
||
/// s.clear();
|
||
///
|
||
/// assert!(s.is_empty());
|
||
/// assert_eq!(0, s.len());
|
||
/// assert_eq!(3, s.capacity());
|
||
/// ```
|
||
#[inline]
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn clear(&mut self) {
|
||
self.vec.clear()
|
||
}
|
||
|
||
/// Create a draining iterator that removes the specified range in the string
|
||
/// and yields the removed chars.
|
||
///
|
||
/// Note: The element range is removed even if the iterator is not
|
||
/// consumed until the end.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// Panics if the starting point or end point do not lie on a [`char`]
|
||
/// boundary, or if they're out of bounds.
|
||
///
|
||
/// [`char`]: ../../std/primitive.char.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let mut s = String::from("α is alpha, β is beta");
|
||
/// let beta_offset = s.find('β').unwrap_or(s.len());
|
||
///
|
||
/// // Remove the range up until the β from the string
|
||
/// let t: String = s.drain(..beta_offset).collect();
|
||
/// assert_eq!(t, "α is alpha, ");
|
||
/// assert_eq!(s, "β is beta");
|
||
///
|
||
/// // A full range clears the string
|
||
/// s.drain(..);
|
||
/// assert_eq!(s, "");
|
||
/// ```
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
pub fn drain<R>(&mut self, range: R) -> Drain
|
||
where R: RangeArgument<usize>
|
||
{
|
||
// Memory safety
|
||
//
|
||
// The String version of Drain does not have the memory safety issues
|
||
// of the vector version. The data is just plain bytes.
|
||
// Because the range removal happens in Drop, if the Drain iterator is leaked,
|
||
// the removal will not happen.
|
||
let len = self.len();
|
||
let start = *range.start().unwrap_or(&0);
|
||
let end = *range.end().unwrap_or(&len);
|
||
|
||
// Take out two simultaneous borrows. The &mut String won't be accessed
|
||
// until iteration is over, in Drop.
|
||
let self_ptr = self as *mut _;
|
||
// slicing does the appropriate bounds checks
|
||
let chars_iter = self[start..end].chars();
|
||
|
||
Drain {
|
||
start: start,
|
||
end: end,
|
||
iter: chars_iter,
|
||
string: self_ptr,
|
||
}
|
||
}
|
||
|
||
/// Converts this `String` into a `Box<str>`.
|
||
///
|
||
/// This will drop any excess capacity.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let s = String::from("hello");
|
||
///
|
||
/// let b = s.into_boxed_str();
|
||
/// ```
|
||
#[stable(feature = "box_str", since = "1.4.0")]
|
||
pub fn into_boxed_str(self) -> Box<str> {
|
||
let slice = self.vec.into_boxed_slice();
|
||
unsafe { mem::transmute::<Box<[u8]>, Box<str>>(slice) }
|
||
}
|
||
}
|
||
|
||
impl FromUtf8Error {
|
||
/// Returns the bytes that were attempted to convert to a `String`.
|
||
///
|
||
/// This method is carefully constructed to avoid allocation. It will
|
||
/// consume the error, moving out the bytes, so that a copy of the bytes
|
||
/// does not need to be made.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some invalid bytes, in a vector
|
||
/// let bytes = vec![0, 159];
|
||
///
|
||
/// let value = String::from_utf8(bytes);
|
||
///
|
||
/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn into_bytes(self) -> Vec<u8> {
|
||
self.bytes
|
||
}
|
||
|
||
/// Fetch a `Utf8Error` to get more details about the conversion failure.
|
||
///
|
||
/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
|
||
/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
|
||
/// an analogue to `FromUtf8Error`. See its documentation for more details
|
||
/// on using it.
|
||
///
|
||
/// [`Utf8Error`]: ../../std/str/struct.Utf8Error.html
|
||
/// [`std::str`]: ../../std/str/index.html
|
||
/// [`u8`]: ../../std/primitive.u8.html
|
||
/// [`&str`]: ../../std/primitive.str.html
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// // some invalid bytes, in a vector
|
||
/// let bytes = vec![0, 159];
|
||
///
|
||
/// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
|
||
///
|
||
/// // the first byte is invalid here
|
||
/// assert_eq!(1, error.valid_up_to());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub fn utf8_error(&self) -> Utf8Error {
|
||
self.error
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl fmt::Display for FromUtf8Error {
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
fmt::Display::fmt(&self.error, f)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl fmt::Display for FromUtf16Error {
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl Clone for String {
|
||
fn clone(&self) -> Self {
|
||
String { vec: self.vec.clone() }
|
||
}
|
||
|
||
fn clone_from(&mut self, source: &Self) {
|
||
self.vec.clone_from(&source.vec);
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl FromIterator<char> for String {
|
||
fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> String {
|
||
let mut buf = String::new();
|
||
buf.extend(iter);
|
||
buf
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> FromIterator<&'a str> for String {
|
||
fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> String {
|
||
let mut buf = String::new();
|
||
buf.extend(iter);
|
||
buf
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "extend_string", since = "1.4.0")]
|
||
impl FromIterator<String> for String {
|
||
fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> String {
|
||
let mut buf = String::new();
|
||
buf.extend(iter);
|
||
buf
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl Extend<char> for String {
|
||
fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
|
||
let iterator = iter.into_iter();
|
||
let (lower_bound, _) = iterator.size_hint();
|
||
self.reserve(lower_bound);
|
||
for ch in iterator {
|
||
self.push(ch)
|
||
}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "extend_ref", since = "1.2.0")]
|
||
impl<'a> Extend<&'a char> for String {
|
||
fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
|
||
self.extend(iter.into_iter().cloned());
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> Extend<&'a str> for String {
|
||
fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
|
||
for s in iter {
|
||
self.push_str(s)
|
||
}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "extend_string", since = "1.4.0")]
|
||
impl Extend<String> for String {
|
||
fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
|
||
for s in iter {
|
||
self.push_str(&s)
|
||
}
|
||
}
|
||
}
|
||
|
||
/// A convenience impl that delegates to the impl for `&str`
|
||
#[unstable(feature = "pattern",
|
||
reason = "API not fully fleshed out and ready to be stabilized",
|
||
issue = "27721")]
|
||
impl<'a, 'b> Pattern<'a> for &'b String {
|
||
type Searcher = <&'b str as Pattern<'a>>::Searcher;
|
||
|
||
fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher {
|
||
self[..].into_searcher(haystack)
|
||
}
|
||
|
||
#[inline]
|
||
fn is_contained_in(self, haystack: &'a str) -> bool {
|
||
self[..].is_contained_in(haystack)
|
||
}
|
||
|
||
#[inline]
|
||
fn is_prefix_of(self, haystack: &'a str) -> bool {
|
||
self[..].is_prefix_of(haystack)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl PartialEq for String {
|
||
#[inline]
|
||
fn eq(&self, other: &String) -> bool {
|
||
PartialEq::eq(&self[..], &other[..])
|
||
}
|
||
#[inline]
|
||
fn ne(&self, other: &String) -> bool {
|
||
PartialEq::ne(&self[..], &other[..])
|
||
}
|
||
}
|
||
|
||
macro_rules! impl_eq {
|
||
($lhs:ty, $rhs: ty) => {
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a, 'b> PartialEq<$rhs> for $lhs {
|
||
#[inline]
|
||
fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&self[..], &other[..]) }
|
||
#[inline]
|
||
fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&self[..], &other[..]) }
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a, 'b> PartialEq<$lhs> for $rhs {
|
||
#[inline]
|
||
fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&self[..], &other[..]) }
|
||
#[inline]
|
||
fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&self[..], &other[..]) }
|
||
}
|
||
|
||
}
|
||
}
|
||
|
||
impl_eq! { String, str }
|
||
impl_eq! { String, &'a str }
|
||
impl_eq! { Cow<'a, str>, str }
|
||
impl_eq! { Cow<'a, str>, &'b str }
|
||
impl_eq! { Cow<'a, str>, String }
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl Default for String {
|
||
#[inline]
|
||
fn default() -> String {
|
||
String::new()
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl fmt::Display for String {
|
||
#[inline]
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
fmt::Display::fmt(&**self, f)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl fmt::Debug for String {
|
||
#[inline]
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
fmt::Debug::fmt(&**self, f)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl hash::Hash for String {
|
||
#[inline]
|
||
fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
|
||
(**self).hash(hasher)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> Add<&'a str> for String {
|
||
type Output = String;
|
||
|
||
#[inline]
|
||
fn add(mut self, other: &str) -> String {
|
||
self.push_str(other);
|
||
self
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl ops::Index<ops::Range<usize>> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: ops::Range<usize>) -> &str {
|
||
&self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl ops::Index<ops::RangeTo<usize>> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: ops::RangeTo<usize>) -> &str {
|
||
&self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl ops::Index<ops::RangeFrom<usize>> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: ops::RangeFrom<usize>) -> &str {
|
||
&self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl ops::Index<ops::RangeFull> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, _index: ops::RangeFull) -> &str {
|
||
unsafe { str::from_utf8_unchecked(&self.vec) }
|
||
}
|
||
}
|
||
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
|
||
impl ops::Index<ops::RangeInclusive<usize>> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
|
||
Index::index(&**self, index)
|
||
}
|
||
}
|
||
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
|
||
impl ops::Index<ops::RangeToInclusive<usize>> for String {
|
||
type Output = str;
|
||
|
||
#[inline]
|
||
fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
|
||
Index::index(&**self, index)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
|
||
impl ops::IndexMut<ops::Range<usize>> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
|
||
&mut self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
|
||
impl ops::IndexMut<ops::RangeTo<usize>> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
|
||
&mut self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
|
||
impl ops::IndexMut<ops::RangeFrom<usize>> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
|
||
&mut self[..][index]
|
||
}
|
||
}
|
||
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
|
||
impl ops::IndexMut<ops::RangeFull> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
|
||
unsafe { mem::transmute(&mut *self.vec) }
|
||
}
|
||
}
|
||
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
|
||
impl ops::IndexMut<ops::RangeInclusive<usize>> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
|
||
IndexMut::index_mut(&mut **self, index)
|
||
}
|
||
}
|
||
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
|
||
impl ops::IndexMut<ops::RangeToInclusive<usize>> for String {
|
||
#[inline]
|
||
fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
|
||
IndexMut::index_mut(&mut **self, index)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl ops::Deref for String {
|
||
type Target = str;
|
||
|
||
#[inline]
|
||
fn deref(&self) -> &str {
|
||
unsafe { str::from_utf8_unchecked(&self.vec) }
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
|
||
impl ops::DerefMut for String {
|
||
#[inline]
|
||
fn deref_mut(&mut self) -> &mut str {
|
||
unsafe { mem::transmute(&mut *self.vec) }
|
||
}
|
||
}
|
||
|
||
/// An error when parsing a `String`.
|
||
///
|
||
/// This `enum` is slightly awkward: it will never actually exist. This error is
|
||
/// part of the type signature of the implementation of [`FromStr`] on
|
||
/// [`String`]. The return type of [`from_str()`], requires that an error be
|
||
/// defined, but, given that a [`String`] can always be made into a new
|
||
/// [`String`] without error, this type will never actually be returned. As
|
||
/// such, it is only here to satisfy said signature, and is useless otherwise.
|
||
///
|
||
/// [`FromStr`]: ../../std/str/trait.FromStr.html
|
||
/// [`String`]: struct.String.html
|
||
/// [`from_str()`]: ../../std/str/trait.FromStr.html#tymethod.from_str
|
||
#[stable(feature = "str_parse_error", since = "1.5.0")]
|
||
#[derive(Copy)]
|
||
pub enum ParseError {}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl FromStr for String {
|
||
type Err = ParseError;
|
||
#[inline]
|
||
fn from_str(s: &str) -> Result<String, ParseError> {
|
||
Ok(String::from(s))
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_parse_error", since = "1.5.0")]
|
||
impl Clone for ParseError {
|
||
fn clone(&self) -> ParseError {
|
||
match *self {}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_parse_error", since = "1.5.0")]
|
||
impl fmt::Debug for ParseError {
|
||
fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
|
||
match *self {}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_parse_error2", since = "1.8.0")]
|
||
impl fmt::Display for ParseError {
|
||
fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
|
||
match *self {}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_parse_error", since = "1.5.0")]
|
||
impl PartialEq for ParseError {
|
||
fn eq(&self, _: &ParseError) -> bool {
|
||
match *self {}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_parse_error", since = "1.5.0")]
|
||
impl Eq for ParseError {}
|
||
|
||
/// A trait for converting a value to a `String`.
|
||
///
|
||
/// This trait is automatically implemented for any type which implements the
|
||
/// [`Display`] trait. As such, `ToString` shouldn't be implemented directly:
|
||
/// [`Display`] should be implemented instead, and you get the `ToString`
|
||
/// implementation for free.
|
||
///
|
||
/// [`Display`]: ../../std/fmt/trait.Display.html
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
pub trait ToString {
|
||
/// Converts the given value to a `String`.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// Basic usage:
|
||
///
|
||
/// ```
|
||
/// let i = 5;
|
||
/// let five = String::from("5");
|
||
///
|
||
/// assert_eq!(five, i.to_string());
|
||
/// ```
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
fn to_string(&self) -> String;
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<T: fmt::Display + ?Sized> ToString for T {
|
||
#[inline]
|
||
default fn to_string(&self) -> String {
|
||
use core::fmt::Write;
|
||
let mut buf = String::new();
|
||
let _ = buf.write_fmt(format_args!("{}", self));
|
||
buf.shrink_to_fit();
|
||
buf
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "str_to_string_specialization", since = "1.9.0")]
|
||
impl ToString for str {
|
||
#[inline]
|
||
fn to_string(&self) -> String {
|
||
String::from(self)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl AsRef<str> for String {
|
||
#[inline]
|
||
fn as_ref(&self) -> &str {
|
||
self
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl AsRef<[u8]> for String {
|
||
#[inline]
|
||
fn as_ref(&self) -> &[u8] {
|
||
self.as_bytes()
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> From<&'a str> for String {
|
||
fn from(s: &'a str) -> String {
|
||
s.to_owned()
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> From<&'a str> for Cow<'a, str> {
|
||
#[inline]
|
||
fn from(s: &'a str) -> Cow<'a, str> {
|
||
Cow::Borrowed(s)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl<'a> From<String> for Cow<'a, str> {
|
||
#[inline]
|
||
fn from(s: String) -> Cow<'a, str> {
|
||
Cow::Owned(s)
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl Into<Vec<u8>> for String {
|
||
fn into(self) -> Vec<u8> {
|
||
self.into_bytes()
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "rust1", since = "1.0.0")]
|
||
impl fmt::Write for String {
|
||
#[inline]
|
||
fn write_str(&mut self, s: &str) -> fmt::Result {
|
||
self.push_str(s);
|
||
Ok(())
|
||
}
|
||
|
||
#[inline]
|
||
fn write_char(&mut self, c: char) -> fmt::Result {
|
||
self.push(c);
|
||
Ok(())
|
||
}
|
||
}
|
||
|
||
/// A draining iterator for `String`.
|
||
///
|
||
/// This struct is created by the [`drain()`] method on [`String`]. See its
|
||
/// documentation for more.
|
||
///
|
||
/// [`drain()`]: struct.String.html#method.drain
|
||
/// [`String`]: struct.String.html
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
pub struct Drain<'a> {
|
||
/// Will be used as &'a mut String in the destructor
|
||
string: *mut String,
|
||
/// Start of part to remove
|
||
start: usize,
|
||
/// End of part to remove
|
||
end: usize,
|
||
/// Current remaining range to remove
|
||
iter: Chars<'a>,
|
||
}
|
||
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
unsafe impl<'a> Sync for Drain<'a> {}
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
unsafe impl<'a> Send for Drain<'a> {}
|
||
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
impl<'a> Drop for Drain<'a> {
|
||
fn drop(&mut self) {
|
||
unsafe {
|
||
// Use Vec::drain. "Reaffirm" the bounds checks to avoid
|
||
// panic code being inserted again.
|
||
let self_vec = (*self.string).as_mut_vec();
|
||
if self.start <= self.end && self.end <= self_vec.len() {
|
||
self_vec.drain(self.start..self.end);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
impl<'a> Iterator for Drain<'a> {
|
||
type Item = char;
|
||
|
||
#[inline]
|
||
fn next(&mut self) -> Option<char> {
|
||
self.iter.next()
|
||
}
|
||
|
||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||
self.iter.size_hint()
|
||
}
|
||
}
|
||
|
||
#[stable(feature = "drain", since = "1.6.0")]
|
||
impl<'a> DoubleEndedIterator for Drain<'a> {
|
||
#[inline]
|
||
fn next_back(&mut self) -> Option<char> {
|
||
self.iter.next_back()
|
||
}
|
||
}
|