Rollup merge of #28812 - steveklabnik:improve_str_from_utf8_docs, r=brson
Our docs were very basic for the various versions of from_utf8, so this commit beefs them up. It also improves docs for the &str variant's error, Utf8Error.
This commit is contained in:
commit
c3c5de1b98
@ -92,26 +92,61 @@ pub fn from_str(_: &str) -> String {
|
|||||||
panic!("not available with cfg(test)");
|
panic!("not available with cfg(test)");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the vector as a string buffer, if possible, taking care not to
|
/// Converts a vector of bytes to a `String`.
|
||||||
/// copy it.
|
///
|
||||||
|
/// A string slice (`&str`) is made of bytes (`u8`), and a vector of bytes
|
||||||
|
/// (`Vec<u8>`) is made of bytes, so this function converts between the
|
||||||
|
/// two. Not all byte slices are valid `String`s, however: `String`
|
||||||
|
/// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
|
||||||
|
/// the bytes are valid UTF-8, and then does the conversion.
|
||||||
|
///
|
||||||
|
/// If you are sure that the byte slice is valid UTF-8, and you don't want
|
||||||
|
/// to incur the overhead of the validity check, there is an unsafe version
|
||||||
|
/// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
|
||||||
|
/// same behavior but skips the check.
|
||||||
|
///
|
||||||
|
/// [fromutf8]: struct.String.html#method.from_utf8_unchecked
|
||||||
|
///
|
||||||
|
/// This method will take care to not copy the vector, for efficiency's
|
||||||
|
/// sake.
|
||||||
|
///
|
||||||
|
/// If you need a `&str` instead of a `String`, consider
|
||||||
|
/// [`str::from_utf8()`][str].
|
||||||
|
///
|
||||||
|
/// [str]: ../str/fn.from_utf8.html
|
||||||
///
|
///
|
||||||
/// # Failure
|
/// # Failure
|
||||||
///
|
///
|
||||||
/// If the given vector is not valid UTF-8, then the original vector and the
|
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
|
||||||
/// corresponding error is returned.
|
/// provided bytes are not UTF-8. The vector you moved in is also included.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// ```
|
/// Basic usage:
|
||||||
/// let hello_vec = vec![104, 101, 108, 108, 111];
|
|
||||||
/// let s = String::from_utf8(hello_vec).unwrap();
|
|
||||||
/// assert_eq!(s, "hello");
|
|
||||||
///
|
///
|
||||||
/// let invalid_vec = vec![240, 144, 128];
|
|
||||||
/// let s = String::from_utf8(invalid_vec).err().unwrap();
|
|
||||||
/// let err = s.utf8_error();
|
|
||||||
/// assert_eq!(s.into_bytes(), [240, 144, 128]);
|
|
||||||
/// ```
|
/// ```
|
||||||
|
/// // some bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// // We know these bytes are valid, so just use `unwrap()`.
|
||||||
|
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Incorrect bytes:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// // some invalid bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![0, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// assert!(String::from_utf8(sparkle_heart).is_err());
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// See the docs for [`FromUtf8Error`][error] for more details on what you
|
||||||
|
/// can do with this error.
|
||||||
|
///
|
||||||
|
/// [error]: struct.FromUtf8Error.html
|
||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
|
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
|
||||||
@ -121,15 +156,49 @@ pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a vector of bytes to a new UTF-8 string.
|
/// Converts a slice of bytes to a `String`, including invalid characters.
|
||||||
/// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
///
|
||||||
|
/// A string slice (`&str`) is made of bytes (`u8`), and a slice of bytes
|
||||||
|
/// (`&[u8]`) is made of bytes, so this function converts between the two.
|
||||||
|
/// Not all byte slices are valid string slices, however: `&str` requires
|
||||||
|
/// that it is valid UTF-8. During this conversion, `from_utf8_lossy()`
|
||||||
|
/// will replace any invalid UTF-8 sequences with
|
||||||
|
/// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: <20>
|
||||||
|
///
|
||||||
|
/// If you are sure that the byte slice is valid UTF-8, and you don't want
|
||||||
|
/// to incur the overhead of the conversion, there is an unsafe version
|
||||||
|
/// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
|
||||||
|
/// same behavior but skips the checks.
|
||||||
|
///
|
||||||
|
/// [fromutf8]: struct.String.html#method.from_utf8_unchecked
|
||||||
|
///
|
||||||
|
/// If you need a `&str` instead of a `String`, consider
|
||||||
|
/// [`str::from_utf8()`][str].
|
||||||
|
///
|
||||||
|
/// [str]: ../str/fn.from_utf8.html
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
/// ```
|
/// ```
|
||||||
|
/// // some bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// // These bytes are valid UTF-8, so nothing needs to be replaced.
|
||||||
|
/// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart);
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Incorrect bytes:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// // some invalid bytes
|
||||||
/// let input = b"Hello \xF0\x90\x80World";
|
/// let input = b"Hello \xF0\x90\x80World";
|
||||||
/// let output = String::from_utf8_lossy(input);
|
/// let output = String::from_utf8_lossy(input);
|
||||||
/// assert_eq!(output, "Hello \u{FFFD}World");
|
///
|
||||||
|
/// assert_eq!("Hello <20>World", output);
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
|
||||||
@ -309,9 +378,33 @@ pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> St
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a vector of bytes to a new `String` without checking if
|
/// Converts a vector of bytes to a `String` without checking that the
|
||||||
/// it contains valid UTF-8. This is unsafe because it assumes that
|
/// string contains valid UTF-8.
|
||||||
/// the UTF-8-ness of the vector has already been validated.
|
///
|
||||||
|
/// See the safe version, [`from_utf8()`][fromutf8], for more.
|
||||||
|
///
|
||||||
|
/// [fromutf8]: struct.String.html#method.from_utf8
|
||||||
|
///
|
||||||
|
/// # Unsafety
|
||||||
|
///
|
||||||
|
/// This function is unsafe because it does not check that the bytes passed to
|
||||||
|
/// it are valid UTF-8. If this constraint is violated, undefined behavior
|
||||||
|
/// results, as the rest of Rust assumes that `String`s are valid UTF-8.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// // some bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// let sparkle_heart = unsafe {
|
||||||
|
/// String::from_utf8_unchecked(sparkle_heart)
|
||||||
|
/// };
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
|
pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
|
||||||
|
@ -119,7 +119,11 @@ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|||||||
Section: Creating a string
|
Section: Creating a string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/// Errors which can occur when attempting to interpret a byte slice as a `str`.
|
/// Errors which can occur when attempting to interpret a sequence of `u8`
|
||||||
|
/// as a string.
|
||||||
|
///
|
||||||
|
/// As such, the `from_utf8` family of functions and methods for both `String`s
|
||||||
|
/// and `&str`s make use of this error, for example.
|
||||||
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
|
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub struct Utf8Error {
|
pub struct Utf8Error {
|
||||||
@ -132,21 +136,104 @@ impl Utf8Error {
|
|||||||
///
|
///
|
||||||
/// It is the maximum index such that `from_utf8(&input[..index])`
|
/// It is the maximum index such that `from_utf8(&input[..index])`
|
||||||
/// would return `Ok(_)`.
|
/// would return `Ok(_)`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// #![feature(utf8_error)]
|
||||||
|
///
|
||||||
|
/// use std::str;
|
||||||
|
///
|
||||||
|
/// // some invalid bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![0, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// // std::str::from_utf8 returns a Utf8Error
|
||||||
|
/// let error = str::from_utf8(&sparkle_heart).unwrap_err();
|
||||||
|
///
|
||||||
|
/// // the second byte is invalid here
|
||||||
|
/// assert_eq!(1, error.valid_up_to());
|
||||||
|
/// ```
|
||||||
#[unstable(feature = "utf8_error", reason = "method just added",
|
#[unstable(feature = "utf8_error", reason = "method just added",
|
||||||
issue = "27734")]
|
issue = "27734")]
|
||||||
pub fn valid_up_to(&self) -> usize { self.valid_up_to }
|
pub fn valid_up_to(&self) -> usize { self.valid_up_to }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a slice of bytes to a string slice without performing any
|
/// Converts a slice of bytes to a string slice.
|
||||||
/// allocations.
|
|
||||||
///
|
///
|
||||||
/// Once the slice has been validated as UTF-8, it is transmuted in-place and
|
/// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
|
||||||
/// returned as a '&str' instead of a '&[u8]'
|
/// is made of bytes, so this function converts between the two. Not all byte
|
||||||
|
/// slices are valid string slices, however: `&str` requires that it is valid
|
||||||
|
/// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
|
||||||
|
/// then does the conversion.
|
||||||
|
///
|
||||||
|
/// If you are sure that the byte slice is valid UTF-8, and you don't want to
|
||||||
|
/// incur the overhead of the validity check, there is an unsafe version of
|
||||||
|
/// this function, [`from_utf8_unchecked()`][fromutf8], which has the same
|
||||||
|
/// behavior but skips the check.
|
||||||
|
///
|
||||||
|
/// [fromutf8]: fn.from_utf8_unchecked.html
|
||||||
|
///
|
||||||
|
/// If you need a `String` instead of a `&str`, consider
|
||||||
|
/// [`String::from_utf8()`][string].
|
||||||
|
///
|
||||||
|
/// [string]: ../string/struct.String.html#method.from_utf8
|
||||||
|
///
|
||||||
|
/// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
|
||||||
|
/// it, this function is one way to have a stack-allocated string. There is
|
||||||
|
/// an example of this in the examples section below.
|
||||||
///
|
///
|
||||||
/// # Failure
|
/// # Failure
|
||||||
///
|
///
|
||||||
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
|
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
|
||||||
/// provided slice is not UTF-8.
|
/// provided slice is not UTF-8.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::str;
|
||||||
|
///
|
||||||
|
/// // some bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// // We know these bytes are valid, so just use `unwrap()`.
|
||||||
|
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Incorrect bytes:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::str;
|
||||||
|
///
|
||||||
|
/// // some invalid bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![0, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// assert!(str::from_utf8(&sparkle_heart).is_err());
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// See the docs for [`Utf8Error`][error] for more details on the kinds of
|
||||||
|
/// errors that can be returned.
|
||||||
|
///
|
||||||
|
/// [error]: struct.Utf8Error.html
|
||||||
|
///
|
||||||
|
/// A "stack allocated string":
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::str;
|
||||||
|
///
|
||||||
|
/// // some bytes, in a stack-allocated array
|
||||||
|
/// let sparkle_heart = [240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// // We know these bytes are valid, so just use `unwrap()`.
|
||||||
|
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||||
try!(run_utf8_validation_iterator(&mut v.iter()));
|
try!(run_utf8_validation_iterator(&mut v.iter()));
|
||||||
@ -155,6 +242,33 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
|||||||
|
|
||||||
/// Converts a slice of bytes to a string slice without checking
|
/// Converts a slice of bytes to a string slice without checking
|
||||||
/// that the string contains valid UTF-8.
|
/// that the string contains valid UTF-8.
|
||||||
|
///
|
||||||
|
/// See the safe version, [`from_utf8()`][fromutf8], for more.
|
||||||
|
///
|
||||||
|
/// [fromutf8]: fn.from_utf8.html
|
||||||
|
///
|
||||||
|
/// # Unsafety
|
||||||
|
///
|
||||||
|
/// This function is unsafe because it does not check that the bytes passed to
|
||||||
|
/// it are valid UTF-8. If this constraint is violated, undefined behavior
|
||||||
|
/// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::str;
|
||||||
|
///
|
||||||
|
/// // some bytes, in a vector
|
||||||
|
/// let sparkle_heart = vec![240, 159, 146, 150];
|
||||||
|
///
|
||||||
|
/// let sparkle_heart = unsafe {
|
||||||
|
/// str::from_utf8_unchecked(&sparkle_heart)
|
||||||
|
/// };
|
||||||
|
///
|
||||||
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
|
/// ```
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
|
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
|
||||||
|
Loading…
Reference in New Issue
Block a user