From 072dd6fabdacdbcd672c367af8ea3e721ea15d10 Mon Sep 17 00:00:00 2001 From: Steve Klabnik Date: Tue, 1 Dec 2015 16:07:53 -0500 Subject: [PATCH] Improve documentation on String's methods Part of #29376 --- src/libcollections/string.rs | 388 ++++++++++++++++++++++++++--------- 1 file changed, 294 insertions(+), 94 deletions(-) diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 4757ada474f..8c0b52f71f8 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -291,13 +291,23 @@ pub struct FromUtf8Error { pub struct FromUtf16Error(()); impl String { - /// Creates a new string buffer initialized with the empty string. + /// Creates a new empty `String`. + /// + /// Given that the `String` is empty, this will not allocate any initial + /// buffer. While that means that this initial operation is very + /// inexpensive, it may cause excessive allocation later, when you add + /// data. If you have an idea of how much data the `String` will hold, + /// consider the [`with_capacity()`] method to prevent excessive + /// re-allocation. + /// + /// [`with_capacity()`]: #method.with_capacity /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// # #![allow(unused_mut)] - /// let mut s = String::new(); + /// let s = String::new(); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -305,12 +315,26 @@ impl String { String { vec: Vec::new() } } - /// Creates a new string buffer with the given capacity. - /// The string will be able to hold exactly `capacity` bytes without - /// reallocating. If `capacity` is 0, the string will not allocate. + /// Creates a new empty `String` with a particular capacity. + /// + /// `String`s have an internal buffer to hold their data. The capacity is + /// the length of that buffer, and can be queried with the [`capacity()`] + /// method. This method creates an empty `String`, but one with an initial + /// buffer that can hold `capacity` bytes. 
This is useful when you may be + /// appending a bunch of data to the `String`, reducing the number of + /// reallocations it needs to do. + /// + /// [`capacity()`]: #method.capacity + /// + /// If the given capacity is `0`, no allocation will occur, and this method + /// is identical to the [`new()`] method. + /// + /// [`new()`]: #method.new /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::with_capacity(10); /// @@ -346,26 +370,30 @@ impl String { /// Converts a vector of bytes to a `String`. /// - /// A string slice (`&str`) is made of bytes (`u8`), and a vector of bytes - /// (`Vec`) is made of bytes, so this function converts between the + /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes + /// ([`Vec`]) is made of bytes, so this function converts between the /// two. Not all byte slices are valid `String`s, however: `String` /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that /// the bytes are valid UTF-8, and then does the conversion. /// + /// [`&str`]: ../primitive.str.html + /// [`u8`]: ../primitive.u8.html + /// [`Vec`]: ../vec/struct.Vec.html + /// /// If you are sure that the byte slice is valid UTF-8, and you don't want /// to incur the overhead of the validity check, there is an unsafe version - /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the - /// same behavior but skips the check. + /// of this function, [`from_utf8_unchecked()`], which has the same behavior + /// but skips the check. /// - /// [fromutf8]: struct.String.html#method.from_utf8_unchecked + /// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked /// /// This method will take care to not copy the vector, for efficiency's /// sake. /// /// If you need a `&str` instead of a `String`, consider - /// [`str::from_utf8()`][str]. + /// [`str::from_utf8()`]. 
/// - /// [str]: ../str/fn.from_utf8.html + /// [`str::from_utf8()`]: ../str/fn.from_utf8.html /// /// # Failure /// @@ -395,10 +423,10 @@ impl String { /// assert!(String::from_utf8(sparkle_heart).is_err()); /// ``` /// - /// See the docs for [`FromUtf8Error`][error] for more details on what you - /// can do with this error. + /// See the docs for [`FromUtf8Error`] for more details on what you can do + /// with this error. /// - /// [error]: struct.FromUtf8Error.html + /// [`FromUtf8Error`]: struct.FromUtf8Error.html #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn from_utf8(vec: Vec) -> Result { @@ -415,24 +443,28 @@ impl String { /// Converts a slice of bytes to a `String`, including invalid characters. /// - /// A string slice (`&str`) is made of bytes (`u8`), and a slice of bytes - /// (`&[u8]`) is made of bytes, so this function converts between the two. - /// Not all byte slices are valid string slices, however: `&str` requires - /// that it is valid UTF-8. During this conversion, `from_utf8_lossy()` - /// will replace any invalid UTF-8 sequences with + /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a slice of + /// bytes ([`&[u8]`]) is made of bytes, so this function converts between + /// the two. Not all byte slices are valid string slices, however: [`&str`] + /// requires that it is valid UTF-8. During this conversion, + /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: � /// + /// [`&str`]: ../primitive.str.html + /// [`u8`]: ../primitive.u8.html + /// [`&[u8]`]: ../primitive.slice.html + /// /// If you are sure that the byte slice is valid UTF-8, and you don't want /// to incur the overhead of the conversion, there is an unsafe version - /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the - /// same behavior but skips the checks. 
+ /// of this function, [`from_utf8_unchecked()`], which has the same behavior + /// but skips the checks. /// - /// [fromutf8]: struct.String.html#method.from_utf8_unchecked + /// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked /// - /// If you need a `&str` instead of a `String`, consider - /// [`str::from_utf8()`][str]. + /// If you need a [`&str`] instead of a `String`, consider + /// [`str::from_utf8()`]. /// - /// [str]: ../str/fn.from_utf8.html + /// [`str::from_utf8()`]: ../str/fn.from_utf8.html /// /// # Examples /// @@ -576,12 +608,14 @@ impl String { /// /// # Examples /// + /// Basic usage: + /// /// ``` /// // 𝄞music /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, /// 0x0073, 0x0069, 0x0063]; - /// assert_eq!(String::from_utf16(v).unwrap(), - /// "𝄞music".to_string()); + /// assert_eq!(String::from("𝄞music"), + /// String::from_utf16(v).unwrap()); /// /// // 𝄞muic /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, @@ -598,14 +632,16 @@ impl String { /// /// # Examples /// + /// Basic usage: + /// /// ``` /// // 𝄞music /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, /// 0x0073, 0xDD1E, 0x0069, 0x0063, /// 0xD834]; /// - /// assert_eq!(String::from_utf16_lossy(v), - /// "𝄞mus\u{FFFD}ic\u{FFFD}".to_string()); + /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"), + /// String::from_utf16_lossy(v)); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -617,13 +653,37 @@ impl String { /// /// # Safety /// - /// This is _very_ unsafe because: + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: /// - /// * We call `Vec::from_raw_parts` to get a `Vec`. Therefore, this - /// function inherits all of its unsafety, see [its - /// documentation](../vec/struct.Vec.html#method.from_raw_parts) - /// for the invariants it expects, they also apply to this function. - /// * We assume that the `Vec` contains valid UTF-8. 
+ /// * The memory at `ptr` needs to have been previously allocated by the + /// same allocator the standard library uses. + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the correct value. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::mem; + /// + /// unsafe { + /// let s = String::from("hello"); + /// let ptr = s.as_ptr(); + /// let len = s.len(); + /// let capacity = s.capacity(); + /// + /// mem::forget(s); + /// + /// let s = String::from_raw_parts(ptr as *mut _, len, capacity); + /// + /// assert_eq!(String::from("hello"), s); + /// } + /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String { @@ -633,15 +693,16 @@ impl String { /// Converts a vector of bytes to a `String` without checking that the /// string contains valid UTF-8. /// - /// See the safe version, [`from_utf8()`][fromutf8], for more. + /// See the safe version, [`from_utf8()`], for more details. /// - /// [fromutf8]: struct.String.html#method.from_utf8 + /// [`from_utf8()`]: struct.String.html#method.from_utf8 /// /// # Safety /// - /// This function is unsafe because it does not check that the bytes passed to - /// it are valid UTF-8. If this constraint is violated, undefined behavior - /// results, as the rest of Rust assumes that `String`s are valid UTF-8. + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. /// /// # Examples /// @@ -663,14 +724,19 @@ impl String { String { vec: bytes } } - /// Returns the underlying byte buffer, encoded as UTF-8. 
+ /// Converts a `String` into a byte vector. + /// + /// This consumes the `String`, so we do not need to copy its contents. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = String::from("hello"); /// let bytes = s.into_bytes(); - /// assert_eq!(bytes, [104, 101, 108, 108, 111]); + /// + /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -687,14 +753,18 @@ impl String { self } - /// Pushes the given string onto this string buffer. + /// Appends a given string slice onto the end of this `String`. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("foo"); + /// /// s.push_str("bar"); - /// assert_eq!(s, "foobar"); + /// + /// assert_eq!("foobar", s); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -702,13 +772,15 @@ impl String { self.vec.extend_from_slice(string.as_bytes()) } - /// Returns the number of bytes that this string buffer can hold without - /// reallocating. + /// Returns this `String`'s capacity, in bytes. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = String::with_capacity(10); + /// /// assert!(s.capacity() >= 10); /// ``` #[inline] @@ -717,9 +789,16 @@ impl String { self.vec.capacity() } - /// Reserves capacity for at least `additional` more bytes to be inserted - /// in the given `String`. The collection may reserve more space to avoid - /// frequent reallocations. + /// Ensures that this `String`'s capacity is at least `additional` bytes + /// larger than its length. + /// + /// The capacity may be increased by more than `additional` bytes if it + /// chooses, to prevent frequent reallocations. + /// + /// If you do not want this "at least" behavior, see the [`reserve_exact()`] + /// method. 
+ /// + /// [`reserve_exact()`]: #method.reserve_exact /// /// # Panics /// @@ -727,24 +806,46 @@ impl String { /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::new(); + /// /// s.reserve(10); + /// /// assert!(s.capacity() >= 10); /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn reserve(&mut self, additional: usize) { self.vec.reserve(additional) } - /// Reserves the minimum capacity for exactly `additional` more bytes to be - /// inserted in the given `String`. Does nothing if the capacity is already - /// sufficient. + /// Ensures that this `String`'s capacity is `additional` bytes + /// larger than its length. /// - /// Note that the allocator may give the collection more space than it - /// requests. Therefore capacity can not be relied upon to be precisely - /// minimal. Prefer `reserve` if future insertions are expected. + /// Consider using the [`reserve()`] method unless you absolutely know + /// better than the allocator. 
+ /// + /// [`reserve()`]: #method.reserve /// /// # Panics /// @@ -752,27 +853,53 @@ impl String { /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::new(); + /// /// s.reserve_exact(10); + /// /// assert!(s.capacity() >= 10); /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve_exact(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn reserve_exact(&mut self, additional: usize) { self.vec.reserve_exact(additional) } - /// Shrinks the capacity of this string buffer to match its length. + /// Shrinks the capacity of this `String` to match its length. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("foo"); + /// /// s.reserve(100); /// assert!(s.capacity() >= 100); + /// /// s.shrink_to_fit(); - /// assert_eq!(s.capacity(), 3); + /// assert_eq!(3, s.capacity()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -780,16 +907,20 @@ impl String { self.vec.shrink_to_fit() } - /// Adds the given character to the end of the string. + /// Appends the given `char` to the end of this `String`. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("abc"); + /// /// s.push('1'); /// s.push('2'); /// s.push('3'); - /// assert_eq!(s, "abc123"); + /// + /// assert_eq!("abc123", s); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -815,13 +946,16 @@ impl String { } } - /// Works with the underlying buffer as a byte slice. + /// Returns a byte slice of this `String`'s contents. 
/// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = String::from("hello"); - /// assert_eq!(s.as_bytes(), [104, 101, 108, 108, 111]); + /// + /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -829,19 +963,25 @@ impl String { &self.vec } - /// Shortens a string to the specified length. + /// Shortens this `String` to the specified length. /// /// # Panics /// - /// Panics if `new_len` > current length, - /// or if `new_len` is not a character boundary. + /// Panics if `new_len` > current length, or if `new_len` does not lie on a + /// [`char`] boundary. + /// + /// [`char`]: ../primitive.char.html /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("hello"); + /// /// s.truncate(2); - /// assert_eq!(s, "he"); + /// + /// assert_eq!("he", s); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -851,15 +991,20 @@ impl String { } /// Removes the last character from the string buffer and returns it. - /// Returns `None` if this string buffer is empty. + /// + /// Returns `None` if this `String` is empty. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("foo"); + /// /// assert_eq!(s.pop(), Some('o')); /// assert_eq!(s.pop(), Some('o')); /// assert_eq!(s.pop(), Some('f')); + /// /// assert_eq!(s.pop(), None); /// ``` #[inline] @@ -877,23 +1022,25 @@ impl String { Some(ch) } - /// Removes the character from the string buffer at byte position `idx` and - /// returns it. + /// Removes a `char` from this `String` at a byte position and returns it. /// - /// # Warning - /// - /// This is an O(n) operation as it requires copying every element in the + /// This is an `O(n)` operation, as it requires copying every element in the /// buffer. /// /// # Panics /// - /// If `idx` does not lie on a character boundary, or if it is out of - /// bounds, then this function will panic. 
+ /// Panics if `idx` is larger than or equal to the `String`'s length, or if it + /// does not lie on a [`char`] boundary. + /// + /// [`char`]: ../primitive.char.html /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("foo"); + /// /// assert_eq!(s.remove(0), 'f'); /// assert_eq!(s.remove(1), 'o'); /// assert_eq!(s.remove(0), 'o'); @@ -915,17 +1062,31 @@ impl String { ch } - /// Inserts a character into the string buffer at byte position `idx`. + /// Inserts a character into this `String` at a byte position. /// - /// # Warning - /// - /// This is an O(n) operation as it requires copying every element in the + /// This is an `O(n)` operation as it requires copying every element in the /// buffer. /// /// # Panics /// - /// If `idx` does not lie on a character boundary or is out of bounds, then - /// this function will panic. + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// [`char`]: ../primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(3); + /// + /// s.insert(0, 'f'); + /// s.insert(1, 'o'); + /// s.insert(2, 'o'); + /// + /// assert_eq!("foo", s); + /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, idx: usize, ch: char) { @@ -947,18 +1108,26 @@ impl String { } } - /// Views the string buffer as a mutable sequence of bytes. + /// Returns a mutable reference to the contents of this `String`. /// - /// This is unsafe because it does not check - /// to ensure that the resulting string will be valid UTF-8. + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes written + /// through the returned vector are valid UTF-8. Violating this may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. 
/// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("hello"); + /// /// unsafe { /// let vec = s.as_mut_vec(); - /// assert!(vec == &[104, 101, 108, 108, 111]); + /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]); + /// /// vec.reverse(); /// } /// assert_eq!(s, "olleh"); @@ -969,12 +1138,15 @@ impl String { &mut self.vec } - /// Returns the number of bytes in this string. + /// Returns the length of this `String`, in bytes. /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// let a = "foo".to_string(); + /// let a = String::from("foo"); + /// /// assert_eq!(a.len(), 3); /// ``` #[inline] @@ -983,13 +1155,18 @@ impl String { self.vec.len() } - /// Returns true if the string contains no bytes + /// Returns `true` if this `String` has a length of zero. + /// + /// Returns `false` otherwise. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut v = String::new(); /// assert!(v.is_empty()); + /// /// v.push('a'); /// assert!(!v.is_empty()); /// ``` @@ -999,14 +1176,23 @@ impl String { self.len() == 0 } - /// Truncates the string, returning it to 0 length. + /// Truncates this `String`, removing all contents. + /// + /// While this means the `String` will have a length of zero, it does not + /// touch its capacity. /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// let mut s = "foo".to_string(); + /// let mut s = String::from("foo"); + /// /// s.clear(); + /// /// assert!(s.is_empty()); + /// assert_eq!(0, s.len()); + /// assert_eq!(3, s.capacity()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -1020,11 +1206,15 @@ impl String { /// /// # Panics /// - /// Panics if the starting point or end point are not on character boundaries, - /// or if they are out of bounds. + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. 
+ /// + /// [`char`]: ../primitive.char.html /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let mut s = String::from("α is alpha, β is beta"); /// let beta_offset = s.find('β').unwrap_or(s.len()); @@ -1066,9 +1256,19 @@ impl String { } } - /// Converts the string into `Box`. + /// Converts this `String` into a `Box`. /// - /// Note that this will drop any excess capacity. + /// This will drop any excess capacity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// let b = s.into_boxed_str(); + /// ``` #[stable(feature = "box_str", since = "1.4.0")] pub fn into_boxed_str(self) -> Box { let slice = self.vec.into_boxed_slice();