Auto merge of #113954 - matthiaskrgr:rollup-e2r9suz, r=matthiaskrgr
Rollup of 6 pull requests Successful merges: - #112490 (Remove `#[cfg(all())]` workarounds from `c_char`) - #113252 (Update the tracking issue for `const_cstr_from_ptr`) - #113442 (Allow limited access to `OsString` bytes) - #113876 (fix docs & example for `std::os::unix::prelude::FileExt::write_at`) - #113898 (Fix size_hint for EncodeUtf16) - #113934 (Multibyte character removal in String::pop and String::remove doctests) r? `@ghost` `@rustbot` modify labels: rollup
This commit is contained in:
commit
42f5419dd2
@ -1290,11 +1290,11 @@ impl String {
|
|||||||
/// Basic usage:
|
/// Basic usage:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// let mut s = String::from("foo");
|
/// let mut s = String::from("abč");
|
||||||
///
|
///
|
||||||
/// assert_eq!(s.pop(), Some('o'));
|
/// assert_eq!(s.pop(), Some('č'));
|
||||||
/// assert_eq!(s.pop(), Some('o'));
|
/// assert_eq!(s.pop(), Some('b'));
|
||||||
/// assert_eq!(s.pop(), Some('f'));
|
/// assert_eq!(s.pop(), Some('a'));
|
||||||
///
|
///
|
||||||
/// assert_eq!(s.pop(), None);
|
/// assert_eq!(s.pop(), None);
|
||||||
/// ```
|
/// ```
|
||||||
@ -1324,11 +1324,11 @@ impl String {
|
|||||||
/// Basic usage:
|
/// Basic usage:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// let mut s = String::from("foo");
|
/// let mut s = String::from("abç");
|
||||||
///
|
///
|
||||||
/// assert_eq!(s.remove(0), 'f');
|
/// assert_eq!(s.remove(0), 'a');
|
||||||
/// assert_eq!(s.remove(1), 'o');
|
/// assert_eq!(s.remove(1), 'ç');
|
||||||
/// assert_eq!(s.remove(0), 'o');
|
/// assert_eq!(s.remove(0), 'b');
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -1738,6 +1738,28 @@ fn test_utf16_code_units() {
|
|||||||
assert_eq!("é\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9])
|
assert_eq!("é\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_utf16_size_hint() {
|
||||||
|
assert_eq!("".encode_utf16().size_hint(), (0, Some(0)));
|
||||||
|
assert_eq!("123".encode_utf16().size_hint(), (1, Some(3)));
|
||||||
|
assert_eq!("1234".encode_utf16().size_hint(), (2, Some(4)));
|
||||||
|
assert_eq!("12345678".encode_utf16().size_hint(), (3, Some(8)));
|
||||||
|
|
||||||
|
fn hint_vec(src: &str) -> Vec<(usize, Option<usize>)> {
|
||||||
|
let mut it = src.encode_utf16();
|
||||||
|
let mut result = Vec::new();
|
||||||
|
result.push(it.size_hint());
|
||||||
|
while it.next().is_some() {
|
||||||
|
result.push(it.size_hint())
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(hint_vec("12"), [(1, Some(2)), (1, Some(1)), (0, Some(0))]);
|
||||||
|
assert_eq!(hint_vec("\u{101234}"), [(2, Some(4)), (1, Some(1)), (0, Some(0))]);
|
||||||
|
assert_eq!(hint_vec("\u{101234}a"), [(2, Some(5)), (2, Some(2)), (1, Some(1)), (0, Some(0))]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn starts_with_in_unicode() {
|
fn starts_with_in_unicode() {
|
||||||
assert!(!"├── Cargo.toml".starts_with("# "));
|
assert!(!"├── Cargo.toml".starts_with("# "));
|
||||||
|
@ -256,7 +256,7 @@ impl CStr {
|
|||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "101719")]
|
#[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
|
||||||
pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr {
|
pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr {
|
||||||
// SAFETY: The caller has provided a pointer that points to a valid C
|
// SAFETY: The caller has provided a pointer that points to a valid C
|
||||||
// string with a NUL terminator of size less than `isize::MAX`, whose
|
// string with a NUL terminator of size less than `isize::MAX`, whose
|
||||||
|
@ -52,11 +52,6 @@ macro_rules! type_alias {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type_alias! { "c_char.md", c_char = c_char_definition::c_char, NonZero_c_char = c_char_definition::NonZero_c_char;
|
type_alias! { "c_char.md", c_char = c_char_definition::c_char, NonZero_c_char = c_char_definition::NonZero_c_char;
|
||||||
// Make this type alias appear cfg-dependent so that Clippy does not suggest
|
|
||||||
// replacing `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be removed
|
|
||||||
// after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093
|
|
||||||
// is fixed.
|
|
||||||
#[cfg(all())]
|
|
||||||
#[doc(cfg(all()))] }
|
#[doc(cfg(all()))] }
|
||||||
|
|
||||||
type_alias! { "c_schar.md", c_schar = i8, NonZero_c_schar = NonZeroI8; }
|
type_alias! { "c_schar.md", c_schar = i8, NonZero_c_schar = NonZeroI8; }
|
||||||
|
@ -1439,11 +1439,22 @@ impl<'a> Iterator for EncodeUtf16<'a> {
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
let (low, high) = self.chars.size_hint();
|
let len = self.chars.iter.len();
|
||||||
// every char gets either one u16 or two u16,
|
// The highest bytes:code units ratio occurs for 3-byte sequences,
|
||||||
// so this iterator is between 1 or 2 times as
|
// since a 4-byte sequence results in 2 code units. The lower bound
|
||||||
// long as the underlying iterator.
|
// is therefore determined by assuming the remaining bytes contain as
|
||||||
(low, high.and_then(|n| n.checked_mul(2)))
|
// many 3-byte sequences as possible. The lowest bytes:code units
|
||||||
|
// ratio is for 1-byte sequences, so use this for the upper bound.
|
||||||
|
// `(len + 2)` can't overflow, because we know that the `slice::Iter`
|
||||||
|
// belongs to a slice in memory which has a maximum length of
|
||||||
|
// `isize::MAX` (that's well below `usize::MAX`)
|
||||||
|
if self.extra == 0 {
|
||||||
|
((len + 2) / 3, Some(len))
|
||||||
|
} else {
|
||||||
|
// We're in the middle of a surrogate pair, so add the remaining
|
||||||
|
// surrogate to the bounds.
|
||||||
|
((len + 2) / 3 + 1, Some(len + 1))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,6 +141,51 @@ impl OsString {
|
|||||||
OsString { inner: Buf::from_string(String::new()) }
|
OsString { inner: Buf::from_string(String::new()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts bytes to an `OsString` without checking that the bytes contain
|
||||||
|
/// valid [`OsStr`]-encoded data.
|
||||||
|
///
|
||||||
|
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
|
||||||
|
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
|
||||||
|
/// ASCII.
|
||||||
|
///
|
||||||
|
/// See the [module's toplevel documentation about conversions][conversions] for safe,
|
||||||
|
/// cross-platform [conversions] from/to native representations.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
|
||||||
|
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
|
||||||
|
/// built for the same target platform. For example, reconstructing an `OsString` from bytes sent
|
||||||
|
/// over the network or stored in a file will likely violate these safety rules.
|
||||||
|
///
|
||||||
|
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
|
||||||
|
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// #![feature(os_str_bytes)]
|
||||||
|
///
|
||||||
|
/// use std::ffi::OsStr;
|
||||||
|
///
|
||||||
|
/// let os_str = OsStr::new("Mary had a little lamb");
|
||||||
|
/// let bytes = os_str.as_os_str_bytes();
|
||||||
|
/// let words = bytes.split(|b| *b == b' ');
|
||||||
|
/// let words: Vec<&OsStr> = words.map(|word| {
|
||||||
|
/// // SAFETY:
|
||||||
|
/// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
|
||||||
|
/// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
|
||||||
|
/// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
|
||||||
|
/// }).collect();
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [conversions]: super#conversions
|
||||||
|
#[inline]
|
||||||
|
#[unstable(feature = "os_str_bytes", issue = "111544")]
|
||||||
|
pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
|
||||||
|
OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts to an [`OsStr`] slice.
|
/// Converts to an [`OsStr`] slice.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
@ -159,6 +204,26 @@ impl OsString {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts the `OsString` into a byte vector. To convert the byte vector back into an
|
||||||
|
/// `OsString`, use the [`OsString::from_os_str_bytes_unchecked`] function.
|
||||||
|
///
|
||||||
|
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
|
||||||
|
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
|
||||||
|
/// ASCII.
|
||||||
|
///
|
||||||
|
/// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
|
||||||
|
/// be treated as opaque and only comparable within the same rust version built for the same
|
||||||
|
/// target platform. For example, sending the bytes over the network or storing them in a file
|
||||||
|
/// will likely result in incompatible data. See [`OsString`] for more encoding details
|
||||||
|
/// and [`std::ffi`] for platform-specific, specified conversions.
|
||||||
|
///
|
||||||
|
/// [`std::ffi`]: crate::ffi
|
||||||
|
#[inline]
|
||||||
|
#[unstable(feature = "os_str_bytes", issue = "111544")]
|
||||||
|
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||||
|
self.inner.into_os_str_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
|
/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
|
||||||
///
|
///
|
||||||
/// On failure, ownership of the original `OsString` is returned.
|
/// On failure, ownership of the original `OsString` is returned.
|
||||||
|
@ -9,11 +9,6 @@ macro_rules! alias_core_ffi {
|
|||||||
($($t:ident)*) => {$(
|
($($t:ident)*) => {$(
|
||||||
#[stable(feature = "raw_os", since = "1.1.0")]
|
#[stable(feature = "raw_os", since = "1.1.0")]
|
||||||
#[doc = include_str!(concat!("../../../../core/src/ffi/", stringify!($t), ".md"))]
|
#[doc = include_str!(concat!("../../../../core/src/ffi/", stringify!($t), ".md"))]
|
||||||
// Make this type alias appear cfg-dependent so that Clippy does not suggest
|
|
||||||
// replacing expressions like `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be
|
|
||||||
// removed after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093
|
|
||||||
// is fixed.
|
|
||||||
#[cfg(all())]
|
|
||||||
#[doc(cfg(all()))]
|
#[doc(cfg(all()))]
|
||||||
pub type $t = core::ffi::$t;
|
pub type $t = core::ffi::$t;
|
||||||
)*}
|
)*}
|
||||||
|
@ -149,7 +149,36 @@ pub trait FileExt {
|
|||||||
/// Note that similar to [`File::write`], it is not an error to return a
|
/// Note that similar to [`File::write`], it is not an error to return a
|
||||||
/// short write.
|
/// short write.
|
||||||
///
|
///
|
||||||
|
/// # Bug
|
||||||
|
/// On some systems, `write_at` utilises [`pwrite64`] to write to files.
|
||||||
|
/// However, this syscall has a [bug] where files opened with the `O_APPEND`
|
||||||
|
/// flag fail to respect the offset parameter, always appending to the end
|
||||||
|
/// of the file instead.
|
||||||
|
///
|
||||||
|
/// It is possible to inadvertently set this flag, like in the example below.
|
||||||
|
/// Therefore, it is important to be vigilant while changing options to mitigate
|
||||||
|
/// unexpected behaviour.
|
||||||
|
///
|
||||||
|
/// ```no_run
|
||||||
|
/// use std::fs::File;
|
||||||
|
/// use std::io;
|
||||||
|
/// use std::os::unix::prelude::FileExt;
|
||||||
|
///
|
||||||
|
/// fn main() -> io::Result<()> {
|
||||||
|
/// // Open a file with the append option (sets the `O_APPEND` flag)
|
||||||
|
/// let file = File::options().append(true).open("foo.txt")?;
|
||||||
|
///
|
||||||
|
/// // We attempt to write at offset 10; instead appended to EOF
|
||||||
|
/// file.write_at(b"sushi", 10)?;
|
||||||
|
///
|
||||||
|
/// // foo.txt is 5 bytes long instead of 15
|
||||||
|
/// Ok(())
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
/// [`File::write`]: fs::File::write
|
/// [`File::write`]: fs::File::write
|
||||||
|
/// [`pwrite64`]: https://man7.org/linux/man-pages/man2/pwrite.2.html
|
||||||
|
/// [bug]: https://man7.org/linux/man-pages/man2/pwrite.2.html#BUGS
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
@ -159,7 +188,7 @@ pub trait FileExt {
|
|||||||
/// use std::os::unix::prelude::FileExt;
|
/// use std::os::unix::prelude::FileExt;
|
||||||
///
|
///
|
||||||
/// fn main() -> io::Result<()> {
|
/// fn main() -> io::Result<()> {
|
||||||
/// let file = File::open("foo.txt")?;
|
/// let file = File::create("foo.txt")?;
|
||||||
///
|
///
|
||||||
/// // We now write at the offset 10.
|
/// // We now write at the offset 10.
|
||||||
/// file.write_at(b"sushi", 10)?;
|
/// file.write_at(b"sushi", 10)?;
|
||||||
|
@ -96,6 +96,16 @@ impl AsInner<[u8]> for Buf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Buf {
|
impl Buf {
|
||||||
|
#[inline]
|
||||||
|
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||||
|
self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
|
||||||
|
Self { inner: s }
|
||||||
|
}
|
||||||
|
|
||||||
pub fn from_string(s: String) -> Buf {
|
pub fn from_string(s: String) -> Buf {
|
||||||
Buf { inner: s.into_bytes() }
|
Buf { inner: s.into_bytes() }
|
||||||
}
|
}
|
||||||
|
@ -63,6 +63,16 @@ impl fmt::Display for Slice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Buf {
|
impl Buf {
|
||||||
|
#[inline]
|
||||||
|
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||||
|
self.inner.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
|
||||||
|
Self { inner: Wtf8Buf::from_bytes_unchecked(s) }
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_capacity(capacity: usize) -> Buf {
|
pub fn with_capacity(capacity: usize) -> Buf {
|
||||||
Buf { inner: Wtf8Buf::with_capacity(capacity) }
|
Buf { inner: Wtf8Buf::with_capacity(capacity) }
|
||||||
}
|
}
|
||||||
|
@ -182,6 +182,15 @@ impl Wtf8Buf {
|
|||||||
Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
|
Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a WTF-8 string from a WTF-8 byte vec.
|
||||||
|
///
|
||||||
|
/// Since the byte vec is not checked for valid WTF-8, this function is
|
||||||
|
/// marked unsafe.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
|
||||||
|
Wtf8Buf { bytes: value, is_known_utf8: false }
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a WTF-8 string from a UTF-8 `String`.
|
/// Creates a WTF-8 string from a UTF-8 `String`.
|
||||||
///
|
///
|
||||||
/// This takes ownership of the `String` and does not copy.
|
/// This takes ownership of the `String` and does not copy.
|
||||||
@ -402,6 +411,12 @@ impl Wtf8Buf {
|
|||||||
self.bytes.truncate(new_len)
|
self.bytes.truncate(new_len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consumes the WTF-8 string and converts it to a vec of bytes.
|
||||||
|
#[inline]
|
||||||
|
pub fn into_bytes(self) -> Vec<u8> {
|
||||||
|
self.bytes
|
||||||
|
}
|
||||||
|
|
||||||
/// Consumes the WTF-8 string and tries to convert it to UTF-8.
|
/// Consumes the WTF-8 string and tries to convert it to UTF-8.
|
||||||
///
|
///
|
||||||
/// This does not copy the data.
|
/// This does not copy the data.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user