Rollup merge of #113442 - epage:osstring, r=cuviper
Allow limited access to `OsString` bytes. This extends #109698 to allow no-cost conversion between `Vec<u8>` and `OsString`, as suggested in feedback from the `os_str_bytes` crate in #111544.
This commit is contained in:
commit
0877d11e8d
@ -141,6 +141,51 @@ pub fn new() -> OsString {
|
||||
OsString { inner: Buf::from_string(String::new()) }
|
||||
}
|
||||
|
||||
/// Converts bytes to an `OsString` without checking that the bytes contain
|
||||
/// valid [`OsStr`]-encoded data.
|
||||
///
|
||||
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
|
||||
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
|
||||
/// ASCII.
|
||||
///
|
||||
/// See the [module's toplevel documentation about conversions][conversions] for safe,
|
||||
/// cross-platform [conversions] from/to native representations.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
|
||||
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
|
||||
/// built for the same target platform. For example, reconstructing an `OsString` from bytes sent
|
||||
/// over the network or stored in a file will likely violate these safety rules.
|
||||
///
|
||||
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
|
||||
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(os_str_bytes)]
|
||||
///
|
||||
/// use std::ffi::OsStr;
|
||||
///
|
||||
/// let os_str = OsStr::new("Mary had a little lamb");
|
||||
/// let bytes = os_str.as_os_str_bytes();
|
||||
/// let words = bytes.split(|b| *b == b' ');
|
||||
/// let words: Vec<&OsStr> = words.map(|word| {
|
||||
/// // SAFETY:
|
||||
/// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
|
||||
/// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
|
||||
/// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
|
||||
/// }).collect();
|
||||
/// ```
|
||||
///
|
||||
/// [conversions]: super#conversions
|
||||
#[inline]
|
||||
#[unstable(feature = "os_str_bytes", issue = "111544")]
|
||||
pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
|
||||
OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
|
||||
}
|
||||
|
||||
/// Converts to an [`OsStr`] slice.
|
||||
///
|
||||
/// # Examples
|
||||
@ -159,6 +204,26 @@ pub fn as_os_str(&self) -> &OsStr {
|
||||
self
|
||||
}
|
||||
|
||||
/// Converts the `OsString` into a byte slice. To convert the byte slice back into an
|
||||
/// `OsString`, use the [`OsString::from_os_str_bytes_unchecked`] function.
|
||||
///
|
||||
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
|
||||
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
|
||||
/// ASCII.
|
||||
///
|
||||
/// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
|
||||
/// be treated as opaque and only comparable within the same rust version built for the same
|
||||
/// target platform. For example, sending the bytes over the network or storing them in a file
|
||||
/// will likely result in incompatible data. See [`OsString`] for more encoding details
|
||||
/// and [`std::ffi`] for platform-specific, specified conversions.
|
||||
///
|
||||
/// [`std::ffi`]: crate::ffi
|
||||
#[inline]
|
||||
#[unstable(feature = "os_str_bytes", issue = "111544")]
|
||||
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||
self.inner.into_os_str_bytes()
|
||||
}
|
||||
|
||||
/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
|
||||
///
|
||||
/// On failure, ownership of the original `OsString` is returned.
|
||||
|
@ -96,6 +96,16 @@ fn as_inner(&self) -> &[u8] {
|
||||
}
|
||||
|
||||
impl Buf {
|
||||
#[inline]
|
||||
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||
self.inner
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
|
||||
Self { inner: s }
|
||||
}
|
||||
|
||||
pub fn from_string(s: String) -> Buf {
|
||||
Buf { inner: s.into_bytes() }
|
||||
}
|
||||
|
@ -63,6 +63,16 @@ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
}
|
||||
|
||||
impl Buf {
|
||||
#[inline]
|
||||
pub fn into_os_str_bytes(self) -> Vec<u8> {
|
||||
self.inner.into_bytes()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
|
||||
Self { inner: Wtf8Buf::from_bytes_unchecked(s) }
|
||||
}
|
||||
|
||||
pub fn with_capacity(capacity: usize) -> Buf {
|
||||
Buf { inner: Wtf8Buf::with_capacity(capacity) }
|
||||
}
|
||||
|
@ -182,6 +182,15 @@ pub fn with_capacity(capacity: usize) -> Wtf8Buf {
|
||||
Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
|
||||
}
|
||||
|
||||
/// Creates a WTF-8 string from a WTF-8 byte vec.
|
||||
///
|
||||
/// Since the byte vec is not checked for valid WTF-8, this function is
|
||||
/// marked unsafe.
|
||||
#[inline]
|
||||
pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
|
||||
Wtf8Buf { bytes: value, is_known_utf8: false }
|
||||
}
|
||||
|
||||
/// Creates a WTF-8 string from a UTF-8 `String`.
|
||||
///
|
||||
/// This takes ownership of the `String` and does not copy.
|
||||
@ -402,6 +411,12 @@ pub fn truncate(&mut self, new_len: usize) {
|
||||
self.bytes.truncate(new_len)
|
||||
}
|
||||
|
||||
/// Consumes the WTF-8 string and converts it into a vec of bytes.
|
||||
#[inline]
|
||||
pub fn into_bytes(self) -> Vec<u8> {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
/// Consumes the WTF-8 string and tries to convert it to UTF-8.
|
||||
///
|
||||
/// This does not copy the data.
|
||||
|
Loading…
Reference in New Issue
Block a user