From 7b9290384e88e6960bc574e83219df50df5fe4e1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 13 May 2023 19:09:00 +0200 Subject: [PATCH] refactor: Remove bespoke from_os_str_bytes_unchecked --- library/std/src/path.rs | 36 +++++++++++++---------------- library/std/src/sys/windows/path.rs | 20 ++++++---------- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 6f770d4c91b..28cd3c4e4db 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -299,20 +299,6 @@ where } } -unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - // SAFETY: See note at the top of this module to understand why this and - // `OsStr::bytes` are used: - // - // This casts are safe as OsStr is internally a wrapper around [u8] on all - // platforms. - // - // Note that currently this relies on the special knowledge that std has; - // these types are single-element structs but are not marked - // repr(transparent) or repr(C) which would make these casts not allowable - // outside std. - unsafe { &*(s as *const [u8] as *const OsStr) } -} - // Detect scheme on Redox fn has_redox_scheme(s: &[u8]) -> bool { cfg!(target_os = "redox") && s.contains(&b':') @@ -344,7 +330,12 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { if before == Some(b"") { (Some(file), None) } else { - unsafe { (before.map(|s| u8_slice_as_os_str(s)), after.map(|s| u8_slice_as_os_str(s))) } + unsafe { + ( + before.map(|s| OsStr::from_os_str_bytes_unchecked(s)), + after.map(|s| OsStr::from_os_str_bytes_unchecked(s)), + ) + } } } @@ -364,7 +355,12 @@ fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { }; let before = &slice[..i]; let after = &slice[i + 1..]; - unsafe { (u8_slice_as_os_str(before), Some(u8_slice_as_os_str(after))) } + unsafe { + ( + OsStr::from_os_str_bytes_unchecked(before), + Some(OsStr::from_os_str_bytes_unchecked(after)), + ) + } } //////////////////////////////////////////////////////////////////////////////// @@ -743,7 +739,7 @@ impl<'a> Components<'a> { // separately via `include_cur_dir` b".." => Some(Component::ParentDir), b"" => None, - _ => Some(Component::Normal(unsafe { u8_slice_as_os_str(comp) })), + _ => Some(Component::Normal(unsafe { OsStr::from_os_str_bytes_unchecked(comp) })), } } @@ -900,7 +896,7 @@ impl<'a> Iterator for Components<'a> { let raw = &self.path[..self.prefix_len()]; self.path = &self.path[self.prefix_len()..]; return Some(Component::Prefix(PrefixComponent { - raw: unsafe { u8_slice_as_os_str(raw) }, + raw: unsafe { OsStr::from_os_str_bytes_unchecked(raw) }, parsed: self.prefix.unwrap(), })); } @@ -972,7 +968,7 @@ impl<'a> DoubleEndedIterator for Components<'a> { State::Prefix if self.prefix_len() > 0 => { self.back = State::Done; return Some(Component::Prefix(PrefixComponent { - raw: unsafe { u8_slice_as_os_str(self.path) }, + raw: unsafe { OsStr::from_os_str_bytes_unchecked(self.path) }, parsed: self.prefix.unwrap(), })); } @@ -2011,7 +2007,7 @@ impl Path { // The following (private!) function allows construction of a path from a u8 // slice, which is only safe when it is known to follow the OsStr encoding. unsafe fn from_u8_slice(s: &[u8]) -> &Path { - unsafe { Path::new(u8_slice_as_os_str(s)) } + unsafe { Path::new(OsStr::from_os_str_bytes_unchecked(s)) } } // The following (private!) function reveals the byte encoding used for OsStr. fn as_u8_slice(&self) -> &[u8] { diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index 7a65d901ad2..c9c2d10e6c4 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -1,7 +1,6 @@ use super::{c, fill_utf16_buf, to_u16s}; use crate::ffi::{OsStr, OsString}; use crate::io; -use crate::mem; use crate::path::{Path, PathBuf, Prefix}; use crate::ptr; @@ -11,16 +10,6 @@ mod tests; pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; -/// # Safety -/// -/// `bytes` must be a valid wtf8 encoded slice -#[inline] -unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr { - // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8, - // which is compatible with &[u8]. - mem::transmute(bytes) -} - #[inline] pub fn is_sep_byte(b: u8) -> bool { b == b'/' || b == b'\\' @@ -101,7 +90,7 @@ impl<'a> PrefixParserSlice<'a, '_> { // &[u8] and back. This is safe to do because (1) we only look at ASCII // contents of the encoding and (2) new &OsStr values are produced only // from ASCII-bounded slices of existing &OsStr values. - unsafe { bytes_as_os_str(&self.path.as_os_str_bytes()[self.index..]) } + unsafe { OsStr::from_os_str_bytes_unchecked(&self.path.as_os_str_bytes()[self.index..]) } } } @@ -210,7 +199,12 @@ fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { // is encoded in a single byte, therefore `bytes[separator_start]` and // `bytes[separator_end]` must be code point boundaries and thus // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } + unsafe { + ( + OsStr::from_os_str_bytes_unchecked(component), + OsStr::from_os_str_bytes_unchecked(path), + ) + } } None => (path, OsStr::new("")), }