diff --git a/src/helpers.rs b/src/helpers.rs index 3e89fdf6f3c..e22839d9870 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,13 +1,5 @@ -use std::ffi::{OsStr, OsString}; -use std::path::{Path, PathBuf}; -use std::{iter, mem}; use std::convert::TryFrom; -use std::borrow::Cow; - -#[cfg(unix)] -use std::os::unix::ffi::{OsStrExt, OsStringExt}; -#[cfg(windows)] -use std::os::windows::ffi::{OsStrExt, OsStringExt}; +use std::mem; use rustc::mir; use rustc::ty::{ @@ -462,237 +454,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } } } - - /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what - /// the Unix APIs usually handle. - fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, &'a OsStr> - where - 'tcx: 'a, - 'mir: 'a, - { - #[cfg(unix)] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - Ok(OsStr::from_bytes(bytes)) - } - #[cfg(not(unix))] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - let s = std::str::from_utf8(bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; - Ok(OsStr::new(s)) - } - - let this = self.eval_context_ref(); - let bytes = this.memory.read_c_str(scalar)?; - bytes_to_os_str(bytes) - } - - /// Helper function to read an OsString from a 0x0000-terminated sequence of u16, - /// which is what the Windows APIs usually handle. 
- fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, OsString> - where - 'tcx: 'a, - 'mir: 'a, - { - #[cfg(windows)] - pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec) -> InterpResult<'tcx, OsString> { - Ok(OsString::from_wide(&u16_vec[..])) - } - #[cfg(not(windows))] - pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec) -> InterpResult<'tcx, OsString> { - let s = String::from_utf16(&u16_vec[..]) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?; - Ok(s.into()) - } - - let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?; - u16vec_to_osstring(u16_vec) - } - - /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what - /// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying - /// to write if `size` is not large enough to fit the contents of `os_string` plus a null - /// terminator. It returns `Ok((true, length))` if the writing process was successful. The - /// string length returned does not include the null terminator. - fn write_os_str_to_c_str( - &mut self, - os_str: &OsStr, - scalar: Scalar, - size: u64, - ) -> InterpResult<'tcx, (bool, u64)> { - #[cfg(unix)] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - Ok(os_str.as_bytes()) - } - #[cfg(not(unix))] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the - // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually - // valid. 
- os_str - .to_str() - .map(|s| s.as_bytes()) - .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) - } - - let bytes = os_str_to_bytes(os_str)?; - // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null - // terminator to memory using the `ptr` pointer would cause an out-of-bounds access. - let string_length = u64::try_from(bytes.len()).unwrap(); - if size <= string_length { - return Ok((false, string_length)); - } - self.eval_context_mut() - .memory - .write_bytes(scalar, bytes.iter().copied().chain(iter::once(0u8)))?; - Ok((true, string_length)) - } - - /// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what - /// the Windows APIs usually handle. This function returns `Ok((false, length))` without trying - /// to write if `size` is not large enough to fit the contents of `os_string` plus a null - /// terminator. It returns `Ok((true, length))` if the writing process was successful. The - /// string length returned does not include the null terminator. - fn write_os_str_to_wide_str( - &mut self, - os_str: &OsStr, - scalar: Scalar, - size: u64, - ) -> InterpResult<'tcx, (bool, u64)> { - #[cfg(windows)] - fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec> { - Ok(os_str.encode_wide().collect()) - } - #[cfg(not(windows))] - fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec> { - // On non-Windows platforms the best we can do to transform Vec from/to OS strings is to do the - // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually - // valid. - os_str - .to_str() - .map(|s| s.encode_utf16().collect()) - .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) - } - - let u16_vec = os_str_to_u16vec(os_str)?; - // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required - // 0x0000 terminator to memory would cause an out-of-bounds access. 
- let string_length = u64::try_from(u16_vec.len()).unwrap(); - if size <= string_length { - return Ok((false, string_length)); - } - - // Store the UTF-16 string. - self.eval_context_mut() - .memory - .write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?; - Ok((true, string_length)) - } - - /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes. - fn alloc_os_str_as_c_str( - &mut self, - os_str: &OsStr, - memkind: MemoryKind, - ) -> Pointer { - let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator. - let this = self.eval_context_mut(); - - let arg_type = this.tcx.mk_array(this.tcx.types.u8, size); - let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind); - assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0); - arg_place.ptr.assert_ptr() - } - - /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`. - fn alloc_os_str_as_wide_str( - &mut self, - os_str: &OsStr, - memkind: MemoryKind, - ) -> Pointer { - let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator. - let this = self.eval_context_mut(); - - let arg_type = this.tcx.mk_array(this.tcx.types.u16, size); - let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind); - assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0); - arg_place.ptr.assert_ptr() - } - - /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed. - fn read_path_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, Cow<'a, Path>> - where - 'tcx: 'a, - 'mir: 'a, - { - let this = self.eval_context_ref(); - let os_str = this.read_os_str_from_c_str(scalar)?; - - #[cfg(windows)] - return Ok(if this.tcx.sess.target.target.target_os == "windows" { - // Windows-on-Windows, all fine. 
- Cow::Borrowed(Path::new(os_str)) - } else { - // Unix target, Windows host. Need to convert target '/' to host '\'. - let converted = os_str - .encode_wide() - .map(|wchar| if wchar == '/' as u16 { '\\' as u16 } else { wchar }) - .collect::>(); - Cow::Owned(PathBuf::from(OsString::from_wide(&converted))) - }); - #[cfg(unix)] - return Ok(if this.tcx.sess.target.target.target_os == "windows" { - // Windows target, Unix host. Need to convert target '\' to host '/'. - let converted = os_str - .as_bytes() - .iter() - .map(|&wchar| if wchar == '/' as u8 { '\\' as u8 } else { wchar }) - .collect::>(); - Cow::Owned(PathBuf::from(OsString::from_vec(converted))) - } else { - // Unix-on-Unix, all is fine. - Cow::Borrowed(Path::new(os_str)) - }); - } - - /// Write a Path to the machine memory, adjusting path separators if needed. - fn write_path_to_c_str( - &mut self, - path: &Path, - scalar: Scalar, - size: u64, - ) -> InterpResult<'tcx, (bool, u64)> { - let this = self.eval_context_mut(); - - #[cfg(windows)] - let os_str = if this.tcx.sess.target.target.target_os == "windows" { - // Windows-on-Windows, all fine. - Cow::Borrowed(path.as_os_str()) - } else { - // Unix target, Windows host. Need to convert host '\\' to target '/'. - let converted = path - .as_os_str() - .encode_wide() - .map(|wchar| if wchar == '\\' as u16 { '/' as u16 } else { wchar }) - .collect::>(); - Cow::Owned(OsString::from_wide(&converted)) - }; - #[cfg(unix)] - let os_str = if this.tcx.sess.target.target.target_os == "windows" { - // Windows target, Unix host. Need to convert host '/' to target '\'. - let converted = path - .as_os_str() - .as_bytes() - .iter() - .map(|&wchar| if wchar == '/' as u8 { '\\' as u8 } else { wchar }) - .collect::>(); - Cow::Owned(OsString::from_vec(converted)) - } else { - // Unix-on-Unix, all is fine. 
- Cow::Borrowed(path.as_os_str()) - }; - - this.write_os_str_to_c_str(&os_str, scalar, size) - } } pub fn immty_from_int_checked<'tcx>( diff --git a/src/lib.rs b/src/lib.rs index 32eb5b41e59..32416ccaaff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,6 +39,7 @@ pub use crate::shims::env::{EnvVars, EvalContextExt as EnvEvalContextExt}; pub use crate::shims::foreign_items::EvalContextExt as ForeignItemsEvalContextExt; pub use crate::shims::fs::{DirHandler, EvalContextExt as FileEvalContextExt, FileHandler}; pub use crate::shims::intrinsics::EvalContextExt as IntrinsicsEvalContextExt; +pub use crate::shims::os_str::EvalContextExt as OsStrEvalContextExt; pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as PanicEvalContextExt}; pub use crate::shims::time::EvalContextExt as TimeEvalContextExt; pub use crate::shims::tls::{EvalContextExt as TlsEvalContextExt, TlsData}; diff --git a/src/shims/mod.rs b/src/shims/mod.rs index 5b5a11b86b4..f950c804783 100644 --- a/src/shims/mod.rs +++ b/src/shims/mod.rs @@ -3,6 +3,7 @@ pub mod env; pub mod foreign_items; pub mod fs; pub mod intrinsics; +pub mod os_str; pub mod panic; pub mod time; pub mod tls; diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs new file mode 100644 index 00000000000..2306d01de5d --- /dev/null +++ b/src/shims/os_str.rs @@ -0,0 +1,247 @@ +use std::borrow::Cow; +use std::convert::TryFrom; +use std::ffi::{OsStr, OsString}; +use std::iter; +#[cfg(unix)] +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +#[cfg(windows)] +use std::os::windows::ffi::{OsStrExt, OsStringExt}; +use std::path::{Path, PathBuf}; + +use rustc::ty::layout::LayoutOf; + +use crate::*; + +impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} +pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what + /// the Unix APIs usually handle. 
+    fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, &'a OsStr>
+    where
+        'tcx: 'a,
+        'mir: 'a,
+    {
+        #[cfg(unix)]
+        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+            Ok(OsStr::from_bytes(bytes))
+        }
+        #[cfg(not(unix))]
+        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+            let s = std::str::from_utf8(bytes)
+                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
+            Ok(OsStr::new(s))
+        }
+
+        let this = self.eval_context_ref();
+        let bytes = this.memory.read_c_str(scalar)?;
+        bytes_to_os_str(bytes)
+    }
+
+    /// Helper function to read an OsString from a 0x0000-terminated sequence of u16, which is
+    /// what the Windows APIs usually handle. On other hosts, invalid UTF-16 is an error.
+    fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, OsString>
+    where
+        'tcx: 'a,
+        'mir: 'a,
+    {
+        #[cfg(windows)]
+        pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
+            Ok(OsString::from_wide(&u16_vec[..]))
+        }
+        #[cfg(not(windows))]
+        pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
+            let s = String::from_utf16(&u16_vec[..])
+                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
+            Ok(s.into())
+        }
+
+        let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
+        u16vec_to_osstring(u16_vec)
+    }
+
+    /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
+    /// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
+    /// to write if `size` is not large enough to fit the contents of `os_str` plus a null
+    /// terminator. It returns `Ok((true, length))` if the writing process was successful. The
+    /// string length returned does not include the null terminator.
+    fn write_os_str_to_c_str(
+        &mut self,
+        os_str: &OsStr,
+        scalar: Scalar,
+        size: u64,
+    ) -> InterpResult<'tcx, (bool, u64)> {
+        // Unix hosts expose the raw bytes of an `OsStr` directly.
+        #[cfg(unix)]
+        fn encode<'tcx, 'a>(src: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+            Ok(src.as_bytes())
+        }
+        // Other hosts have no byte view of an `OsStr`, so go through `str`. This rejects
+        // non-UTF-8 strings even though they may be perfectly valid for the host.
+        #[cfg(not(unix))]
+        fn encode<'tcx, 'a>(src: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+            match src.to_str() {
+                Some(text) => Ok(text.as_bytes()),
+                None => Err(err_unsup_format!("{:?} is not a valid utf-8 string", src).into()),
+            }
+        }
+
+        let payload = encode(os_str)?;
+        let len = u64::try_from(payload.len()).unwrap();
+        // The buffer must hold `len` bytes plus the null terminator; if it cannot, report the
+        // required length instead of writing out of bounds.
+        if len >= size {
+            return Ok((false, len));
+        }
+        // Stream the string bytes followed by the single terminating zero byte.
+        let terminated = payload.iter().copied().chain(iter::once(0u8));
+        self.eval_context_mut().memory.write_bytes(scalar, terminated)?;
+        Ok((true, len))
+    }
+
+    /// Helper function to write an OsStr as a 0x0000-terminated sequence of `u16`, the form
+    /// that Windows APIs usually expect. When `size` cannot hold the encoded string together
+    /// with its terminator, nothing is written and `Ok((false, length))` is returned; on a
+    /// successful write the result is `Ok((true, length))`. In both cases `length` excludes
+    /// the terminator.
+    fn write_os_str_to_wide_str(
+        &mut self,
+        os_str: &OsStr,
+        scalar: Scalar,
+        size: u64,
+    ) -> InterpResult<'tcx, (bool, u64)> {
+        #[cfg(windows)]
+        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
+            Ok(os_str.encode_wide().collect())
+        }
+        #[cfg(not(windows))]
+        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
+            // Non-Windows hosts have no native UTF-16 view of an `OsStr`, so the best we can do
+            // is go through `str` and re-encode as UTF-16. This rejects non-UTF-8 strings that
+            // are actually valid.
+            os_str
+                .to_str()
+                .map(|s| s.encode_utf16().collect())
+                .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
+        }
+
+        let u16_vec = os_str_to_u16vec(os_str)?;
+        // If `size` is not larger than `u16_vec.len()`, writing the string plus the required
+        // 0x0000 terminator to memory would cause an out-of-bounds access.
+        let string_length = u64::try_from(u16_vec.len()).unwrap();
+        if size <= string_length {
+            return Ok((false, string_length));
+        }
+
+        // Store the UTF-16 string, followed by its 0x0000 terminator.
+        self.eval_context_mut()
+            .memory
+            .write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?;
+        Ok((true, string_length))
+    }
+
+    /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
+    fn alloc_os_str_as_c_str(
+        &mut self,
+        os_str: &OsStr,
+        memkind: MemoryKind,
+    ) -> Pointer {
+        let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
+        let this = self.eval_context_mut();
+
+        let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
+        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
+        assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0);
+        arg_place.ptr.assert_ptr()
+    }
+
+    /// Allocate enough memory to store the given `OsStr` as a 0x0000-terminated sequence of `u16`.
+    fn alloc_os_str_as_wide_str(
+        &mut self,
+        os_str: &OsStr,
+        memkind: MemoryKind,
+    ) -> Pointer {
+        let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
+        let this = self.eval_context_mut();
+
+        let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
+        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
+        assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0);
+        arg_place.ptr.assert_ptr()
+    }
+
+    /// Read a null-terminated sequence of bytes, and convert target path separators to host ones.
+    fn read_path_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, Cow<'a, Path>>
+    where
+        'tcx: 'a,
+        'mir: 'a,
+    {
+        let this = self.eval_context_ref();
+        let os_str = this.read_os_str_from_c_str(scalar)?;
+
+        #[cfg(windows)]
+        return Ok(if this.tcx.sess.target.target.target_os == "windows" {
+            // Windows-on-Windows, all fine.
+            Cow::Borrowed(Path::new(os_str))
+        } else {
+            // Unix target, Windows host. Need to convert target '/' to host '\'.
+            let converted = os_str
+                .encode_wide()
+                .map(|wchar| if wchar == '/' as u16 { '\\' as u16 } else { wchar })
+                .collect::<Vec<u16>>();
+            Cow::Owned(PathBuf::from(OsString::from_wide(&converted)))
+        });
+        #[cfg(unix)]
+        return Ok(if this.tcx.sess.target.target.target_os == "windows" {
+            // Windows target, Unix host. Need to convert target '\' to host '/'.
+            let converted = os_str
+                .as_bytes()
+                .iter()
+                .map(|&byte| if byte == b'\\' { b'/' } else { byte })
+                .collect::<Vec<u8>>();
+            Cow::Owned(PathBuf::from(OsString::from_vec(converted)))
+        } else {
+            // Unix-on-Unix, all is fine.
+            Cow::Borrowed(Path::new(os_str))
+        });
+    }
+
+    /// Write a Path to the machine memory, converting host path separators to target ones.
+    fn write_path_to_c_str(
+        &mut self,
+        path: &Path,
+        scalar: Scalar,
+        size: u64,
+    ) -> InterpResult<'tcx, (bool, u64)> {
+        let this = self.eval_context_mut();
+
+        #[cfg(windows)]
+        let os_str = if this.tcx.sess.target.target.target_os == "windows" {
+            // Windows-on-Windows, all fine.
+            Cow::Borrowed(path.as_os_str())
+        } else {
+            // Unix target, Windows host. Need to convert host '\' to target '/'.
+            let converted = path
+                .as_os_str()
+                .encode_wide()
+                .map(|wchar| if wchar == '\\' as u16 { '/' as u16 } else { wchar })
+                .collect::<Vec<u16>>();
+            Cow::Owned(OsString::from_wide(&converted))
+        };
+        #[cfg(unix)]
+        let os_str = if this.tcx.sess.target.target.target_os == "windows" {
+            // Windows target, Unix host. Need to convert host '/' to target '\'.
+            let converted = path
+                .as_os_str()
+                .as_bytes()
+                .iter()
+                .map(|&wchar| if wchar == '/' as u8 { '\\' as u8 } else { wchar })
+                .collect::<Vec<u8>>();
+            Cow::Owned(OsString::from_vec(converted))
+        } else {
+            // Unix-on-Unix, all is fine.
+            Cow::Borrowed(path.as_os_str())
+        };
+
+        this.write_os_str_to_c_str(&os_str, scalar, size)
+    }
+}