2020-03-28 09:43:47 -05:00
|
|
|
use std::borrow::Cow;
|
|
|
|
use std::convert::TryFrom;
|
|
|
|
use std::ffi::{OsStr, OsString};
|
|
|
|
use std::iter;
|
2020-03-28 10:20:16 -05:00
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
2020-03-28 09:43:47 -05:00
|
|
|
#[cfg(unix)]
|
|
|
|
use std::os::unix::ffi::{OsStrExt, OsStringExt};
|
|
|
|
#[cfg(windows)]
|
|
|
|
use std::os::windows::ffi::{OsStrExt, OsStringExt};
|
|
|
|
|
2020-04-02 17:05:35 -05:00
|
|
|
use rustc_target::abi::LayoutOf;
|
2020-03-28 09:43:47 -05:00
|
|
|
|
|
|
|
use crate::*;
|
|
|
|
|
2020-03-29 12:10:23 -05:00
|
|
|
/// Represent how path separator conversion should be done.
///
/// The direction tells `convert_path_separator` which side's separator is present in the
/// input and which side's separator must appear in the output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathConversion {
    /// The input uses the host's path separator; the output uses the target's.
    HostToTarget,
    /// The input uses the target's path separator; the output uses the host's.
    TargetToHost,
}
|
|
|
|
|
2020-10-04 07:00:26 -05:00
|
|
|
#[cfg(unix)]
|
|
|
|
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
|
|
|
|
Ok(os_str.as_bytes())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Borrow the bytes of `os_str` by viewing it as a UTF-8 string (non-Unix hosts).
///
/// Returns an "unsupported" error when `os_str` is not valid UTF-8.
#[cfg(not(unix))]
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
    // Off Unix, the only way to get from an OS string to bytes is to go through `str`.
    // The price is that paths which are valid for the OS but not valid UTF-8 are rejected.
    match os_str.to_str() {
        Some(text) => Ok(text.as_bytes()),
        None => Err(err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()),
    }
}
|
|
|
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
|
|
|
|
Ok(OsStr::from_bytes(bytes))
|
|
|
|
}
|
|
|
|
/// View `bytes` as an `OsStr` by first validating them as UTF-8 (non-Unix hosts).
///
/// Returns an "unsupported" error when `bytes` is not valid UTF-8.
#[cfg(not(unix))]
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Ok(OsStr::new(text)),
        Err(_) => Err(err_unsup_format!("{:?} is not a valid utf-8 string", bytes).into()),
    }
}
|
|
|
|
|
2020-04-01 18:55:52 -05:00
|
|
|
// Implement the extension trait for the Miri evaluation context. The impl body is empty, so
// it overrides none of the trait's methods.
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
|
2020-03-28 09:43:47 -05:00
|
|
|
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
|
|
|
|
/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
|
|
|
|
/// the Unix APIs usually handle.
|
|
|
|
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
|
|
|
|
where
|
|
|
|
'tcx: 'a,
|
|
|
|
'mir: 'a,
|
|
|
|
{
|
|
|
|
let this = self.eval_context_ref();
|
|
|
|
let bytes = this.memory.read_c_str(scalar)?;
|
2020-10-04 07:00:26 -05:00
|
|
|
bytes_to_os_str(bytes)
|
2020-03-28 09:43:47 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
|
|
|
|
/// which is what the Windows APIs usually handle.
|
|
|
|
fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString>
|
|
|
|
where
|
|
|
|
'tcx: 'a,
|
|
|
|
'mir: 'a,
|
|
|
|
{
|
|
|
|
#[cfg(windows)]
|
|
|
|
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
|
|
|
|
Ok(OsString::from_wide(&u16_vec[..]))
|
|
|
|
}
|
|
|
|
#[cfg(not(windows))]
|
|
|
|
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
|
|
|
|
let s = String::from_utf16(&u16_vec[..])
|
|
|
|
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
|
|
|
|
Ok(s.into())
|
|
|
|
}
|
|
|
|
|
|
|
|
let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
|
|
|
|
u16vec_to_osstring(u16_vec)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
|
|
|
|
/// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
|
|
|
|
/// to write if `size` is not large enough to fit the contents of `os_string` plus a null
|
|
|
|
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
|
|
|
|
/// string length returned does not include the null terminator.
|
|
|
|
fn write_os_str_to_c_str(
|
|
|
|
&mut self,
|
|
|
|
os_str: &OsStr,
|
|
|
|
scalar: Scalar<Tag>,
|
|
|
|
size: u64,
|
|
|
|
) -> InterpResult<'tcx, (bool, u64)> {
|
2020-10-04 07:00:26 -05:00
|
|
|
let bytes = os_str_to_bytes(os_str)?;
|
2020-03-28 09:43:47 -05:00
|
|
|
// If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null
|
|
|
|
// terminator to memory using the `ptr` pointer would cause an out-of-bounds access.
|
|
|
|
let string_length = u64::try_from(bytes.len()).unwrap();
|
|
|
|
if size <= string_length {
|
|
|
|
return Ok((false, string_length));
|
|
|
|
}
|
|
|
|
self.eval_context_mut()
|
|
|
|
.memory
|
|
|
|
.write_bytes(scalar, bytes.iter().copied().chain(iter::once(0u8)))?;
|
|
|
|
Ok((true, string_length))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what
|
|
|
|
/// the Windows APIs usually handle. This function returns `Ok((false, length))` without trying
|
|
|
|
/// to write if `size` is not large enough to fit the contents of `os_string` plus a null
|
|
|
|
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
|
|
|
|
/// string length returned does not include the null terminator.
|
|
|
|
fn write_os_str_to_wide_str(
|
|
|
|
&mut self,
|
|
|
|
os_str: &OsStr,
|
|
|
|
scalar: Scalar<Tag>,
|
|
|
|
size: u64,
|
|
|
|
) -> InterpResult<'tcx, (bool, u64)> {
|
|
|
|
#[cfg(windows)]
|
|
|
|
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
|
|
|
|
Ok(os_str.encode_wide().collect())
|
|
|
|
}
|
|
|
|
#[cfg(not(windows))]
|
|
|
|
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
|
|
|
|
// On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do the
|
|
|
|
// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
|
|
|
|
// valid.
|
|
|
|
os_str
|
|
|
|
.to_str()
|
|
|
|
.map(|s| s.encode_utf16().collect())
|
|
|
|
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
|
|
|
|
}
|
|
|
|
|
|
|
|
let u16_vec = os_str_to_u16vec(os_str)?;
|
|
|
|
// If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required
|
|
|
|
// 0x0000 terminator to memory would cause an out-of-bounds access.
|
|
|
|
let string_length = u64::try_from(u16_vec.len()).unwrap();
|
|
|
|
if size <= string_length {
|
|
|
|
return Ok((false, string_length));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Store the UTF-16 string.
|
|
|
|
self.eval_context_mut()
|
|
|
|
.memory
|
|
|
|
.write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?;
|
|
|
|
Ok((true, string_length))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
|
|
|
|
fn alloc_os_str_as_c_str(
|
|
|
|
&mut self,
|
|
|
|
os_str: &OsStr,
|
|
|
|
memkind: MemoryKind<MiriMemoryKind>,
|
|
|
|
) -> Pointer<Tag> {
|
|
|
|
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
|
|
|
|
let this = self.eval_context_mut();
|
|
|
|
|
|
|
|
let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
|
|
|
|
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
|
|
|
|
assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0);
|
|
|
|
arg_place.ptr.assert_ptr()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
|
|
|
|
fn alloc_os_str_as_wide_str(
|
|
|
|
&mut self,
|
|
|
|
os_str: &OsStr,
|
|
|
|
memkind: MemoryKind<MiriMemoryKind>,
|
|
|
|
) -> Pointer<Tag> {
|
|
|
|
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
|
|
|
|
let this = self.eval_context_mut();
|
|
|
|
|
|
|
|
let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
|
|
|
|
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
|
|
|
|
assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0);
|
|
|
|
arg_place.ptr.assert_ptr()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
|
|
|
|
fn read_path_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, Cow<'a, Path>>
|
|
|
|
where
|
|
|
|
'tcx: 'a,
|
|
|
|
'mir: 'a,
|
|
|
|
{
|
|
|
|
let this = self.eval_context_ref();
|
2020-03-29 12:10:23 -05:00
|
|
|
let os_str = this.read_os_str_from_c_str(scalar)?;
|
2020-03-28 09:43:47 -05:00
|
|
|
|
2020-10-04 15:28:09 -05:00
|
|
|
Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
|
2020-03-28 13:44:41 -05:00
|
|
|
Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
|
|
|
|
Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed.
|
|
|
|
fn read_path_from_wide_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, PathBuf> {
|
|
|
|
let this = self.eval_context_ref();
|
2020-03-29 12:10:23 -05:00
|
|
|
let os_str = this.read_os_str_from_wide_str(scalar)?;
|
2020-03-28 13:44:41 -05:00
|
|
|
|
2021-05-16 04:28:01 -05:00
|
|
|
Ok(this
|
|
|
|
.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost)
|
|
|
|
.into_owned()
|
|
|
|
.into())
|
2020-03-28 09:43:47 -05:00
|
|
|
}
|
|
|
|
|
2020-03-28 13:44:41 -05:00
|
|
|
/// Write a Path to the machine memory (as a null-terminated sequence of bytes),
|
|
|
|
/// adjusting path separators if needed.
|
2020-03-28 09:43:47 -05:00
|
|
|
fn write_path_to_c_str(
|
|
|
|
&mut self,
|
|
|
|
path: &Path,
|
|
|
|
scalar: Scalar<Tag>,
|
|
|
|
size: u64,
|
|
|
|
) -> InterpResult<'tcx, (bool, u64)> {
|
|
|
|
let this = self.eval_context_mut();
|
2021-05-16 04:28:01 -05:00
|
|
|
let os_str = this
|
|
|
|
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
|
2020-03-28 09:43:47 -05:00
|
|
|
this.write_os_str_to_c_str(&os_str, scalar, size)
|
|
|
|
}
|
2020-03-28 13:44:41 -05:00
|
|
|
|
|
|
|
/// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
|
|
|
|
/// adjusting path separators if needed.
|
|
|
|
fn write_path_to_wide_str(
|
|
|
|
&mut self,
|
|
|
|
path: &Path,
|
|
|
|
scalar: Scalar<Tag>,
|
|
|
|
size: u64,
|
|
|
|
) -> InterpResult<'tcx, (bool, u64)> {
|
|
|
|
let this = self.eval_context_mut();
|
2021-05-16 04:28:01 -05:00
|
|
|
let os_str = this
|
|
|
|
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
|
2020-03-28 13:44:41 -05:00
|
|
|
this.write_os_str_to_wide_str(&os_str, scalar, size)
|
|
|
|
}
|
2020-10-04 11:34:13 -05:00
|
|
|
|
|
|
|
fn convert_path_separator<'a>(
|
|
|
|
&self,
|
|
|
|
os_str: Cow<'a, OsStr>,
|
2020-10-04 15:28:09 -05:00
|
|
|
direction: PathConversion,
|
2020-10-04 11:34:13 -05:00
|
|
|
) -> Cow<'a, OsStr> {
|
|
|
|
let this = self.eval_context_ref();
|
2020-11-11 03:29:10 -06:00
|
|
|
let target_os = &this.tcx.sess.target.os;
|
2020-10-04 11:34:13 -05:00
|
|
|
#[cfg(windows)]
|
|
|
|
return if target_os == "windows" {
|
|
|
|
// Windows-on-Windows, all fine.
|
|
|
|
os_str
|
|
|
|
} else {
|
|
|
|
// Unix target, Windows host.
|
|
|
|
let (from, to) = match direction {
|
2020-10-04 15:28:09 -05:00
|
|
|
PathConversion::HostToTarget => ('\\', '/'),
|
|
|
|
PathConversion::TargetToHost => ('/', '\\'),
|
2020-10-04 11:34:13 -05:00
|
|
|
};
|
|
|
|
let converted = os_str
|
|
|
|
.encode_wide()
|
|
|
|
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
Cow::Owned(OsString::from_wide(&converted))
|
|
|
|
};
|
|
|
|
#[cfg(unix)]
|
|
|
|
return if target_os == "windows" {
|
|
|
|
// Windows target, Unix host.
|
|
|
|
let (from, to) = match direction {
|
2020-10-04 15:28:09 -05:00
|
|
|
PathConversion::HostToTarget => ('/', '\\'),
|
|
|
|
PathConversion::TargetToHost => ('\\', '/'),
|
2020-10-04 11:34:13 -05:00
|
|
|
};
|
|
|
|
let converted = os_str
|
|
|
|
.as_bytes()
|
|
|
|
.iter()
|
|
|
|
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
Cow::Owned(OsString::from_vec(converted))
|
|
|
|
} else {
|
|
|
|
// Unix-on-Unix, all is fine.
|
|
|
|
os_str
|
|
|
|
};
|
|
|
|
}
|
2020-03-28 13:44:41 -05:00
|
|
|
}
|