334 lines
10 KiB
Rust
Raw Normal View History

// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
2014-11-24 16:21:39 -08:00
//! Implementation of `std::os` functionality for Windows
// FIXME: move various extern bindings from here into liblibc or
// something similar
use prelude::*;
2014-11-24 16:21:39 -08:00
use io::{IoResult, IoError};
2014-12-30 10:51:18 -08:00
use iter::repeat;
use libc::{c_int, c_void};
2014-11-24 16:21:39 -08:00
use libc;
use os;
use path::BytesContainer;
use ptr;
use slice;
use sys::fs::FileDesc;
use os::TMPBUF_SZ;
2014-11-24 16:21:39 -08:00
use libc::types::os::arch::extra::DWORD;
const BUF_BYTES : uint = 2048u;
std: Stabilize the std::str module This commit starts out by consolidating all `str` extension traits into one `StrExt` trait to be included in the prelude. This means that `UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into one `StrExt` exported by the standard library. Some functionality is currently duplicated with the `StrExt` present in libcore. This commit also currently avoids any methods which require any form of pattern to operate. These functions will be stabilized via a separate RFC. Next, stability of methods and structures are as follows: Stable * from_utf8_unchecked * CowString - after moving to std::string * StrExt::as_bytes * StrExt::as_ptr * StrExt::bytes/Bytes - also made a struct instead of a typedef * StrExt::char_indices/CharIndices - CharOffsets was renamed * StrExt::chars/Chars * StrExt::is_empty * StrExt::len * StrExt::lines/Lines * StrExt::lines_any/LinesAny * StrExt::slice_unchecked * StrExt::trim * StrExt::trim_left * StrExt::trim_right * StrExt::words/Words - also made a struct instead of a typedef Unstable * from_utf8 - the error type was changed to a `Result`, but the error type has yet to prove itself * from_c_str - this function will be handled by the c_str RFC * FromStr - this trait will have an associated error type eventually * StrExt::escape_default - needs iterators at least, unsure if it should make the cut * StrExt::escape_unicode - needs iterators at least, unsure if it should make the cut * StrExt::slice_chars - this function has yet to prove itself * StrExt::slice_shift_char - awaiting conventions about slicing and shifting * StrExt::graphemes/Graphemes - this functionality may only be in libunicode * StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in libunicode * StrExt::width - this functionality may only be in libunicode * StrExt::utf16_units - this functionality may only be in libunicode * StrExt::nfd_chars - this functionality may only be in libunicode * 
StrExt::nfkd_chars - this functionality may only be in libunicode * StrExt::nfc_chars - this functionality may only be in libunicode * StrExt::nfkc_chars - this functionality may only be in libunicode * StrExt::is_char_boundary - naming is uncertain with container conventions * StrExt::char_range_at - naming is uncertain with container conventions * StrExt::char_range_at_reverse - naming is uncertain with container conventions * StrExt::char_at - naming is uncertain with container conventions * StrExt::char_at_reverse - naming is uncertain with container conventions * StrVector::concat - this functionality may be replaced with iterators, but it's not certain at this time * StrVector::connect - as with concat, may be deprecated in favor of iterators Deprecated * StrAllocating and UnicodeStrPrelude have been merged into StrExit * eq_slice - compiler implementation detail * from_str - use the inherent parse() method * is_utf8 - call from_utf8 instead * replace - call the method instead * truncate_utf16_at_nul - this is an implementation detail of windows and does not need to be exposed. * utf8_char_width - moved to libunicode * utf16_items - moved to libunicode * is_utf16 - moved to libunicode * Utf16Items - moved to libunicode * Utf16Item - moved to libunicode * Utf16Encoder - moved to libunicode * AnyLines - renamed to LinesAny and made a struct * SendStr - use CowString<'static> instead * str::raw - all functionality is deprecated * StrExt::into_string - call to_string() instead * StrExt::repeat - use iterators instead * StrExt::char_len - use .chars().count() instead * StrExt::is_alphanumeric - use .chars().all(..) * StrExt::is_whitespace - use .chars().all(..) Pending deprecation -- while slicing syntax is being worked out, these methods are all #[unstable] * Str - while currently used for generic programming, this trait will be replaced with one of [], deref coercions, or a generic conversion trait. 
* StrExt::slice - use slicing syntax instead * StrExt::slice_to - use slicing syntax instead * StrExt::slice_from - use slicing syntax instead * StrExt::lev_distance - deprecated with no replacement Awaiting stabilization due to patterns and/or matching * StrExt::contains * StrExt::contains_char * StrExt::split * StrExt::splitn * StrExt::split_terminator * StrExt::rsplitn * StrExt::match_indices * StrExt::split_str * StrExt::starts_with * StrExt::ends_with * StrExt::trim_chars * StrExt::trim_left_chars * StrExt::trim_right_chars * StrExt::find * StrExt::rfind * StrExt::find_str * StrExt::subslice_offset
2014-12-10 09:02:31 -08:00
/// Return a slice of `v` ending at (and not including) the first NUL
/// (0).
///
/// If `v` contains no NUL code unit the whole slice is returned unchanged.
/// The result always borrows from `v`; nothing is copied.
pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
    match v.iter().position(|c| *c == 0) {
        // `split_at` is used instead of range-slicing sugar so the result is
        // a borrowed slice regardless of how `v[..i]` is typed.
        Some(i) => {
            // don't include the 0
            let (head, _) = v.split_at(i);
            head
        }
        None => v,
    }
}
pub fn errno() -> uint {
use libc::types::os::arch::extra::DWORD;
#[link_name = "kernel32"]
extern "system" {
fn GetLastError() -> DWORD;
}
unsafe {
GetLastError() as uint
}
}
/// Get a detailed string description for the given error number
pub fn error_string(errnum: i32) -> String {
use libc::types::os::arch::extra::DWORD;
use libc::types::os::arch::extra::LPWSTR;
use libc::types::os::arch::extra::LPVOID;
use libc::types::os::arch::extra::WCHAR;
#[link_name = "kernel32"]
extern "system" {
fn FormatMessageW(flags: DWORD,
lpSrc: LPVOID,
msgId: DWORD,
langId: DWORD,
buf: LPWSTR,
nsize: DWORD,
args: *const c_void)
-> DWORD;
}
static FORMAT_MESSAGE_FROM_SYSTEM: DWORD = 0x00001000;
static FORMAT_MESSAGE_IGNORE_INSERTS: DWORD = 0x00000200;
// This value is calculated from the macro
// MAKELANGID(LANG_SYSTEM_DEFAULT, SUBLANG_SYS_DEFAULT)
let langId = 0x0800 as DWORD;
let mut buf = [0 as WCHAR, ..TMPBUF_SZ];
unsafe {
let res = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
ptr::null_mut(),
errnum as DWORD,
langId,
buf.as_mut_ptr(),
buf.len() as DWORD,
ptr::null());
if res == 0 {
// Sometimes FormatMessageW can fail e.g. system doesn't like langId,
let fm_err = errno();
return format!("OS Error {} (FormatMessageW() returned error {})", errnum, fm_err);
}
std: Stabilize the std::str module This commit starts out by consolidating all `str` extension traits into one `StrExt` trait to be included in the prelude. This means that `UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into one `StrExt` exported by the standard library. Some functionality is currently duplicated with the `StrExt` present in libcore. This commit also currently avoids any methods which require any form of pattern to operate. These functions will be stabilized via a separate RFC. Next, stability of methods and structures are as follows: Stable * from_utf8_unchecked * CowString - after moving to std::string * StrExt::as_bytes * StrExt::as_ptr * StrExt::bytes/Bytes - also made a struct instead of a typedef * StrExt::char_indices/CharIndices - CharOffsets was renamed * StrExt::chars/Chars * StrExt::is_empty * StrExt::len * StrExt::lines/Lines * StrExt::lines_any/LinesAny * StrExt::slice_unchecked * StrExt::trim * StrExt::trim_left * StrExt::trim_right * StrExt::words/Words - also made a struct instead of a typedef Unstable * from_utf8 - the error type was changed to a `Result`, but the error type has yet to prove itself * from_c_str - this function will be handled by the c_str RFC * FromStr - this trait will have an associated error type eventually * StrExt::escape_default - needs iterators at least, unsure if it should make the cut * StrExt::escape_unicode - needs iterators at least, unsure if it should make the cut * StrExt::slice_chars - this function has yet to prove itself * StrExt::slice_shift_char - awaiting conventions about slicing and shifting * StrExt::graphemes/Graphemes - this functionality may only be in libunicode * StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in libunicode * StrExt::width - this functionality may only be in libunicode * StrExt::utf16_units - this functionality may only be in libunicode * StrExt::nfd_chars - this functionality may only be in libunicode * 
StrExt::nfkd_chars - this functionality may only be in libunicode * StrExt::nfc_chars - this functionality may only be in libunicode * StrExt::nfkc_chars - this functionality may only be in libunicode * StrExt::is_char_boundary - naming is uncertain with container conventions * StrExt::char_range_at - naming is uncertain with container conventions * StrExt::char_range_at_reverse - naming is uncertain with container conventions * StrExt::char_at - naming is uncertain with container conventions * StrExt::char_at_reverse - naming is uncertain with container conventions * StrVector::concat - this functionality may be replaced with iterators, but it's not certain at this time * StrVector::connect - as with concat, may be deprecated in favor of iterators Deprecated * StrAllocating and UnicodeStrPrelude have been merged into StrExit * eq_slice - compiler implementation detail * from_str - use the inherent parse() method * is_utf8 - call from_utf8 instead * replace - call the method instead * truncate_utf16_at_nul - this is an implementation detail of windows and does not need to be exposed. * utf8_char_width - moved to libunicode * utf16_items - moved to libunicode * is_utf16 - moved to libunicode * Utf16Items - moved to libunicode * Utf16Item - moved to libunicode * Utf16Encoder - moved to libunicode * AnyLines - renamed to LinesAny and made a struct * SendStr - use CowString<'static> instead * str::raw - all functionality is deprecated * StrExt::into_string - call to_string() instead * StrExt::repeat - use iterators instead * StrExt::char_len - use .chars().count() instead * StrExt::is_alphanumeric - use .chars().all(..) * StrExt::is_whitespace - use .chars().all(..) Pending deprecation -- while slicing syntax is being worked out, these methods are all #[unstable] * Str - while currently used for generic programming, this trait will be replaced with one of [], deref coercions, or a generic conversion trait. 
* StrExt::slice - use slicing syntax instead * StrExt::slice_to - use slicing syntax instead * StrExt::slice_from - use slicing syntax instead * StrExt::lev_distance - deprecated with no replacement Awaiting stabilization due to patterns and/or matching * StrExt::contains * StrExt::contains_char * StrExt::split * StrExt::splitn * StrExt::split_terminator * StrExt::rsplitn * StrExt::match_indices * StrExt::split_str * StrExt::starts_with * StrExt::ends_with * StrExt::trim_chars * StrExt::trim_left_chars * StrExt::trim_right_chars * StrExt::find * StrExt::rfind * StrExt::find_str * StrExt::subslice_offset
2014-12-10 09:02:31 -08:00
let msg = String::from_utf16(truncate_utf16_at_nul(&buf));
match msg {
2014-12-29 16:38:07 -08:00
Ok(msg) => format!("OS Error {}: {}", errnum, msg),
Err(..) => format!("OS Error {} (FormatMessageW() returned \
invalid UTF-16)", errnum),
}
}
}
/// Creates an anonymous pipe and returns its two ends as `FileDesc`s, in the
/// order `libc::pipe` filled them in.
///
/// Returns `IoError::last_error()` if `libc::pipe` reports a non-zero status.
/// Panics if either descriptor comes back as `-1` or `0`.
pub unsafe fn pipe() -> IoResult<(FileDesc, FileDesc)> {
    // Windows pipes work subtly differently than unix pipes, and their
    // inheritance has to be handled in a different way that I do not
    // fully understand. Here we explicitly make the pipe non-inheritable,
    // which means to pass it to a subprocess they need to be duplicated
    // first, as in std::run.
    let mut fds = [0, ..2];
    let flags = (libc::O_BINARY | libc::O_NOINHERIT) as c_int;
    // NOTE(review): 1024 is presumably the requested pipe buffer size - confirm.
    let status = libc::pipe(fds.as_mut_ptr(), 1024 as ::libc::c_uint, flags);
    if status != 0 {
        return Err(IoError::last_error());
    }

    assert!(fds[0] != -1 && fds[0] != 0);
    assert!(fds[1] != -1 && fds[1] != 0);
    Ok((FileDesc::new(fds[0], true), FileDesc::new(fds[1], true)))
}
2014-11-24 16:21:39 -08:00
pub fn fill_utf16_buf_and_decode(f: |*mut u16, DWORD| -> DWORD) -> Option<String> {
unsafe {
let mut n = TMPBUF_SZ as DWORD;
let mut res = None;
let mut done = false;
while !done {
2014-12-30 16:29:27 -08:00
let mut buf: Vec<u16> = repeat(0u16).take(n as uint).collect();
2014-11-24 16:21:39 -08:00
let k = f(buf.as_mut_ptr(), n);
if k == (0 as DWORD) {
done = true;
} else if k == n &&
libc::GetLastError() ==
libc::ERROR_INSUFFICIENT_BUFFER as DWORD {
n *= 2 as DWORD;
} else if k >= n {
n = k;
} else {
done = true;
}
if k != 0 && done {
let sub = buf.slice(0, k as uint);
// We want to explicitly catch the case when the
// closure returned invalid UTF-16, rather than
// set `res` to None and continue.
2014-12-29 16:38:07 -08:00
let s = String::from_utf16(sub).ok()
2014-11-24 16:21:39 -08:00
.expect("fill_utf16_buf_and_decode: closure created invalid UTF-16");
res = Some(s)
2014-11-24 16:21:39 -08:00
}
}
return res;
}
}
pub fn getcwd() -> IoResult<Path> {
use libc::DWORD;
use libc::GetCurrentDirectoryW;
use io::OtherIoError;
let mut buf = [0 as u16, ..BUF_BYTES];
unsafe {
if libc::GetCurrentDirectoryW(buf.len() as DWORD, buf.as_mut_ptr()) == 0 as DWORD {
return Err(IoError::last_error());
}
}
2014-12-10 19:46:38 -08:00
match String::from_utf16(truncate_utf16_at_nul(&buf)) {
2014-12-29 16:38:07 -08:00
Ok(ref cwd) => Ok(Path::new(cwd)),
Err(..) => Err(IoError {
2014-11-24 16:21:39 -08:00
kind: OtherIoError,
desc: "GetCurrentDirectoryW returned invalid UTF-16",
detail: None,
}),
}
}
/// Reads the block returned by `GetEnvironmentStringsW` and returns one
/// `Vec<u8>` per NUL-terminated entry, each holding the bytes of the lossily
/// decoded string.
///
/// The block is walked until an entry that starts with a 0 unit is found,
/// and is released with `FreeEnvironmentStringsW` before returning.
///
/// NOTE(review): entries are presumably of the form `NAME=value` and are left
/// unsplit here - confirm against the caller.
///
/// # Panics
///
/// Panics if `GetEnvironmentStringsW` returns a null pointer.
pub unsafe fn get_env_pairs() -> Vec<Vec<u8>> {
use libc::funcs::extra::kernel32::{
GetEnvironmentStringsW,
FreeEnvironmentStringsW
};
let ch = GetEnvironmentStringsW();
if ch as uint == 0 {
panic!("os::env() failure getting env string from OS: {}",
os::last_os_error());
}
// Here, we lossily decode the string as UTF16.
//
// The docs suggest that the result should be in Unicode, but
// Windows doesn't guarantee it's actually UTF16 -- it doesn't
// validate the environment string passed to CreateProcess nor
// SetEnvironmentVariable. Yet, it's unlikely that returning a
// raw u16 buffer would be of practical use since the result would
// be inherently platform-dependent and introduce additional
// complexity to this code.
//
// Using the non-Unicode version of GetEnvironmentStrings is even
// worse since the result is in an OEM code page. Characters that
// can't be encoded in the code page would be turned into question
// marks.
let mut result = Vec::new();
// Offset, in u16 units, of the entry currently being read.
let mut i = 0;
// An entry whose first unit is 0 marks the end of the block.
while *ch.offset(i) != 0 {
let p = &*ch.offset(i);
// Count the units of this entry up to (not including) its NUL.
let mut len = 0;
while *(p as *const _).offset(len) != 0 {
len += 1;
}
let p = p as *const u16;
let s = slice::from_raw_buf(&p, len as uint);
result.push(String::from_utf16_lossy(s).into_bytes());
// Step over the entry and its terminating NUL.
i += len as int + 1;
}
FreeEnvironmentStringsW(ch);
result
}
/// Splits a `PATH`-style byte string into its component paths.
///
/// Components are separated by `;`. A `"` toggles quoting: while quoting is
/// on, `;` is taken literally. The quote characters themselves are dropped.
/// The final component is always emitted, even when it is empty.
pub fn split_paths(unparsed: &[u8]) -> Vec<Path> {
    // On Windows, the PATH environment variable is semicolon separated. Double
    // quotes are used as a way of introducing literal semicolons (since
    // c:\some;dir is a valid Windows path). Double quotes are not themselves
    // permitted in path names, so there is no way to escape a double quote.
    // Quoted regions can appear in arbitrary locations, so
    //
    //   c:\foo;c:\som"e;di"r;c:\bar
    //
    // Should parse as [c:\foo, c:\some;dir, c:\bar].
    //
    // (The above is based on testing; there is no clear reference available
    // for the grammar.)
    let mut paths = Vec::new();
    let mut current = Vec::new();
    let mut quoted = false;

    for &byte in unparsed.iter() {
        if byte == b'"' {
            quoted = !quoted;
        } else if byte == b';' && !quoted {
            paths.push(Path::new(current.as_slice()));
            current.clear();
        } else {
            current.push(byte);
        }
    }

    paths.push(Path::new(current));
    paths
}
/// Joins `paths` into a single `;`-separated byte string.
///
/// A segment that itself contains `;` is wrapped in double quotes. A segment
/// containing `"` cannot be represented, and makes the whole call return
/// `Err`.
pub fn join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
    let sep = b';';
    let quote = b'"';
    let mut out = Vec::new();

    for (idx, item) in paths.iter().enumerate() {
        let bytes = item.container_as_bytes();
        if idx > 0 { out.push(sep) }

        if bytes.contains(&quote) {
            return Err("path segment contains `\"`");
        }

        let needs_quotes = bytes.contains(&sep);
        if needs_quotes { out.push(quote) }
        out.push_all(bytes);
        if needs_quotes { out.push(quote) }
    }

    Ok(out)
}
pub fn load_self() -> Option<Vec<u8>> {
unsafe {
fill_utf16_buf_and_decode(|buf, sz| {
libc::GetModuleFileNameW(0u as libc::DWORD, buf, sz)
2014-12-10 19:46:38 -08:00
}).map(|s| s.to_string().into_bytes())
2014-11-24 16:21:39 -08:00
}
}
/// Changes the current working directory to `p` via `SetCurrentDirectoryW`.
///
/// Returns `IoError::last_error()` when the call reports failure (a zero
/// `BOOL`). Panics if `p.as_str()` is `None`.
pub fn chdir(p: &Path) -> IoResult<()> {
    // The API takes a NUL-terminated UTF-16 string.
    let mut wide: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
    wide.push(0);

    unsafe {
        if libc::SetCurrentDirectoryW(wide.as_ptr()) != (0 as libc::BOOL) {
            Ok(())
        } else {
            Err(IoError::last_error())
        }
    }
}
/// Returns the `dwPageSize` field filled in by `GetSystemInfo`, as a `uint`.
pub fn page_size() -> uint {
    use mem;

    unsafe {
        // Start from an all-zero structure and let the OS fill it in.
        let mut sys_info = mem::zeroed();
        libc::GetSystemInfo(&mut sys_info);
        sys_info.dwPageSize as uint
    }
}
std: Stabilize the std::str module This commit starts out by consolidating all `str` extension traits into one `StrExt` trait to be included in the prelude. This means that `UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into one `StrExt` exported by the standard library. Some functionality is currently duplicated with the `StrExt` present in libcore. This commit also currently avoids any methods which require any form of pattern to operate. These functions will be stabilized via a separate RFC. Next, stability of methods and structures are as follows: Stable * from_utf8_unchecked * CowString - after moving to std::string * StrExt::as_bytes * StrExt::as_ptr * StrExt::bytes/Bytes - also made a struct instead of a typedef * StrExt::char_indices/CharIndices - CharOffsets was renamed * StrExt::chars/Chars * StrExt::is_empty * StrExt::len * StrExt::lines/Lines * StrExt::lines_any/LinesAny * StrExt::slice_unchecked * StrExt::trim * StrExt::trim_left * StrExt::trim_right * StrExt::words/Words - also made a struct instead of a typedef Unstable * from_utf8 - the error type was changed to a `Result`, but the error type has yet to prove itself * from_c_str - this function will be handled by the c_str RFC * FromStr - this trait will have an associated error type eventually * StrExt::escape_default - needs iterators at least, unsure if it should make the cut * StrExt::escape_unicode - needs iterators at least, unsure if it should make the cut * StrExt::slice_chars - this function has yet to prove itself * StrExt::slice_shift_char - awaiting conventions about slicing and shifting * StrExt::graphemes/Graphemes - this functionality may only be in libunicode * StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in libunicode * StrExt::width - this functionality may only be in libunicode * StrExt::utf16_units - this functionality may only be in libunicode * StrExt::nfd_chars - this functionality may only be in libunicode * 
StrExt::nfkd_chars - this functionality may only be in libunicode * StrExt::nfc_chars - this functionality may only be in libunicode * StrExt::nfkc_chars - this functionality may only be in libunicode * StrExt::is_char_boundary - naming is uncertain with container conventions * StrExt::char_range_at - naming is uncertain with container conventions * StrExt::char_range_at_reverse - naming is uncertain with container conventions * StrExt::char_at - naming is uncertain with container conventions * StrExt::char_at_reverse - naming is uncertain with container conventions * StrVector::concat - this functionality may be replaced with iterators, but it's not certain at this time * StrVector::connect - as with concat, may be deprecated in favor of iterators Deprecated * StrAllocating and UnicodeStrPrelude have been merged into StrExit * eq_slice - compiler implementation detail * from_str - use the inherent parse() method * is_utf8 - call from_utf8 instead * replace - call the method instead * truncate_utf16_at_nul - this is an implementation detail of windows and does not need to be exposed. * utf8_char_width - moved to libunicode * utf16_items - moved to libunicode * is_utf16 - moved to libunicode * Utf16Items - moved to libunicode * Utf16Item - moved to libunicode * Utf16Encoder - moved to libunicode * AnyLines - renamed to LinesAny and made a struct * SendStr - use CowString<'static> instead * str::raw - all functionality is deprecated * StrExt::into_string - call to_string() instead * StrExt::repeat - use iterators instead * StrExt::char_len - use .chars().count() instead * StrExt::is_alphanumeric - use .chars().all(..) * StrExt::is_whitespace - use .chars().all(..) Pending deprecation -- while slicing syntax is being worked out, these methods are all #[unstable] * Str - while currently used for generic programming, this trait will be replaced with one of [], deref coercions, or a generic conversion trait. 
* StrExt::slice - use slicing syntax instead * StrExt::slice_to - use slicing syntax instead * StrExt::slice_from - use slicing syntax instead * StrExt::lev_distance - deprecated with no replacement Awaiting stabilization due to patterns and/or matching * StrExt::contains * StrExt::contains_char * StrExt::split * StrExt::splitn * StrExt::split_terminator * StrExt::rsplitn * StrExt::match_indices * StrExt::split_str * StrExt::starts_with * StrExt::ends_with * StrExt::trim_chars * StrExt::trim_left_chars * StrExt::trim_right_chars * StrExt::find * StrExt::rfind * StrExt::find_str * StrExt::subslice_offset
2014-12-10 09:02:31 -08:00
#[cfg(test)]
mod tests {
    use super::truncate_utf16_at_nul;

    /// Checks truncation for empty input, a NUL in each position, and input
    /// without any NUL.
    #[test]
    fn test_truncate_utf16_at_nul() {
        // Empty input stays empty.
        let input = [];
        let expected: &[u16] = &[];
        assert_eq!(truncate_utf16_at_nul(&input), expected);

        // A leading NUL truncates everything.
        let input = [0, 2, 3];
        assert_eq!(truncate_utf16_at_nul(&input), expected);

        // NUL in the middle.
        let input = [1, 0, 3];
        let expected: &[u16] = &[1];
        assert_eq!(truncate_utf16_at_nul(&input), expected);

        // Trailing NUL.
        let input = [1, 2, 0];
        let expected: &[u16] = &[1, 2];
        assert_eq!(truncate_utf16_at_nul(&input), expected);

        // No NUL at all: the slice is returned whole.
        let input = [1, 2, 3];
        let expected: &[u16] = &[1, 2, 3];
        assert_eq!(truncate_utf16_at_nul(&input), expected);
    }
}