Rollup merge of #110586 - ChrisDenton:msvc-oem-output, r=workingjubilee

Fix Unreadable non-UTF-8 output on localized MSVC

Fixes #35785 by converting non-UTF-8 linker output to Unicode using the OEM code page.

Before:

```text
  = note: Non-UTF-8 output: LINK : fatal error LNK1181: cannot open input file \'m\x84rchenhaft.obj\'\r\n
```

After:

```text
   = note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'
```

The difference is more dramatic if using a non-ASCII language pack for Windows.
This commit is contained in:
Matthias Krüger 2023-04-30 01:14:55 +02:00 committed by GitHub
commit 734e866e63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 1 deletions

View File

@ -3250,6 +3250,7 @@ dependencies = [
"tempfile", "tempfile",
"thorin-dwp", "thorin-dwp",
"tracing", "tracing",
"windows 0.46.0",
] ]
[[package]] [[package]]

View File

@ -49,3 +49,7 @@ libc = "0.2.50"
version = "0.30.1" version = "0.30.1"
default-features = false default-features = false
features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"] features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"]
[target.'cfg(windows)'.dependencies.windows]
version = "0.46.0"
features = ["Win32_Globalization"]

View File

@ -860,7 +860,7 @@ fn is_illegal_instruction(_status: &ExitStatus) -> bool {
if !prog.status.success() { if !prog.status.success() {
let mut output = prog.stderr.clone(); let mut output = prog.stderr.clone();
output.extend_from_slice(&prog.stdout); output.extend_from_slice(&prog.stdout);
let escaped_output = escape_string(&output); let escaped_output = escape_linker_output(&output, flavor);
// FIXME: Add UI tests for this error. // FIXME: Add UI tests for this error.
let err = errors::LinkingFailed { let err = errors::LinkingFailed {
linker_path: &linker_path, linker_path: &linker_path,
@ -1052,6 +1052,83 @@ fn escape_string(s: &[u8]) -> String {
} }
} }
/// Render raw linker output as a `String` for diagnostics on non-Windows hosts.
///
/// The bytes are handed straight to `escape_string`; the linker flavour is
/// accepted only so the signature matches the Windows variant and is ignored.
#[cfg(not(windows))]
fn escape_linker_output(s: &[u8], _flavour: LinkerFlavor) -> String {
    escape_string(s)
}
/// Render raw linker output as a `String` for diagnostics on Windows hosts.
///
/// Output from anything other than the actual MSVC linker is passed through
/// `escape_string`. For the MSVC linker, valid UTF-8 is returned unchanged;
/// otherwise the bytes are decoded using the system OEM code page, and if that
/// also fails they are ASCII-escaped behind a `Non-UTF-8 output: ` prefix.
#[cfg(windows)]
fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
    // The OEM conversion only applies to the actual MSVC linker.
    if flavour != LinkerFlavor::Msvc(Lld::No) {
        return escape_string(s);
    }

    // Already UTF-8: nothing to convert.
    if let Ok(text) = str::from_utf8(s) {
        return text.to_owned();
    }

    // Not UTF-8: try the OEM code page, and escape the raw bytes if the string
    // isn't valid for that code page either.
    win::locale_byte_str_to_string(s, win::oem_code_page())
        .unwrap_or_else(|| format!("Non-UTF-8 output: {}", s.escape_ascii()))
}
/// Wrappers around the Windows API.
#[cfg(windows)]
mod win {
    use windows::Win32::Globalization::{
        GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
        LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
    };

    /// Get the Windows system OEM code page. This is most notably the code page
    /// used for link.exe's output.
    ///
    /// Falls back to `CP_OEMCP` when `GetLocaleInfoEx` does not report having
    /// written exactly one `u32` worth of data.
    pub fn oem_code_page() -> u32 {
        let mut cp: u32 = 0;
        // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
        // But the API requires us to pass the data as though it's a [u16] string.
        let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
        // SAFETY: `cp` is a live, initialized `u32`, so it is valid for reads and
        // writes of `len` (= 2) `u16`s and is more strictly aligned than `u16`.
        // The slice is the only access path to `cp` while it exists and it does
        // not outlive this block.
        let len_written = unsafe {
            let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
            GetLocaleInfoEx(
                LOCALE_NAME_SYSTEM_DEFAULT,
                LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
                Some(data),
            )
        };
        if len_written as usize == len { cp } else { CP_OEMCP }
    }

    /// Try to convert a multi-byte string to a UTF-8 string using the given code page.
    /// The string does not need to be null terminated.
    ///
    /// This is implemented as a wrapper around `MultiByteToWideChar`.
    /// See <https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar>
    ///
    /// It will fail if the multi-byte string is longer than `i32::MAX` or if it contains
    /// any invalid bytes for the expected encoding. An empty input yields an empty string.
    pub fn locale_byte_str_to_string(s: &[u8], code_page: u32) -> Option<String> {
        // `MultiByteToWideChar` treats a zero length as an error, but empty text
        // is trivially valid in every code page.
        if s.is_empty() {
            return Some(String::new());
        }
        // `MultiByteToWideChar` takes the input length as a C `int`. Reject anything
        // larger rather than letting the length be truncated.
        if s.len() > i32::MAX as usize {
            return None;
        }
        // Error if the string is not valid for the expected code page.
        let flags = MB_ERR_INVALID_CHARS;
        // Call MultiByteToWideChar twice.
        // First to calculate the length then to convert the string.
        // SAFETY: `s` is a valid slice; no output buffer is supplied, so the call
        // only reads `s` and returns the required length.
        let mut len = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
        if len > 0 {
            let mut utf16 = vec![0u16; len as usize];
            // SAFETY: `s` and `utf16` are valid, non-overlapping slices, and `utf16`
            // has the capacity reported by the sizing call above.
            len = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
            if len > 0 {
                // `get` guards against the second call reporting more than was allocated.
                return utf16.get(..len as usize).map(String::from_utf16_lossy);
            }
        }
        None
    }
}
fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) { fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {
// On macOS the runtimes are distributed as dylibs which should be linked to // On macOS the runtimes are distributed as dylibs which should be linked to
// both executables and dynamic shared objects. Everywhere else the runtimes // both executables and dynamic shared objects. Everywhere else the runtimes

View File

@ -0,0 +1,6 @@
// build-fail
// compile-flags:-C link-arg=märchenhaft
// only-msvc
// error-pattern:= note: LINK : fatal error LNK1181:
// normalize-stderr-test "(\s*\|\n)\s*= note: .*\n" -> "$1"
// Intentionally empty: the directives above make the link step fail on a
// non-ASCII link argument, and only the resulting linker diagnostic is checked.
pub fn main() {}

View File

@ -0,0 +1,7 @@
error: linking with `link.exe` failed: exit code: 1181
|
= note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'
error: aborting due to previous error