From 2f9bd1a2366f2048de4846e0933bb35485d5d91e Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Tue, 30 Aug 2022 03:06:22 -0700 Subject: [PATCH 1/2] Avoid zeroing large stack buffers in stdio on Windows --- library/std/src/sys/windows/stdio.rs | 41 ++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/library/std/src/sys/windows/stdio.rs b/library/std/src/sys/windows/stdio.rs index a001d6b9858..2b11332714d 100644 --- a/library/std/src/sys/windows/stdio.rs +++ b/library/std/src/sys/windows/stdio.rs @@ -3,6 +3,7 @@ use crate::char::decode_utf16; use crate::cmp; use crate::io; +use crate::mem::MaybeUninit; use crate::os::windows::io::{FromRawHandle, IntoRawHandle}; use crate::ptr; use crate::str; @@ -169,13 +170,14 @@ fn write( } fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result { - let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2]; + let mut utf16: [MaybeUninit; MAX_BUFFER_SIZE / 2] = MaybeUninit::uninit_array(); let mut len_utf16 = 0; for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) { - *dest = chr; + *dest = MaybeUninit::new(chr); len_utf16 += 1; } - let utf16 = &utf16[..len_utf16]; + // Safety: We've initialized `len_utf16` values. + let utf16: &[u16] = unsafe { MaybeUninit::slice_assume_init_ref(&utf16[..len_utf16]) }; let mut written = write_u16s(handle, &utf16)?; @@ -250,11 +252,14 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { return Ok(bytes_copied); } else if buf.len() - bytes_copied < 4 { // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8. - let mut utf16_buf = [0u16; 1]; + let mut utf16_buf = [MaybeUninit::new(1); 1]; // Read one u16 character. let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?; // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space. - let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?; + let read_bytes = utf16_to_utf8( + unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) }, + &mut self.incomplete_utf8.bytes, + )?; // Read in the bytes from incomplete_utf8 until the buffer is full. self.incomplete_utf8.len = read_bytes as u8; @@ -262,15 +267,18 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]); Ok(bytes_copied) } else { - let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2]; + let mut utf16_buf: [MaybeUninit; MAX_BUFFER_SIZE / 2] = + MaybeUninit::uninit_array(); // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets // lost. let amount = cmp::min(buf.len() / 3, utf16_buf.len()); let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?; - - match utf16_to_utf8(&utf16_buf[..read], buf) { + // Safety `read_u16s_fixup_surrogates` returns the number of items + // initialized. + let utf16s = unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) }; + match utf16_to_utf8(utf16s, buf) { Ok(value) => return Ok(bytes_copied + value), Err(e) => return Err(e), } @@ -283,14 +291,14 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { // This is a best effort, and might not work if we are not the only reader on Stdin. fn read_u16s_fixup_surrogates( handle: c::HANDLE, - buf: &mut [u16], + buf: &mut [MaybeUninit], mut amount: usize, surrogate: &mut u16, ) -> io::Result { // Insert possibly remaining unpaired surrogate from last read. let mut start = 0; if *surrogate != 0 { - buf[0] = *surrogate; + buf[0] = MaybeUninit::new(*surrogate); *surrogate = 0; start = 1; if amount == 1 { @@ -303,7 +311,10 @@ fn read_u16s_fixup_surrogates( let mut amount = read_u16s(handle, &mut buf[start..amount])? + start; if amount > 0 { - let last_char = buf[amount - 1]; + // Safety: The returned `amount` is the number of values initialized, + // and it is not 0, so we know that `buf[amount - 1]` have been + // initialized. + let last_char = unsafe { buf[amount - 1].assume_init() }; if last_char >= 0xD800 && last_char <= 0xDBFF { // high surrogate *surrogate = last_char; @@ -313,7 +324,8 @@ fn read_u16s_fixup_surrogates( Ok(amount) } -fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result { +// Returns `Ok(n)` if it initialized `n` values in `buf`. +fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit]) -> io::Result { // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the // traditional DOS method to indicate end of character stream / user input (SUB). // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole. @@ -346,8 +358,9 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result { } break; } - - if amount > 0 && buf[amount as usize - 1] == CTRL_Z { + // Safety: if `amount > 0`, then that many bytes were written, so + // `buf[amount as usize - 1]` has been initialized. + if amount > 0 && unsafe { buf[amount as usize - 1].assume_init() } == CTRL_Z { amount -= 1; } Ok(amount as usize) From 1b8b2dc2ff9196ba4532c7d6f2775e2112bfc060 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Tue, 30 Aug 2022 06:10:55 -0700 Subject: [PATCH 2/2] Avoid `MaybeUninit::uninit_array()` --- library/std/src/sys/windows/stdio.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/windows/stdio.rs b/library/std/src/sys/windows/stdio.rs index 2b11332714d..70c9b14a08f 100644 --- a/library/std/src/sys/windows/stdio.rs +++ b/library/std/src/sys/windows/stdio.rs @@ -170,7 +170,7 @@ fn write( } fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result { - let mut utf16: [MaybeUninit; MAX_BUFFER_SIZE / 2] = MaybeUninit::uninit_array(); + let mut utf16 = [MaybeUninit::::uninit(); MAX_BUFFER_SIZE / 2]; let mut len_utf16 = 0; for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) { *dest = MaybeUninit::new(chr); @@ -252,7 +252,7 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { return Ok(bytes_copied); } else if buf.len() - bytes_copied < 4 { // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8. - let mut utf16_buf = [MaybeUninit::new(1); 1]; + let mut utf16_buf = [MaybeUninit::new(0); 1]; // Read one u16 character. let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?; // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space. @@ -267,8 +267,8 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]); Ok(bytes_copied) } else { - let mut utf16_buf: [MaybeUninit; MAX_BUFFER_SIZE / 2] = - MaybeUninit::uninit_array(); + let mut utf16_buf = [MaybeUninit::::uninit(); MAX_BUFFER_SIZE / 2]; + // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets // lost.