From 84b3c842239a97cedf2f2d131c0d5c2f30e85321 Mon Sep 17 00:00:00 2001 From: Max Heller Date: Wed, 22 Nov 2023 20:04:43 -0500 Subject: [PATCH 1/6] add missing period in `std::process::Command` docs --- library/std/src/process.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/process.rs b/library/std/src/process.rs index af6bef1a76e..6004ed51bd1 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -1108,7 +1108,7 @@ impl fmt::Debug for Command { /// /// The default format approximates a shell invocation of the program along with its /// arguments. It does not include most of the other command properties. The output is not guaranteed to work - /// (e.g. due to lack of shell-escaping or differences in path resolution) + /// (e.g. due to lack of shell-escaping or differences in path resolution). /// On some platforms you can use [the alternate syntax] to show more fields. /// /// Note that the debug implementation is platform-specific. From 101704257533adb2e700cb6f12b12e2e0de8b9af Mon Sep 17 00:00:00 2001 From: onur-ozkan Date: Sun, 26 Nov 2023 18:37:59 +0300 Subject: [PATCH 2/6] give dev-friendly error message for incorrect config profiles before this change, an incorrect profile would result in the following error: ```sh ... ... File "/home/nimda/devspace/onur-ozkan/rust/src/bootstrap/bootstrap.py", line 1088, in bootstrap with open(include_path) as included_toml: ^^^^^^^^^^^^^^^^^^ FileNotFoundError: [Errno 2] No such file or directory: '/home/nimda/devspace/onur-ozkan/rust/src/bootstrap/defaults/config.aaaa.toml' ``` with this change, the error message is now: ```sh ... ... File "/home/nimda/devspace/onur-ozkan/rust/src/bootstrap/bootstrap.py", line 1088, in bootstrap raise Exception("Unrecognized profile '{}'. Check src/bootstrap/defaults" Exception: Unrecognized profile 'aaaa'. Check src/bootstrap/defaults for available options. ``` Signed-off-by: onur-ozkan --- src/bootstrap/bootstrap.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 5a84e37f8cf..4691fb3ad6f 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -1083,6 +1083,11 @@ def bootstrap(args): include_file = 'config.{}.toml'.format(profile_aliases.get(profile) or profile) include_dir = os.path.join(rust_root, 'src', 'bootstrap', 'defaults') include_path = os.path.join(include_dir, include_file) + + if not os.path.exists(include_path): + raise Exception("Unrecognized config profile '{}'. Check src/bootstrap/defaults" + " for available options.".format(profile)) + # HACK: This works because `self.get_toml()` returns the first match it finds for a # specific key, so appending our defaults at the end allows the user to override them with open(include_path) as included_toml: From bc7dd5fa6d87d8a2826511a2e25aba976e4e1f25 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Fri, 24 Nov 2023 01:00:25 +0100 Subject: [PATCH 3/6] unify read_to_end and io::copy impls for reading into a Vec --- library/std/src/io/copy.rs | 69 +----------------- library/std/src/io/copy/tests.rs | 11 +-- library/std/src/io/mod.rs | 116 +++++++++++++++++++++---------- 3 files changed, 90 insertions(+), 106 deletions(-) diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index 4d51a719f6c..d49866345cb 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -1,7 +1,6 @@ use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE}; use crate::alloc::Allocator; use crate::cmp; -use crate::cmp::min; use crate::collections::VecDeque; use crate::io::IoSlice; use crate::mem::MaybeUninit; @@ -256,79 +255,17 @@ impl BufferedWriterSpec for BufWriter { } } -impl BufferedWriterSpec for Vec { +impl BufferedWriterSpec for Vec { fn buffer_size(&self) -> usize { cmp::max(DEFAULT_BUF_SIZE, self.capacity() - self.len()) } fn copy_from(&mut self, reader: &mut R) -> Result { - let mut bytes = 0; - - // avoid inflating empty/small vecs before we have determined that there's anything to read - if self.capacity() < DEFAULT_BUF_SIZE { - let stack_read_limit = DEFAULT_BUF_SIZE as u64; - bytes = stack_buffer_copy(&mut reader.take(stack_read_limit), self)?; - // fewer bytes than requested -> EOF reached - if bytes < stack_read_limit { - return Ok(bytes); - } - } - - // don't immediately offer the vec's whole spare capacity, otherwise - // we might have to fully initialize it if the reader doesn't have a custom read_buf() impl - let mut max_read_size = DEFAULT_BUF_SIZE; - - loop { - self.reserve(DEFAULT_BUF_SIZE); - let mut initialized_spare_capacity = 0; - - loop { - let buf = self.spare_capacity_mut(); - let read_size = min(max_read_size, buf.len()); - let mut buf = BorrowedBuf::from(&mut buf[..read_size]); - // SAFETY: init is either 0 or the init_len from the previous iteration. - unsafe { - buf.set_init(initialized_spare_capacity); - } - match reader.read_buf(buf.unfilled()) { - Ok(()) => { - let bytes_read = buf.len(); - - // EOF - if bytes_read == 0 { - return Ok(bytes); - } - - // the reader is returning short reads but it doesn't call ensure_init() - if buf.init_len() < buf.capacity() { - max_read_size = usize::MAX; - } - // the reader hasn't returned short reads so far - if bytes_read == buf.capacity() { - max_read_size *= 2; - } - - initialized_spare_capacity = buf.init_len() - bytes_read; - bytes += bytes_read as u64; - // SAFETY: BorrowedBuf guarantees all of its filled bytes are init - // and the number of read bytes can't exceed the spare capacity since - // that's what the buffer is borrowing from. - unsafe { self.set_len(self.len() + bytes_read) }; - - // spare capacity full, reserve more - if self.len() == self.capacity() { - break; - } - } - Err(e) if e.is_interrupted() => continue, - Err(e) => return Err(e), - } - } - } + reader.read_to_end(self).map(|bytes| u64::try_from(bytes).expect("usize overflowed u64")) } } -fn stack_buffer_copy( +pub fn stack_buffer_copy( reader: &mut R, writer: &mut W, ) -> Result { diff --git a/library/std/src/io/copy/tests.rs b/library/std/src/io/copy/tests.rs index af137eaf856..a1f909a3c53 100644 --- a/library/std/src/io/copy/tests.rs +++ b/library/std/src/io/copy/tests.rs @@ -82,13 +82,16 @@ fn copy_specializes_bufreader() { #[test] fn copy_specializes_to_vec() { - let cap = 123456; - let mut source = ShortReader { cap, observed_buffer: 0, read_size: 1337 }; + let cap = DEFAULT_BUF_SIZE * 10; + let mut source = ShortReader { cap, observed_buffer: 0, read_size: DEFAULT_BUF_SIZE }; let mut sink = Vec::new(); - assert_eq!(cap as u64, io::copy(&mut source, &mut sink).unwrap()); + let copied = io::copy(&mut source, &mut sink).unwrap(); + assert_eq!(cap as u64, copied); + assert_eq!(sink.len() as u64, copied); assert!( source.observed_buffer > DEFAULT_BUF_SIZE, - "expected a large buffer to be provided to the reader" + "expected a large buffer to be provided to the reader, got {}", + source.observed_buffer ); } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 7d70a0bac24..29a4b727b66 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -397,12 +397,16 @@ where } } -// This uses an adaptive system to extend the vector when it fills. We want to -// avoid paying to allocate and zero a huge chunk of memory if the reader only -// has 4 bytes while still making large reads if the reader does have a ton -// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every -// time is 4,500 times (!) slower than a default reservation size of 32 if the -// reader has a very small amount of data to return. +// Here we must serve many masters with conflicting goals: +// +// - avoid allocating unless necessary +// - avoid overallocating if we know the exact size (#89165) +// - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820) +// - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads +// - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems +// at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost +// proportional to buffer size (#110650) +// pub(crate) fn default_read_to_end( r: &mut R, buf: &mut Vec, @@ -412,20 +416,58 @@ pub(crate) fn default_read_to_end( let start_cap = buf.capacity(); // Optionally limit the maximum bytes read on each iteration. // This adds an arbitrary fiddle factor to allow for more data than we expect. - let max_read_size = - size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)); + let mut max_read_size = size_hint + .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)) + .unwrap_or(DEFAULT_BUF_SIZE); let mut initialized = 0; // Extra initialized bytes from previous loop iteration + + const PROBE_SIZE: usize = 32; + + fn small_probe_read(r: &mut R, buf: &mut Vec) -> Result { + let mut probe = [0u8; PROBE_SIZE]; + + loop { + match r.read(&mut probe) { + Ok(n) => { + buf.extend_from_slice(&probe[..n]); + return Ok(n); + } + Err(ref e) if e.is_interrupted() => continue, + Err(e) => return Err(e), + } + } + } + + // avoid inflating empty/small vecs before we have determined that there's anything to read + if (size_hint.is_none() || size_hint == Some(0)) && buf.capacity() - buf.len() < PROBE_SIZE { + let read = small_probe_read(r, buf)?; + + if read == 0 { + return Ok(0); + } + } + loop { + if buf.len() == buf.capacity() && buf.capacity() == start_cap { + // The buffer might be an exact fit. Let's read into a probe buffer + // and see if it returns `Ok(0)`. If so, we've avoided an + // unnecessary doubling of the capacity. But if not, append the + // probe buffer to the primary buffer and let its capacity grow. + let read = small_probe_read(r, buf)?; + + if read == 0 { + return Ok(buf.len() - start_len); + } + } + if buf.len() == buf.capacity() { - buf.reserve(32); // buf is full, need more space + buf.reserve(PROBE_SIZE); // buf is full, need more space } let mut spare = buf.spare_capacity_mut(); - if let Some(size) = max_read_size { - let len = cmp::min(spare.len(), size); - spare = &mut spare[..len] - } + let buf_len = cmp::min(spare.len(), max_read_size); + spare = &mut spare[..buf_len]; let mut read_buf: BorrowedBuf<'_> = spare.into(); // SAFETY: These bytes were initialized but not filled in the previous loop @@ -434,42 +476,44 @@ pub(crate) fn default_read_to_end( } let mut cursor = read_buf.unfilled(); - match r.read_buf(cursor.reborrow()) { - Ok(()) => {} - Err(e) if e.is_interrupted() => continue, - Err(e) => return Err(e), + loop { + match r.read_buf(cursor.reborrow()) { + Ok(()) => break, + Err(e) if e.is_interrupted() => continue, + Err(e) => return Err(e), + } } - if cursor.written() == 0 { + let unfilled_but_initialized = cursor.init_ref().len(); + let bytes_read = cursor.written(); + let was_fully_initialized = read_buf.init_len() == buf_len; + + if bytes_read == 0 { return Ok(buf.len() - start_len); } // store how much was initialized but not filled - initialized = cursor.init_ref().len(); + initialized = unfilled_but_initialized; // SAFETY: BorrowedBuf's invariants mean this much memory is initialized. unsafe { - let new_len = read_buf.filled().len() + buf.len(); + let new_len = bytes_read + buf.len(); buf.set_len(new_len); } - if buf.len() == buf.capacity() && buf.capacity() == start_cap { - // The buffer might be an exact fit. Let's read into a probe buffer - // and see if it returns `Ok(0)`. If so, we've avoided an - // unnecessary doubling of the capacity. But if not, append the - // probe buffer to the primary buffer and let its capacity grow. - let mut probe = [0u8; 32]; + // Use heuristics to determine the max read size if no initial size hint was provided + if size_hint.is_none() { + // The reader is returning short reads but it doesn't call ensure_init(). + // In that case we no longer need to restrict read sizes to avoid + // initialization costs. + if !was_fully_initialized { + max_read_size = usize::MAX; + } - loop { - match r.read(&mut probe) { - Ok(0) => return Ok(buf.len() - start_len), - Ok(n) => { - buf.extend_from_slice(&probe[..n]); - break; - } - Err(ref e) if e.is_interrupted() => continue, - Err(e) => return Err(e), - } + // we have passed a larger buffer than previously and the + // reader still hasn't returned a short read + if buf_len >= max_read_size && bytes_read == buf_len { + max_read_size = max_read_size.saturating_mul(2); } } } From 179e193db351ab94339f76d3f86c1514b1e41ca6 Mon Sep 17 00:00:00 2001 From: Cormac Relf Date: Tue, 28 Nov 2023 01:54:38 +1100 Subject: [PATCH 4/6] Perform LTO optimisations with wasm-ld + -Clinker-plugin-lto --- compiler/rustc_codegen_ssa/src/back/linker.rs | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 4dd688c2234..b13d7d8616a 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -1302,6 +1302,8 @@ impl<'a> Linker for WasmLd<'a> { } fn optimize(&mut self) { + // The -O flag is, as of late 2023, only used for merging of strings and debuginfo, and + // only differentiates -O0 and -O1. It does not apply to LTO. self.cmd.arg(match self.sess.opts.optimize { OptLevel::No => "-O0", OptLevel::Less => "-O1", @@ -1354,7 +1356,31 @@ impl<'a> Linker for WasmLd<'a> { fn subsystem(&mut self, _subsystem: &str) {} fn linker_plugin_lto(&mut self) { - // Do nothing for now + match self.sess.opts.cg.linker_plugin_lto { + LinkerPluginLto::Disabled => { + // Nothing to do + } + LinkerPluginLto::LinkerPluginAuto => { + self.push_linker_plugin_lto_args(); + } + LinkerPluginLto::LinkerPlugin(_) => { + self.push_linker_plugin_lto_args(); + } + } + } +} + +impl<'a> WasmLd<'a> { + fn push_linker_plugin_lto_args(&mut self) { + let opt_level = match self.sess.opts.optimize { + config::OptLevel::No => "O0", + config::OptLevel::Less => "O1", + config::OptLevel::Default => "O2", + config::OptLevel::Aggressive => "O3", + // wasm-ld only handles integer LTO opt levels. Use O2 + config::OptLevel::Size | config::OptLevel::SizeMin => "O2", + }; + self.cmd.arg(&format!("--lto-{opt_level}")); } } From 72668dea6ba68c57d01ef98ae46924f0131fd8f0 Mon Sep 17 00:00:00 2001 From: r0cky Date: Sun, 26 Nov 2023 16:47:03 +0800 Subject: [PATCH 5/6] Remove dead codes --- src/tools/miri/src/shims/unix/fs.rs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs index ba40a1b3c32..be30627cf0c 100644 --- a/src/tools/miri/src/shims/unix/fs.rs +++ b/src/tools/miri/src/shims/unix/fs.rs @@ -66,11 +66,6 @@ pub trait FileDescriptor: std::fmt::Debug + Any { fn is_tty(&self, _communicate_allowed: bool) -> bool { false } - - #[cfg(unix)] - fn as_unix_host_fd(&self) -> Option { - None - } } impl dyn FileDescriptor { @@ -150,12 +145,6 @@ impl FileDescriptor for FileHandle { Ok(Box::new(FileHandle { file: duplicated, writable: self.writable })) } - #[cfg(unix)] - fn as_unix_host_fd(&self) -> Option { - use std::os::unix::io::AsRawFd; - Some(self.file.as_raw_fd()) - } - fn is_tty(&self, communicate_allowed: bool) -> bool { communicate_allowed && self.file.is_terminal() } @@ -183,11 +172,6 @@ impl FileDescriptor for io::Stdin { Ok(Box::new(io::stdin())) } - #[cfg(unix)] - fn as_unix_host_fd(&self) -> Option { - Some(libc::STDIN_FILENO) - } - fn is_tty(&self, communicate_allowed: bool) -> bool { communicate_allowed && self.is_terminal() } @@ -220,11 +204,6 @@ impl FileDescriptor for io::Stdout { Ok(Box::new(io::stdout())) } - #[cfg(unix)] - fn as_unix_host_fd(&self) -> Option { - Some(libc::STDOUT_FILENO) - } - fn is_tty(&self, communicate_allowed: bool) -> bool { communicate_allowed && self.is_terminal() } @@ -250,11 +229,6 @@ impl FileDescriptor for io::Stderr { Ok(Box::new(io::stderr())) } - #[cfg(unix)] - fn as_unix_host_fd(&self) -> Option { - Some(libc::STDERR_FILENO) - } - fn is_tty(&self, communicate_allowed: bool) -> bool { communicate_allowed && self.is_terminal() } From 81cd7c5b11766ed1e3214a2233371fb6d72ed89c Mon Sep 17 00:00:00 2001 From: Krasimir Georgiev Date: Tue, 28 Nov 2023 12:10:59 +0000 Subject: [PATCH 6/6] update test for new LLVM 18 codegen LLVM at HEAD now emits `or disjoint`: https://buildkite.com/llvm-project/rust-llvm-integrate-prototype/builds/24076#018c1596-8153-488e-b622-951266a02f6c/741-774 --- tests/codegen/ascii-char.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/codegen/ascii-char.rs b/tests/codegen/ascii-char.rs index 4167becf5e9..711ffe7e1a5 100644 --- a/tests/codegen/ascii-char.rs +++ b/tests/codegen/ascii-char.rs @@ -14,7 +14,7 @@ pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar { // CHECK: %[[R:.+]] = urem i32 %v, 10 // CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8 - // CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48 + // CHECK-NEXT: %[[D:.+]] = or{{( disjoint)?}} i8 %[[T]], 48 // CHECK-NEXT: ret i8 %[[D]] // CHECK-NOT: icmp