From 8d8f06b277a12e19189d6b08dbd9893db3283e9d Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 4 Nov 2023 12:07:10 +0100 Subject: [PATCH 1/2] avoid excessive initialization when copying to a Vec It now keeps track of initialized bytes to avoid reinitialization. It also keeps track of read sizes to avoid initializing more bytes than the reader needs. This is important when passing a huge vector to a Read that only has a few bytes to offer and doesn't implement read_buf(). --- library/std/src/io/copy.rs | 64 ++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index 57d226a3771..e05ec9bde24 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -1,6 +1,7 @@ use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE}; use crate::alloc::Allocator; use crate::cmp; +use crate::cmp::min; use crate::collections::VecDeque; use crate::io::IoSlice; use crate::mem::MaybeUninit; @@ -271,28 +272,57 @@ fn copy_from(&mut self, reader: &mut R) -> Result { } } + // don't immediately offer the vec's whole spare capacity, otherwise + // we might have to fully initialize it if the reader doesn't have a custom read_buf() impl + let mut max_read_size = DEFAULT_BUF_SIZE; + loop { self.reserve(DEFAULT_BUF_SIZE); - let mut buf: BorrowedBuf<'_> = self.spare_capacity_mut().into(); - match reader.read_buf(buf.unfilled()) { - Ok(()) => {} - Err(e) if e.is_interrupted() => continue, - Err(e) => return Err(e), - }; + let mut initialized_spare_capacity = 0; - let read = buf.filled().len(); - if read == 0 { - break; + loop { + let buf = self.spare_capacity_mut(); + let read_size = min(max_read_size, buf.len()); + let mut buf = BorrowedBuf::from(&mut buf[..read_size]); + // SAFETY: init is either 0 or the init_len from the previous iteration. + unsafe { + buf.set_init(initialized_spare_capacity); + } + match reader.read_buf(buf.unfilled()) { + Ok(()) => { + let bytes_read = buf.len(); + + // EOF + if bytes_read == 0 { + return Ok(bytes); + } + + // the reader is returning short reads but it doesn't call ensure_init() + if buf.init_len() < buf.capacity() { + max_read_size = usize::MAX; + } + // the reader hasn't returned short reads so far + if bytes_read == buf.capacity() { + max_read_size *= 2; + } + + initialized_spare_capacity = buf.init_len() - bytes_read; + bytes += bytes_read as u64; + // SAFETY: BorrowedBuf guarantees all of its filled bytes are init + // and the number of read bytes can't exceed the spare capacity since + // that's what the buffer is borrowing from. + unsafe { self.set_len(self.len() + bytes_read) }; + + // spare capacity full, reserve more + if self.len() == self.capacity() { + break; + } + } + Err(e) if e.is_interrupted() => continue, + Err(e) => return Err(e), + } } - - // SAFETY: BorrowedBuf guarantees all of its filled bytes are init - // and the number of read bytes can't exceed the spare capacity since - // that's what the buffer is borrowing from. - unsafe { self.set_len(self.len() + read) }; - bytes += read as u64; } - - Ok(bytes) } } From 78aa5e511c0b205a742edffd48533670579ec247 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 4 Nov 2023 12:09:27 +0100 Subject: [PATCH 2/2] detect EOF earlier The initial probe-for-empty-source by stack_buffer_copy only detected EOF if the source was empty but not when it was merely small which lead to additional calls to read() after Ok(0) had already been returned in the stack copy routine --- library/std/src/io/copy.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index e05ec9bde24..4d51a719f6c 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -264,11 +264,13 @@ fn buffer_size(&self) -> usize { fn copy_from(&mut self, reader: &mut R) -> Result { let mut bytes = 0; - // avoid allocating before we have determined that there's anything to read - if self.capacity() == 0 { - bytes = stack_buffer_copy(&mut reader.take(DEFAULT_BUF_SIZE as u64), self)?; - if bytes == 0 { - return Ok(0); + // avoid inflating empty/small vecs before we have determined that there's anything to read + if self.capacity() < DEFAULT_BUF_SIZE { + let stack_read_limit = DEFAULT_BUF_SIZE as u64; + bytes = stack_buffer_copy(&mut reader.take(stack_read_limit), self)?; + // fewer bytes than requested -> EOF reached + if bytes < stack_read_limit { + return Ok(bytes); } }