unify read_to_end and io::copy impls for reading into a Vec
This commit is contained in:
parent
820f06b21f
commit
bc7dd5fa6d
@ -1,7 +1,6 @@
|
|||||||
use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE};
|
use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE};
|
||||||
use crate::alloc::Allocator;
|
use crate::alloc::Allocator;
|
||||||
use crate::cmp;
|
use crate::cmp;
|
||||||
use crate::cmp::min;
|
|
||||||
use crate::collections::VecDeque;
|
use crate::collections::VecDeque;
|
||||||
use crate::io::IoSlice;
|
use crate::io::IoSlice;
|
||||||
use crate::mem::MaybeUninit;
|
use crate::mem::MaybeUninit;
|
||||||
@ -256,79 +255,17 @@ fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<A: Allocator> BufferedWriterSpec for Vec<u8, A> {
|
impl BufferedWriterSpec for Vec<u8> {
|
||||||
fn buffer_size(&self) -> usize {
|
fn buffer_size(&self) -> usize {
|
||||||
cmp::max(DEFAULT_BUF_SIZE, self.capacity() - self.len())
|
cmp::max(DEFAULT_BUF_SIZE, self.capacity() - self.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
|
fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
|
||||||
let mut bytes = 0;
|
reader.read_to_end(self).map(|bytes| u64::try_from(bytes).expect("usize overflowed u64"))
|
||||||
|
|
||||||
// avoid inflating empty/small vecs before we have determined that there's anything to read
|
|
||||||
if self.capacity() < DEFAULT_BUF_SIZE {
|
|
||||||
let stack_read_limit = DEFAULT_BUF_SIZE as u64;
|
|
||||||
bytes = stack_buffer_copy(&mut reader.take(stack_read_limit), self)?;
|
|
||||||
// fewer bytes than requested -> EOF reached
|
|
||||||
if bytes < stack_read_limit {
|
|
||||||
return Ok(bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't immediately offer the vec's whole spare capacity, otherwise
|
|
||||||
// we might have to fully initialize it if the reader doesn't have a custom read_buf() impl
|
|
||||||
let mut max_read_size = DEFAULT_BUF_SIZE;
|
|
||||||
|
|
||||||
loop {
|
|
||||||
self.reserve(DEFAULT_BUF_SIZE);
|
|
||||||
let mut initialized_spare_capacity = 0;
|
|
||||||
|
|
||||||
loop {
|
|
||||||
let buf = self.spare_capacity_mut();
|
|
||||||
let read_size = min(max_read_size, buf.len());
|
|
||||||
let mut buf = BorrowedBuf::from(&mut buf[..read_size]);
|
|
||||||
// SAFETY: init is either 0 or the init_len from the previous iteration.
|
|
||||||
unsafe {
|
|
||||||
buf.set_init(initialized_spare_capacity);
|
|
||||||
}
|
|
||||||
match reader.read_buf(buf.unfilled()) {
|
|
||||||
Ok(()) => {
|
|
||||||
let bytes_read = buf.len();
|
|
||||||
|
|
||||||
// EOF
|
|
||||||
if bytes_read == 0 {
|
|
||||||
return Ok(bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
// the reader is returning short reads but it doesn't call ensure_init()
|
|
||||||
if buf.init_len() < buf.capacity() {
|
|
||||||
max_read_size = usize::MAX;
|
|
||||||
}
|
|
||||||
// the reader hasn't returned short reads so far
|
|
||||||
if bytes_read == buf.capacity() {
|
|
||||||
max_read_size *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
initialized_spare_capacity = buf.init_len() - bytes_read;
|
|
||||||
bytes += bytes_read as u64;
|
|
||||||
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
|
|
||||||
// and the number of read bytes can't exceed the spare capacity since
|
|
||||||
// that's what the buffer is borrowing from.
|
|
||||||
unsafe { self.set_len(self.len() + bytes_read) };
|
|
||||||
|
|
||||||
// spare capacity full, reserve more
|
|
||||||
if self.len() == self.capacity() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) if e.is_interrupted() => continue,
|
|
||||||
Err(e) => return Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
|
pub fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
writer: &mut W,
|
writer: &mut W,
|
||||||
) -> Result<u64> {
|
) -> Result<u64> {
|
||||||
|
@ -82,13 +82,16 @@ fn copy_specializes_bufreader() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn copy_specializes_to_vec() {
|
fn copy_specializes_to_vec() {
|
||||||
let cap = 123456;
|
let cap = DEFAULT_BUF_SIZE * 10;
|
||||||
let mut source = ShortReader { cap, observed_buffer: 0, read_size: 1337 };
|
let mut source = ShortReader { cap, observed_buffer: 0, read_size: DEFAULT_BUF_SIZE };
|
||||||
let mut sink = Vec::new();
|
let mut sink = Vec::new();
|
||||||
assert_eq!(cap as u64, io::copy(&mut source, &mut sink).unwrap());
|
let copied = io::copy(&mut source, &mut sink).unwrap();
|
||||||
|
assert_eq!(cap as u64, copied);
|
||||||
|
assert_eq!(sink.len() as u64, copied);
|
||||||
assert!(
|
assert!(
|
||||||
source.observed_buffer > DEFAULT_BUF_SIZE,
|
source.observed_buffer > DEFAULT_BUF_SIZE,
|
||||||
"expected a large buffer to be provided to the reader"
|
"expected a large buffer to be provided to the reader, got {}",
|
||||||
|
source.observed_buffer
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -397,12 +397,16 @@ pub(crate) unsafe fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This uses an adaptive system to extend the vector when it fills. We want to
|
// Here we must serve many masters with conflicting goals:
|
||||||
// avoid paying to allocate and zero a huge chunk of memory if the reader only
|
//
|
||||||
// has 4 bytes while still making large reads if the reader does have a ton
|
// - avoid allocating unless necessary
|
||||||
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
|
// - avoid overallocating if we know the exact size (#89165)
|
||||||
// time is 4,500 times (!) slower than a default reservation size of 32 if the
|
// - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820)
|
||||||
// reader has a very small amount of data to return.
|
// - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
|
||||||
|
// - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
|
||||||
|
// at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost
|
||||||
|
// proportional to buffer size (#110650)
|
||||||
|
//
|
||||||
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
|
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
|
||||||
r: &mut R,
|
r: &mut R,
|
||||||
buf: &mut Vec<u8>,
|
buf: &mut Vec<u8>,
|
||||||
@ -412,20 +416,58 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
|
|||||||
let start_cap = buf.capacity();
|
let start_cap = buf.capacity();
|
||||||
// Optionally limit the maximum bytes read on each iteration.
|
// Optionally limit the maximum bytes read on each iteration.
|
||||||
// This adds an arbitrary fiddle factor to allow for more data than we expect.
|
// This adds an arbitrary fiddle factor to allow for more data than we expect.
|
||||||
let max_read_size =
|
let mut max_read_size = size_hint
|
||||||
size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));
|
.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE))
|
||||||
|
.unwrap_or(DEFAULT_BUF_SIZE);
|
||||||
|
|
||||||
let mut initialized = 0; // Extra initialized bytes from previous loop iteration
|
let mut initialized = 0; // Extra initialized bytes from previous loop iteration
|
||||||
|
|
||||||
|
const PROBE_SIZE: usize = 32;
|
||||||
|
|
||||||
|
fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
|
||||||
|
let mut probe = [0u8; PROBE_SIZE];
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match r.read(&mut probe) {
|
||||||
|
Ok(n) => {
|
||||||
|
buf.extend_from_slice(&probe[..n]);
|
||||||
|
return Ok(n);
|
||||||
|
}
|
||||||
|
Err(ref e) if e.is_interrupted() => continue,
|
||||||
|
Err(e) => return Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// avoid inflating empty/small vecs before we have determined that there's anything to read
|
||||||
|
if (size_hint.is_none() || size_hint == Some(0)) && buf.capacity() - buf.len() < PROBE_SIZE {
|
||||||
|
let read = small_probe_read(r, buf)?;
|
||||||
|
|
||||||
|
if read == 0 {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
if buf.len() == buf.capacity() && buf.capacity() == start_cap {
|
||||||
|
// The buffer might be an exact fit. Let's read into a probe buffer
|
||||||
|
// and see if it returns `Ok(0)`. If so, we've avoided an
|
||||||
|
// unnecessary doubling of the capacity. But if not, append the
|
||||||
|
// probe buffer to the primary buffer and let its capacity grow.
|
||||||
|
let read = small_probe_read(r, buf)?;
|
||||||
|
|
||||||
|
if read == 0 {
|
||||||
|
return Ok(buf.len() - start_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if buf.len() == buf.capacity() {
|
if buf.len() == buf.capacity() {
|
||||||
buf.reserve(32); // buf is full, need more space
|
buf.reserve(PROBE_SIZE); // buf is full, need more space
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut spare = buf.spare_capacity_mut();
|
let mut spare = buf.spare_capacity_mut();
|
||||||
if let Some(size) = max_read_size {
|
let buf_len = cmp::min(spare.len(), max_read_size);
|
||||||
let len = cmp::min(spare.len(), size);
|
spare = &mut spare[..buf_len];
|
||||||
spare = &mut spare[..len]
|
|
||||||
}
|
|
||||||
let mut read_buf: BorrowedBuf<'_> = spare.into();
|
let mut read_buf: BorrowedBuf<'_> = spare.into();
|
||||||
|
|
||||||
// SAFETY: These bytes were initialized but not filled in the previous loop
|
// SAFETY: These bytes were initialized but not filled in the previous loop
|
||||||
@ -434,42 +476,44 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut cursor = read_buf.unfilled();
|
let mut cursor = read_buf.unfilled();
|
||||||
match r.read_buf(cursor.reborrow()) {
|
loop {
|
||||||
Ok(()) => {}
|
match r.read_buf(cursor.reborrow()) {
|
||||||
Err(e) if e.is_interrupted() => continue,
|
Ok(()) => break,
|
||||||
Err(e) => return Err(e),
|
Err(e) if e.is_interrupted() => continue,
|
||||||
|
Err(e) => return Err(e),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cursor.written() == 0 {
|
let unfilled_but_initialized = cursor.init_ref().len();
|
||||||
|
let bytes_read = cursor.written();
|
||||||
|
let was_fully_initialized = read_buf.init_len() == buf_len;
|
||||||
|
|
||||||
|
if bytes_read == 0 {
|
||||||
return Ok(buf.len() - start_len);
|
return Ok(buf.len() - start_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
// store how much was initialized but not filled
|
// store how much was initialized but not filled
|
||||||
initialized = cursor.init_ref().len();
|
initialized = unfilled_but_initialized;
|
||||||
|
|
||||||
// SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
|
// SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
|
||||||
unsafe {
|
unsafe {
|
||||||
let new_len = read_buf.filled().len() + buf.len();
|
let new_len = bytes_read + buf.len();
|
||||||
buf.set_len(new_len);
|
buf.set_len(new_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
if buf.len() == buf.capacity() && buf.capacity() == start_cap {
|
// Use heuristics to determine the max read size if no initial size hint was provided
|
||||||
// The buffer might be an exact fit. Let's read into a probe buffer
|
if size_hint.is_none() {
|
||||||
// and see if it returns `Ok(0)`. If so, we've avoided an
|
// The reader is returning short reads but it doesn't call ensure_init().
|
||||||
// unnecessary doubling of the capacity. But if not, append the
|
// In that case we no longer need to restrict read sizes to avoid
|
||||||
// probe buffer to the primary buffer and let its capacity grow.
|
// initialization costs.
|
||||||
let mut probe = [0u8; 32];
|
if !was_fully_initialized {
|
||||||
|
max_read_size = usize::MAX;
|
||||||
|
}
|
||||||
|
|
||||||
loop {
|
// we have passed a larger buffer than previously and the
|
||||||
match r.read(&mut probe) {
|
// reader still hasn't returned a short read
|
||||||
Ok(0) => return Ok(buf.len() - start_len),
|
if buf_len >= max_read_size && bytes_read == buf_len {
|
||||||
Ok(n) => {
|
max_read_size = max_read_size.saturating_mul(2);
|
||||||
buf.extend_from_slice(&probe[..n]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(ref e) if e.is_interrupted() => continue,
|
|
||||||
Err(e) => return Err(e),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user