Auto merge of #117925 - kornelski:read-to-oom, r=Amanieu

Handle out of memory errors in io:Read::read_to_end()

#116570 got stuck due to a [procedural confusion](https://github.com/rust-lang/rust/pull/116570#issuecomment-1768271068). Retrying so that it can get FCP with the proper team now. cc `@joshtriplett` `@BurntSushi`

----

I'd like to propose handling of out-of-memory errors in the default implementation of `io::Read::read_to_end()` and `fs::read()`. These methods create/grow a `Vec` with a size that is external to the program, and could be arbitrarily large.

Due to being I/O methods, they can already fail in a variety of ways, in theory even including `ENOMEM` from the OS too, so another failure case should not surprise anyone.

While this may not help much Linux with overcommit, it's useful for other platforms like WASM. [Internals thread](https://internals.rust-lang.org/t/io-read-read-to-end-should-handle-oom/19662).

I've added documentation that makes it explicit that the OOM handling is a nice-to-have, and not a guarantee of the trait.

I haven't changed the implementation of `impl Read for &[u8]` and `VecDeque` out of caution, because in these cases users could assume `read` can't fail.

This code uses `try_reserve()` + `extend_from_slice()` which is optimized since #117503.
This commit is contained in:
bors 2024-01-30 05:10:11 +00:00
commit 5c9c3c7871
4 changed files with 47 additions and 7 deletions

View File

@ -260,7 +260,8 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
fn inner(path: &Path) -> io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
let mut bytes = Vec::new();
bytes.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_end(&mut file, &mut bytes, size)?;
Ok(bytes)
}
@ -302,7 +303,8 @@ pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
fn inner(path: &Path) -> io::Result<String> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut string = String::with_capacity(size.unwrap_or(0));
let mut string = String::new();
string.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_string(&mut file, &mut string, size)?;
Ok(string)
}
@ -774,14 +776,14 @@ fn is_read_vectored(&self) -> bool {
// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_end(self, buf, size)
}
// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_string(self, buf, size)
}
}

View File

@ -345,6 +345,7 @@ fn is_read_vectored(&self) -> bool {
// delegate to the inner implementation.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let inner_buf = self.buffer();
buf.try_reserve(inner_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
buf.extend_from_slice(inner_buf);
let nread = inner_buf.len();
self.discard_buffer();

View File

@ -303,8 +303,9 @@ fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
#[inline]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.extend_from_slice(*self);
let len = self.len();
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;
buf.extend_from_slice(*self);
*self = &self[len..];
Ok(len)
}
@ -451,7 +452,7 @@ fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
// The total len is known upfront so we can reserve it in a single call.
let len = self.len();
buf.reserve(len);
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;
let (front, back) = self.as_slices();
buf.extend_from_slice(front);

View File

@ -430,6 +430,8 @@ fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<us
loop {
match r.read(&mut probe) {
Ok(n) => {
// there is no way to recover from allocation failure here
// because the data has already been read.
buf.extend_from_slice(&probe[..n]);
return Ok(n);
}
@ -462,7 +464,8 @@ fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<us
}
if buf.len() == buf.capacity() {
buf.reserve(PROBE_SIZE); // buf is full, need more space
// buf is full, need more space
buf.try_reserve(PROBE_SIZE).map_err(|_| ErrorKind::OutOfMemory)?;
}
let mut spare = buf.spare_capacity_mut();
@ -815,6 +818,39 @@ fn is_read_vectored(&self) -> bool {
/// file.)
///
/// [`std::fs::read`]: crate::fs::read
///
/// ## Implementing `read_to_end`
///
/// When implementing the `io::Read` trait, it is recommended to allocate
/// memory using [`Vec::try_reserve`]. However, this behavior is not guaranteed
/// by all implementations, and `read_to_end` may not handle out-of-memory
/// situations gracefully.
///
/// ```no_run
/// # use std::io::{self, BufRead};
/// # struct Example { example_datasource: io::Empty } impl Example {
/// # fn get_some_data_for_the_example(&self) -> &'static [u8] { &[] }
/// fn read_to_end(&mut self, dest_vec: &mut Vec<u8>) -> io::Result<usize> {
/// let initial_vec_len = dest_vec.len();
/// loop {
/// let src_buf = self.example_datasource.fill_buf()?;
/// if src_buf.is_empty() {
/// break;
/// }
/// dest_vec.try_reserve(src_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
/// dest_vec.extend_from_slice(src_buf);
///
/// // Any irreversible side effects should happen after `try_reserve` succeeds,
/// // to avoid losing data on allocation error.
/// let read = src_buf.len();
/// self.example_datasource.consume(read);
/// }
/// Ok(dest_vec.len() - initial_vec_len)
/// }
/// # }
/// ```
///
/// [`Vec::try_reserve`]: crate::vec::Vec::try_reserve
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
default_read_to_end(self, buf, None)