rust/library/std/src/io/util.rs
bors 30e49a9ead Auto merge of #75272 - the8472:spec-copy, r=KodrAus
specialize io::copy to use copy_file_range, splice or sendfile

Fixes #74426.
Also covers #60689 but only as an optimization instead of an official API.

The specialization only covers std-owned structs so it should avoid the problems with #71091

Currently linux-only but it should be generalizable to other unix systems that have sendfile/sosplice and similar.

There is a bit of optimization potential around the syscall count. Right now it may end up doing more syscalls than the naive copy loop when doing short (<8KiB) copies between file descriptors.

The test case executes the following:

```
[pid 103776] statx(3, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_ALL, {stx_mask=STATX_ALL|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFREG|0644, stx_size=17, ...}) = 0
[pid 103776] write(4, "wxyz", 4)        = 4
[pid 103776] write(4, "iklmn", 5)       = 5
[pid 103776] copy_file_range(3, NULL, 4, NULL, 5, 0) = 5

```

0-1 `stat` calls to identify the source file type. 0 if the type can be inferred from the struct from which the FD was extracted
𝖬 `write` to drain the `BufReader`/`BufWriter` wrappers. only happen when buffers are present. 𝖬 ≾ number of wrappers present. If there is a write buffer it may absorb the read buffer contents first so only result in a single write. Vectored writes would also be an option but that would require more invasive changes to `BufWriter`.
𝖭 `copy_file_range`/`splice`/`sendfile` until file size, EOF or the byte limit from `Take` is reached. This should generally be *much* more efficient than the read-write loop and also have other benefits such as DMA offload or extent sharing.

## Benchmarks

```

OLD

test io::tests::bench_file_to_file_copy         ... bench:      21,002 ns/iter (+/- 750) = 6240 MB/s    [ext4]
test io::tests::bench_file_to_file_copy         ... bench:      35,704 ns/iter (+/- 1,108) = 3671 MB/s  [btrfs]
test io::tests::bench_file_to_socket_copy       ... bench:      57,002 ns/iter (+/- 4,205) = 2299 MB/s
test io::tests::bench_socket_pipe_socket_copy   ... bench:     142,640 ns/iter (+/- 77,851) = 918 MB/s

NEW

test io::tests::bench_file_to_file_copy         ... bench:      14,745 ns/iter (+/- 519) = 8889 MB/s    [ext4]
test io::tests::bench_file_to_file_copy         ... bench:       6,128 ns/iter (+/- 227) = 21389 MB/s   [btrfs]
test io::tests::bench_file_to_socket_copy       ... bench:      13,767 ns/iter (+/- 3,767) = 9520 MB/s
test io::tests::bench_socket_pipe_socket_copy   ... bench:      26,471 ns/iter (+/- 6,412) = 4951 MB/s
```
2020-11-14 12:01:55 +00:00

219 lines
5.3 KiB
Rust

#![allow(missing_copy_implementations)]
#[cfg(test)]
mod tests;
use crate::fmt;
use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Write};
/// A reader which is always at EOF.
///
/// This struct is generally created by calling [`empty()`]. Please see
/// the documentation of [`empty()`] for more details.
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Empty {
_priv: (),
}
/// Constructs a new handle to an empty reader.
///
/// All reads from the returned reader will return [`Ok`]`(0)`.
///
/// # Examples
///
/// A slightly sad example of not reading anything into a buffer:
///
/// ```
/// use std::io::{self, Read};
///
/// let mut buffer = String::new();
/// io::empty().read_to_string(&mut buffer).unwrap();
/// assert!(buffer.is_empty());
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_io_structs", issue = "78812")]
pub const fn empty() -> Empty {
Empty { _priv: () }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Read for Empty {
#[inline]
fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
Ok(0)
}
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl BufRead for Empty {
#[inline]
fn fill_buf(&mut self) -> io::Result<&[u8]> {
Ok(&[])
}
#[inline]
fn consume(&mut self, _n: usize) {}
}
#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for Empty {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.pad("Empty { .. }")
}
}
/// A reader which yields one byte over and over and over and over and over and...
///
/// This struct is generally created by calling [`repeat()`]. Please
/// see the documentation of [`repeat()`] for more details.
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Repeat {
byte: u8,
}
/// Creates an instance of a reader that infinitely repeats one byte.
///
/// All reads from this reader will succeed by filling the specified buffer with
/// the given byte.
///
/// # Examples
///
/// ```
/// use std::io::{self, Read};
///
/// let mut buffer = [0; 3];
/// io::repeat(0b101).read_exact(&mut buffer).unwrap();
/// assert_eq!(buffer, [0b101, 0b101, 0b101]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_io_structs", issue = "78812")]
pub const fn repeat(byte: u8) -> Repeat {
Repeat { byte }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Read for Repeat {
#[inline]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
for slot in &mut *buf {
*slot = self.byte;
}
Ok(buf.len())
}
#[inline]
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
let mut nwritten = 0;
for buf in bufs {
nwritten += self.read(buf)?;
}
Ok(nwritten)
}
#[inline]
fn is_read_vectored(&self) -> bool {
true
}
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}
#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for Repeat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.pad("Repeat { .. }")
}
}
/// A writer which will move data into the void.
///
/// This struct is generally created by calling [`sink`]. Please
/// see the documentation of [`sink()`] for more details.
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Sink {
_priv: (),
}
/// Creates an instance of a writer which will successfully consume all data.
///
/// All calls to [`write`] on the returned instance will return `Ok(buf.len())`
/// and the contents of the buffer will not be inspected.
///
/// [`write`]: Write::write
///
/// # Examples
///
/// ```rust
/// use std::io::{self, Write};
///
/// let buffer = vec![1, 2, 3, 5, 8];
/// let num_bytes = io::sink().write(&buffer).unwrap();
/// assert_eq!(num_bytes, 5);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_io_structs", issue = "78812")]
pub const fn sink() -> Sink {
Sink { _priv: () }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Write for Sink {
#[inline]
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
Ok(buf.len())
}
#[inline]
fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
let total_len = bufs.iter().map(|b| b.len()).sum();
Ok(total_len)
}
#[inline]
fn is_write_vectored(&self) -> bool {
true
}
#[inline]
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
#[stable(feature = "write_mt", since = "1.48.0")]
impl Write for &Sink {
#[inline]
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
Ok(buf.len())
}
#[inline]
fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
let total_len = bufs.iter().map(|b| b.len()).sum();
Ok(total_len)
}
#[inline]
fn is_write_vectored(&self) -> bool {
true
}
#[inline]
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for Sink {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.pad("Sink { .. }")
}
}