diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 989cec976b7..1569b0415c6 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -1,9 +1,10 @@ -use crate::cmp; +mod buffer; + use crate::fmt; use crate::io::{ self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE, }; -use crate::mem::MaybeUninit; +use buffer::Buffer; /// The `BufReader` struct adds buffering to any reader. /// @@ -48,10 +49,7 @@ #[stable(feature = "rust1", since = "1.0.0")] pub struct BufReader { inner: R, - buf: Box<[MaybeUninit]>, - pos: usize, - cap: usize, - init: usize, + buf: Buffer, } impl BufReader { @@ -93,8 +91,7 @@ pub fn new(inner: R) -> BufReader { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn with_capacity(capacity: usize, inner: R) -> BufReader { - let buf = Box::new_uninit_slice(capacity); - BufReader { inner, buf, pos: 0, cap: 0, init: 0 } + BufReader { inner, buf: Buffer::with_capacity(capacity) } } } @@ -170,8 +167,7 @@ pub fn get_mut(&mut self) -> &mut R { /// ``` #[stable(feature = "bufreader_buffer", since = "1.37.0")] pub fn buffer(&self) -> &[u8] { - // SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init - unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) } + self.buf.buffer() } /// Returns the number of bytes the internal buffer can hold at once. @@ -194,7 +190,7 @@ pub fn buffer(&self) -> &[u8] { /// ``` #[stable(feature = "buffered_io_capacity", since = "1.46.0")] pub fn capacity(&self) -> usize { - self.buf.len() + self.buf.capacity() } /// Unwraps this `BufReader`, returning the underlying reader. @@ -224,8 +220,7 @@ pub fn into_inner(self) -> R { /// Invalidates all data in the internal buffer. 
#[inline] fn discard_buffer(&mut self) { - self.pos = 0; - self.cap = 0; + self.buf.discard_buffer() } } @@ -236,15 +231,15 @@ impl BufReader { /// must track this information themselves if it is required. #[stable(feature = "bufreader_seek_relative", since = "1.53.0")] pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> { - let pos = self.pos as u64; + let pos = self.buf.pos() as u64; if offset < 0 { - if let Some(new_pos) = pos.checked_sub((-offset) as u64) { - self.pos = new_pos as usize; + if let Some(_) = pos.checked_sub((-offset) as u64) { + self.buf.unconsume((-offset) as usize); return Ok(()); } } else if let Some(new_pos) = pos.checked_add(offset as u64) { - if new_pos <= self.cap as u64 { - self.pos = new_pos as usize; + if new_pos <= self.buf.cap() as u64 { + self.buf.consume(offset as usize); return Ok(()); } } @@ -259,7 +254,7 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { // If we don't have any buffered data and we're doing a massive read // (larger than our internal buffer), bypass our internal buffer // entirely. - if self.pos == self.cap && buf.len() >= self.buf.len() { + if self.buf.pos() == self.buf.cap() && buf.len() >= self.capacity() { self.discard_buffer(); return self.inner.read(buf); } @@ -275,7 +270,7 @@ fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> { // If we don't have any buffered data and we're doing a massive read // (larger than our internal buffer), bypass our internal buffer // entirely. - if self.pos == self.cap && buf.remaining() >= self.buf.len() { + if self.buf.pos() == self.buf.cap() && buf.remaining() >= self.capacity() { self.discard_buffer(); return self.inner.read_buf(buf); } @@ -295,9 +290,9 @@ fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> { // generation for the common path where the buffer has enough bytes to fill the passed-in // buffer. 
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - if self.buffer().len() >= buf.len() { - buf.copy_from_slice(&self.buffer()[..buf.len()]); - self.consume(buf.len()); + if let Some(claimed) = self.buffer().get(..buf.len()) { + buf.copy_from_slice(claimed); + self.consume(claimed.len()); return Ok(()); } @@ -306,7 +301,7 @@ fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { let total_len = bufs.iter().map(|b| b.len()).sum::(); - if self.pos == self.cap && total_len >= self.buf.len() { + if self.buf.pos() == self.buf.cap() && total_len >= self.capacity() { self.discard_buffer(); return self.inner.read_vectored(bufs); } @@ -325,8 +320,9 @@ fn is_read_vectored(&self) -> bool { // The inner reader might have an optimized `read_to_end`. Drain our buffer and then // delegate to the inner implementation. fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - let nread = self.cap - self.pos; - buf.extend_from_slice(&self.buffer()); + let inner_buf = self.buffer(); + buf.extend_from_slice(inner_buf); + let nread = inner_buf.len(); self.discard_buffer(); Ok(nread + self.inner.read_to_end(buf)?) } @@ -371,33 +367,11 @@ fn read_to_string(&mut self, buf: &mut String) -> io::Result { #[stable(feature = "rust1", since = "1.0.0")] impl BufRead for BufReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { - // If we've reached the end of our internal buffer then we need to fetch - // some more data from the underlying reader. - // Branch using `>=` instead of the more correct `==` - // to tell the compiler that the pos..cap slice is always valid. 
- if self.pos >= self.cap { - debug_assert!(self.pos == self.cap); - - let mut readbuf = ReadBuf::uninit(&mut self.buf); - - // SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()` - // from the last time this function was called - unsafe { - readbuf.assume_init(self.init); - } - - self.inner.read_buf(&mut readbuf)?; - - self.cap = readbuf.filled_len(); - self.init = readbuf.initialized_len(); - - self.pos = 0; - } - Ok(self.buffer()) + self.buf.fill_buf(&mut self.inner) } fn consume(&mut self, amt: usize) { - self.pos = cmp::min(self.pos + amt, self.cap); + self.buf.consume(amt) } } @@ -409,7 +383,10 @@ impl fmt::Debug for BufReader fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("BufReader") .field("reader", &self.inner) - .field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len())) + .field( + "buffer", + &format_args!("{}/{}", self.buf.cap() - self.buf.pos(), self.capacity()), + ) .finish() } } @@ -441,7 +418,7 @@ impl Seek for BufReader { fn seek(&mut self, pos: SeekFrom) -> io::Result { let result: u64; if let SeekFrom::Current(n) = pos { - let remainder = (self.cap - self.pos) as i64; + let remainder = (self.buf.cap() - self.buf.pos()) as i64; // it should be safe to assume that remainder fits within an i64 as the alternative // means we managed to allocate 8 exbibytes and that's absurd. 
// But it's not out of the realm of possibility for some weird underlying reader to @@ -499,7 +476,7 @@ fn seek(&mut self, pos: SeekFrom) -> io::Result { /// } /// ``` fn stream_position(&mut self) -> io::Result { - let remainder = (self.cap - self.pos) as u64; + let remainder = (self.buf.cap() - self.buf.pos()) as u64; self.inner.stream_position().map(|pos| { pos.checked_sub(remainder).expect( "overflow when subtracting remaining buffer size from inner stream position", diff --git a/library/std/src/io/buffered/bufreader/buffer.rs b/library/std/src/io/buffered/bufreader/buffer.rs new file mode 100644 index 00000000000..92fe47745d9 --- /dev/null +++ b/library/std/src/io/buffered/bufreader/buffer.rs @@ -0,0 +1,75 @@ +use crate::cmp; +use crate::io::{self, Read, ReadBuf}; +use crate::mem::MaybeUninit; + +pub struct Buffer { + buf: Box<[MaybeUninit]>, + pos: usize, + cap: usize, + init: usize, +} + +impl Buffer { + pub fn with_capacity(capacity: usize) -> Self { + let buf = Box::new_uninit_slice(capacity); + Self { buf, pos: 0, cap: 0, init: 0 } + } + + pub fn buffer(&self) -> &[u8] { + // SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init + // Additionally, both self.pos and self.cap are valid and self.cap >= self.pos, and + // that region is initialized because those are all invariants of this type. 
+ unsafe { MaybeUninit::slice_assume_init_ref(&self.buf.get_unchecked(self.pos..self.cap)) } + } + + pub fn capacity(&self) -> usize { + self.buf.len() + } + + pub fn cap(&self) -> usize { + self.cap + } + + pub fn pos(&self) -> usize { + self.pos + } + + pub fn discard_buffer(&mut self) { + self.pos = 0; + self.cap = 0; + } + + pub fn consume(&mut self, amt: usize) { + self.pos = cmp::min(self.pos + amt, self.cap); + } + + pub fn unconsume(&mut self, amt: usize) { + self.pos = self.pos.saturating_sub(amt); + } + + pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> { + // If we've reached the end of our internal buffer then we need to fetch + // some more data from the underlying reader. + // Branch using `>=` instead of the more correct `==` + // to tell the compiler that the pos..cap slice is always valid. + if self.pos >= self.cap { + debug_assert!(self.pos == self.cap); + + let mut readbuf = ReadBuf::uninit(&mut self.buf); + + // SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()` + // from the last time this function was called + unsafe { + readbuf.assume_init(self.init); + } + + reader.read_buf(&mut readbuf)?; + + self.cap = readbuf.filled_len(); + self.init = readbuf.initialized_len(); + + self.pos = 0; + } + Ok(self.buffer()) + } +} diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index 9d429e7090e..fe45b132638 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ -523,6 +523,7 @@ fn bench_buffered_reader_small_reads(b: &mut test::Bencher) { let mut buf = [0u8; 4]; for _ in 0..1024 { reader.read_exact(&mut buf).unwrap(); + core::hint::black_box(&buf); } }); }