From ccb4e8423e50fef0f13a642715c3e617ee9f78fe Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Sat, 28 Mar 2015 18:47:29 -0700 Subject: [PATCH] Fix massive performance issue in read_to_end with_end_to_cap is enormously expensive now that it's initializing memory since it involves 64k allocation + memset on every call. This is most noticable when calling read_to_end on very small readers, where the new version if **4 orders of magnitude** faster. BufReader also depended on with_end_to_cap so I've rewritten it in its original form. As a bonus, converted the buffered IO struct Debug impls to use the debug builders. Fixes #23815 --- src/libstd/io/buffered.rs | 71 ++++++++++++++++++++++++--------------- src/libstd/io/mod.rs | 64 ++++++++++++++++++----------------- src/libstd/lib.rs | 1 + 3 files changed, 77 insertions(+), 59 deletions(-) diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index 998f37e6a68..98581fc43f8 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -18,8 +18,9 @@ use cmp; use error::{self, FromError}; use fmt; -use io::{self, Cursor, DEFAULT_BUF_SIZE, Error, ErrorKind}; +use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind}; use ptr; +use iter; /// Wraps a `Read` and buffers input from it /// @@ -30,7 +31,9 @@ #[stable(feature = "rust1", since = "1.0.0")] pub struct BufReader { inner: R, - buf: Cursor>, + buf: Vec, + pos: usize, + cap: usize, } impl BufReader { @@ -43,9 +46,13 @@ pub fn new(inner: R) -> BufReader { /// Creates a new `BufReader` with the specified buffer capacity #[stable(feature = "rust1", since = "1.0.0")] pub fn with_capacity(cap: usize, inner: R) -> BufReader { + let mut buf = Vec::with_capacity(cap); + buf.extend(iter::repeat(0).take(cap)); BufReader { inner: inner, - buf: Cursor::new(Vec::with_capacity(cap)), + buf: buf, + pos: 0, + cap: 0, } } @@ -74,12 +81,15 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { // If we don't have any buffered data and we're doing a massive read // (larger than our internal buffer), bypass our internal buffer // entirely. - if self.buf.get_ref().len() == self.buf.position() as usize && - buf.len() >= self.buf.get_ref().capacity() { + if self.pos == self.cap && buf.len() >= self.buf.len() { return self.inner.read(buf); } - try!(self.fill_buf()); - self.buf.read(buf) + let nread = { + let mut rem = try!(self.fill_buf()); + try!(rem.read(buf)) + }; + self.consume(nread); + Ok(nread) } } @@ -88,26 +98,25 @@ impl BufRead for BufReader { fn fill_buf(&mut self) -> io::Result<&[u8]> { // If we've reached the end of our internal buffer then we need to fetch // some more data from the underlying reader. - if self.buf.position() as usize == self.buf.get_ref().len() { - self.buf.set_position(0); - let v = self.buf.get_mut(); - v.truncate(0); - let inner = &mut self.inner; - try!(super::with_end_to_cap(v, |b| inner.read(b))); + if self.pos == self.cap { + self.cap = try!(self.inner.read(&mut self.buf)); + self.pos = 0; } - self.buf.fill_buf() + Ok(&self.buf[self.pos..self.cap]) } fn consume(&mut self, amt: usize) { - self.buf.consume(amt) + self.pos = cmp::min(self.pos + amt, self.cap); } } #[stable(feature = "rust1", since = "1.0.0")] impl fmt::Debug for BufReader where R: fmt::Debug { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "BufReader {{ reader: {:?}, buffer: {}/{} }}", - self.inner, self.buf.position(), self.buf.get_ref().len()) + fmt.debug_struct("BufReader") + .field("reader", &self.inner) + .field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len())) + .finish() } } @@ -222,8 +231,10 @@ fn flush(&mut self) -> io::Result<()> { #[stable(feature = "rust1", since = "1.0.0")] impl fmt::Debug for BufWriter where W: fmt::Debug { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "BufWriter {{ writer: {:?}, buffer: {}/{} }}", - self.inner.as_ref().unwrap(), self.buf.len(), self.buf.capacity()) + fmt.debug_struct("BufWriter") + .field("writer", &self.inner.as_ref().unwrap()) + .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity())) + .finish() } } @@ -337,9 +348,11 @@ fn flush(&mut self) -> io::Result<()> { self.inner.flush() } #[stable(feature = "rust1", since = "1.0.0")] impl fmt::Debug for LineWriter where W: fmt::Debug { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "LineWriter {{ writer: {:?}, buffer: {}/{} }}", - self.inner.inner, self.inner.buf.len(), - self.inner.buf.capacity()) + fmt.debug_struct("LineWriter") + .field("writer", &self.inner.inner) + .field("buffer", + &format_args!("{}/{}", self.inner.buf.len(), self.inner.buf.capacity())) + .finish() } } @@ -415,10 +428,10 @@ pub fn get_mut(&mut self) -> &mut S { /// Any leftover data in the read buffer is lost. #[stable(feature = "rust1", since = "1.0.0")] pub fn into_inner(self) -> Result>> { - let BufReader { inner: InternalBufWriter(w), buf } = self.inner; + let BufReader { inner: InternalBufWriter(w), buf, pos, cap } = self.inner; w.into_inner().map_err(|IntoInnerError(w, e)| { IntoInnerError(BufStream { - inner: BufReader { inner: InternalBufWriter(w), buf: buf }, + inner: BufReader { inner: InternalBufWriter(w), buf: buf, pos: pos, cap: cap }, }, e) }) } @@ -452,10 +465,12 @@ impl fmt::Debug for BufStream where S: fmt::Debug { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { let reader = &self.inner; let writer = &self.inner.inner.0; - write!(fmt, "BufStream {{ stream: {:?}, write_buffer: {}/{}, read_buffer: {}/{} }}", - writer.inner, - writer.buf.len(), writer.buf.capacity(), - reader.buf.position(), reader.buf.get_ref().len()) + fmt.debug_struct("BufStream") + .field("stream", &writer.inner) + .field("write_buffer", &format_args!("{}/{}", writer.buf.len(), writer.buf.capacity())) + .field("read_buffer", + &format_args!("{}/{}", reader.cap - reader.pos, reader.buf.len())) + .finish() } } diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 5d62f1341e3..4a93d96becc 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -48,30 +48,6 @@ const DEFAULT_BUF_SIZE: usize = 64 * 1024; -// Acquires a slice of the vector `v` from its length to its capacity -// (after initializing the data), reads into it, and then updates the length. -// -// This function is leveraged to efficiently read some bytes into a destination -// vector without extra copying and taking advantage of the space that's already -// in `v`. -fn with_end_to_cap(v: &mut Vec, f: F) -> Result - where F: FnOnce(&mut [u8]) -> Result -{ - let len = v.len(); - let new_area = v.capacity() - len; - v.extend(iter::repeat(0).take(new_area)); - match f(&mut v[len..]) { - Ok(n) => { - v.truncate(len + n); - Ok(n) - } - Err(e) => { - v.truncate(len); - Err(e) - } - } -} - // A few methods below (read_to_string, read_line) will append data into a // `String` buffer, but we need to be pretty careful when doing this. The // implementation will just call `.as_mut_vec()` and then delegate to a @@ -116,19 +92,45 @@ fn drop(&mut self) { } } +// This uses an adaptive system to extend the vector when it fills. We want to +// avoid paying to allocate and zero a huge chunk of memory if the reader only +// has 4 bytes while still making large reads if the reader does have a ton +// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every +// time is 4,500 times (!) slower than this if the reader has a very small +// amount of data to return. fn read_to_end(r: &mut R, buf: &mut Vec) -> Result { - let mut read = 0; + let start_len = buf.len(); + let mut len = start_len; + let mut cap_bump = 16; + let ret; loop { - if buf.capacity() == buf.len() { - buf.reserve(DEFAULT_BUF_SIZE); + if len == buf.len() { + if buf.capacity() == buf.len() { + if cap_bump < DEFAULT_BUF_SIZE { + cap_bump *= 2; + } + buf.reserve(cap_bump); + } + let new_area = buf.capacity() - buf.len(); + buf.extend(iter::repeat(0).take(new_area)); } - match with_end_to_cap(buf, |b| r.read(b)) { - Ok(0) => return Ok(read), - Ok(n) => read += n, + + match r.read(&mut buf[len..]) { + Ok(0) => { + ret = Ok(len - start_len); + break; + } + Ok(n) => len += n, Err(ref e) if e.kind() == ErrorKind::Interrupted => {} - Err(e) => return Err(e), + Err(e) => { + ret = Err(e); + break; + } } } + + buf.truncate(len); + ret } /// A trait for objects which are byte-oriented sources. diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 5f5f2fed567..b7cb8f9ed50 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -128,6 +128,7 @@ #![feature(into_cow)] #![feature(slice_patterns)] #![feature(std_misc)] +#![feature(debug_builders)] #![cfg_attr(test, feature(test, rustc_private, std_misc))] // Don't link to std. We are std.