From 181ce631832b066febd966043f0accea91fb66d4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 13 Oct 2023 08:26:49 +1100 Subject: [PATCH] Specialize `Bytes::next` when `R` is a `BufReader`. This reduces the runtime for a simple program using `Bytes::next` to iterate through a file from 220ms to 70ms on my Linux box. --- library/std/src/io/buffered/bufreader.rs | 25 +++++++++--- library/std/src/io/mod.rs | 50 +++++++++++++++++++----- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 7097dfef88d..55aafc3db33 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -2,7 +2,8 @@ use crate::fmt; use crate::io::{ - self, BorrowedCursor, BufRead, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE, + self, uninlined_slow_read_byte, BorrowedCursor, BufRead, IoSliceMut, Read, Seek, SeekFrom, + SizeHint, SpecReadByte, DEFAULT_BUF_SIZE, }; use buffer::Buffer; @@ -259,6 +260,22 @@ pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> { } } +impl SpecReadByte for BufReader +where + Self: Read, +{ + #[inline] + fn spec_read_byte(&mut self) -> Option> { + let mut byte = 0; + if self.buf.consume_with(1, |claimed| byte = claimed[0]) { + return Some(Ok(byte)); + } + + // Fallback case, only reached once per buffer refill. + uninlined_slow_read_byte(self) + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl Read for BufReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { @@ -269,10 +286,8 @@ fn read(&mut self, buf: &mut [u8]) -> io::Result { self.discard_buffer(); return self.inner.read(buf); } - let nread = { - let mut rem = self.fill_buf()?; - rem.read(buf)? - }; + let mut rem = self.fill_buf()?; + let nread = rem.read(buf)?; self.consume(nread); Ok(nread) } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index c0a72948112..063464046e0 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2777,17 +2777,10 @@ pub struct Bytes { impl Iterator for Bytes { type Item = Result; - #[inline] + // Not `#[inline]`. This function gets inlined even without it, but having + // the inline annotation can result in worse code generation. See #116785. fn next(&mut self) -> Option> { - let mut byte = 0; - loop { - return match self.inner.read(slice::from_mut(&mut byte)) { - Ok(0) => None, - Ok(..) => Some(Ok(byte)), - Err(ref e) if e.is_interrupted() => continue, - Err(e) => Some(Err(e)), - }; - } + SpecReadByte::spec_read_byte(&mut self.inner) } #[inline] @@ -2796,6 +2789,43 @@ fn size_hint(&self) -> (usize, Option) { } } +/// For the specialization of `Bytes::next`. +trait SpecReadByte { + fn spec_read_byte(&mut self) -> Option>; +} + +impl SpecReadByte for R +where + Self: Read, +{ + #[inline] + default fn spec_read_byte(&mut self) -> Option> { + inlined_slow_read_byte(self) + } +} + +/// Read a single byte in a slow, generic way. This is used by the default +/// `spec_read_byte`. +#[inline] +fn inlined_slow_read_byte(reader: &mut R) -> Option> { + let mut byte = 0; + loop { + return match reader.read(slice::from_mut(&mut byte)) { + Ok(0) => None, + Ok(..) => Some(Ok(byte)), + Err(ref e) if e.is_interrupted() => continue, + Err(e) => Some(Err(e)), + }; + } +} + +// Used by `BufReader::spec_read_byte`, for which the `inline(ever)` is +// important. +#[inline(never)] +fn uninlined_slow_read_byte(reader: &mut R) -> Option> { + inlined_slow_read_byte(reader) +} + trait SizeHint { fn lower_bound(&self) -> usize;