wait for two short reads before uncapping the max read size

for disk IO:
1st short read = probably at end of file
2nd short read = confirming that it's indeed EOF
This commit is contained in:
The 8472 2024-09-21 18:50:29 +02:00
parent 74fd001cda
commit ca1a2a6457

View File

@@ -398,8 +398,7 @@ pub(crate) unsafe fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize
// - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820) // - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820)
// - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads // - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
// - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems // - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
// at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost // at the same time, i.e. small reads suffer from syscall overhead, all reads incur costs proportional to buffer size (#110650)
// proportional to buffer size (#110650)
// //
pub(crate) fn default_read_to_end<R: Read + ?Sized>( pub(crate) fn default_read_to_end<R: Read + ?Sized>(
r: &mut R, r: &mut R,
@@ -444,6 +443,8 @@ fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<us
} }
} }
let mut consecutive_short_reads = 0;
loop { loop {
if buf.len() == buf.capacity() && buf.capacity() == start_cap { if buf.len() == buf.capacity() && buf.capacity() == start_cap {
// The buffer might be an exact fit. Let's read into a probe buffer // The buffer might be an exact fit. Let's read into a probe buffer
@@ -489,6 +490,12 @@ fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<us
return Ok(buf.len() - start_len); return Ok(buf.len() - start_len);
} }
if bytes_read < buf_len {
consecutive_short_reads += 1;
} else {
consecutive_short_reads = 0;
}
// store how much was initialized but not filled // store how much was initialized but not filled
initialized = unfilled_but_initialized; initialized = unfilled_but_initialized;
@@ -503,7 +510,10 @@ fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<us
// The reader is returning short reads but it doesn't call ensure_init(). // The reader is returning short reads but it doesn't call ensure_init().
// In that case we no longer need to restrict read sizes to avoid // In that case we no longer need to restrict read sizes to avoid
// initialization costs. // initialization costs.
if !was_fully_initialized { // When reading from disk we usually don't get any short reads except at EOF.
// So we wait for at least 2 consecutive short reads before uncapping the read buffer;
// this helps with the Windows issue (#110650), where reads incur costs proportional to buffer size.
if !was_fully_initialized && consecutive_short_reads > 1 {
max_read_size = usize::MAX; max_read_size = usize::MAX;
} }