diff --git a/compiler/rustc_data_structures/src/sip128.rs b/compiler/rustc_data_structures/src/sip128.rs
index 4a0ed87f77c..4c9acfe0f71 100644
--- a/compiler/rustc_data_structures/src/sip128.rs
+++ b/compiler/rustc_data_structures/src/sip128.rs
@@ -1,5 +1,8 @@
 //! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes.
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use rustc_serialize::int_overflow::{DebugStrictAdd, DebugStrictSub};
 use std::hash::Hasher;
 use std::mem::{self, MaybeUninit};
 use std::ptr;
@@ -103,19 +106,19 @@ unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize)
         }
 
         let mut i = 0;
-        if i + 3 < count {
+        if i.debug_strict_add(3) < count {
             ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
-            i += 4;
+            i = i.debug_strict_add(4);
         }
 
-        if i + 1 < count {
+        if i.debug_strict_add(1) < count {
             ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
-            i += 2
+            i = i.debug_strict_add(2)
         }
 
         if i < count {
             *dst.add(i) = *src.add(i);
-            i += 1;
+            i = i.debug_strict_add(1);
         }
 
         debug_assert_eq!(i, count);
@@ -211,14 +214,14 @@ pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
         debug_assert!(nbuf < BUFFER_SIZE);
         debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
 
-        if nbuf + LEN < BUFFER_SIZE {
+        if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
             unsafe {
                 // The memcpy call is optimized away because the size is known.
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
                 ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
             }
 
-            self.nbuf = nbuf + LEN;
+            self.nbuf = nbuf.debug_strict_add(LEN);
 
             return;
         }
@@ -265,8 +268,9 @@ pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
             // This function should only be called when the write fills the buffer.
             // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
             // LEN is statically known, so the branch is optimized away.
-            self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
-            self.processed += BUFFER_SIZE;
+            self.nbuf =
+                if LEN == 1 { 0 } else { nbuf.debug_strict_add(LEN).debug_strict_sub(BUFFER_SIZE) };
+            self.processed = self.processed.debug_strict_add(BUFFER_SIZE);
         }
     }
 
@@ -277,7 +281,7 @@ fn slice_write(&mut self, msg: &[u8]) {
         let nbuf = self.nbuf;
         debug_assert!(nbuf < BUFFER_SIZE);
 
-        if nbuf + length < BUFFER_SIZE {
+        if nbuf.debug_strict_add(length) < BUFFER_SIZE {
             unsafe {
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
@@ -289,7 +293,7 @@ fn slice_write(&mut self, msg: &[u8]) {
                 }
             }
 
-            self.nbuf = nbuf + length;
+            self.nbuf = nbuf.debug_strict_add(length);
 
             return;
         }
@@ -315,7 +319,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
             // This function should only be called when the write fills the buffer,
             // so we know that there is enough input to fill the current element.
             let valid_in_elem = nbuf % ELEM_SIZE;
-            let needed_in_elem = ELEM_SIZE - valid_in_elem;
+            let needed_in_elem = ELEM_SIZE.debug_strict_sub(valid_in_elem);
 
             let src = msg.as_ptr();
             let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
@@ -327,7 +331,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
             // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
             // We know that is true, because last step ensured we have a full
             // element in the buffer.
-            let last = nbuf / ELEM_SIZE + 1;
+            let last = (nbuf / ELEM_SIZE).debug_strict_add(1);
 
             for i in 0..last {
                 let elem = self.buf.get_unchecked(i).assume_init().to_le();
@@ -338,7 +342,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
 
             // Process the remaining element-sized chunks of input.
             let mut processed = needed_in_elem;
-            let input_left = length - processed;
+            let input_left = length.debug_strict_sub(processed);
             let elems_left = input_left / ELEM_SIZE;
             let extra_bytes_left = input_left % ELEM_SIZE;
 
@@ -347,7 +351,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
                 self.state.v3 ^= elem;
                 Sip13Rounds::c_rounds(&mut self.state);
                 self.state.v0 ^= elem;
-                processed += ELEM_SIZE;
+                processed = processed.debug_strict_add(ELEM_SIZE);
             }
 
             // Copy remaining input into start of buffer.
@@ -356,7 +360,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
             copy_nonoverlapping_small(src, dst, extra_bytes_left);
 
             self.nbuf = extra_bytes_left;
-            self.processed += nbuf + processed;
+            self.processed = self.processed.debug_strict_add(nbuf.debug_strict_add(processed));
         }
     }
 
@@ -394,7 +398,7 @@ pub fn finish128(mut self) -> (u64, u64) {
         };
 
         // Finalize the hash.
-        let length = self.processed + self.nbuf;
+        let length = self.processed.debug_strict_add(self.nbuf);
         let b: u64 = ((length as u64 & 0xff) << 56) | elem;
 
         state.v3 ^= b;
diff --git a/compiler/rustc_serialize/src/int_overflow.rs b/compiler/rustc_serialize/src/int_overflow.rs
new file mode 100644
index 00000000000..f2aac2ef711
--- /dev/null
+++ b/compiler/rustc_serialize/src/int_overflow.rs
@@ -0,0 +1,65 @@
+// This would belong to `rustc_data_structures`, but `rustc_serialize` needs it too.
+
+/// Addition, but only overflow checked when `cfg(debug_assertions)` is set
+/// instead of respecting `-Coverflow-checks`.
+///
+/// This exists for performance reasons, as we ship rustc with overflow checks.
+/// While overflow checks are perf neutral in almost all of the compiler, there
+/// are a few particularly hot areas where we don't want overflow checks in our
+/// dist builds. Overflow is still a bug there, so we want overflow check for
+/// builds with debug assertions.
+///
+/// That's a long way to say that this should be used in areas where overflow
+/// is a bug but overflow checking is too slow.
+pub trait DebugStrictAdd {
+    /// See [`DebugStrictAdd`].
+    fn debug_strict_add(self, other: Self) -> Self;
+}
+
+macro_rules! impl_debug_strict_add {
+    ($( $ty:ty )*) => {
+        $(
+            impl DebugStrictAdd for $ty {
+                fn debug_strict_add(self, other: Self) -> Self {
+                    if cfg!(debug_assertions) {
+                        self + other
+                    } else {
+                        self.wrapping_add(other)
+                    }
+                }
+            }
+        )*
+    };
+}
+
+/// See [`DebugStrictAdd`].
+pub trait DebugStrictSub {
+    /// See [`DebugStrictAdd`].
+    fn debug_strict_sub(self, other: Self) -> Self;
+}
+
+macro_rules! impl_debug_strict_sub {
+    ($( $ty:ty )*) => {
+        $(
+            impl DebugStrictSub for $ty {
+                fn debug_strict_sub(self, other: Self) -> Self {
+                    if cfg!(debug_assertions) {
+                        self - other
+                    } else {
+                        self.wrapping_sub(other)
+                    }
+                }
+            }
+        )*
+    };
+}
+
+impl_debug_strict_add! {
+    u8 u16 u32 u64 u128 usize
+    i8 i16 i32 i64 i128 isize
+}
+
+impl_debug_strict_sub! {
+    u8 u16 u32 u64 u128 usize
+    i8 i16 i32 i64 i128 isize
+}
diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs
index ca661bac78c..44324804468 100644
--- a/compiler/rustc_serialize/src/leb128.rs
+++ b/compiler/rustc_serialize/src/leb128.rs
@@ -1,6 +1,10 @@
 use crate::opaque::MemDecoder;
 use crate::serialize::Decoder;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use crate::int_overflow::DebugStrictAdd;
+
 /// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type
 pub const fn max_leb128_len<T>() -> usize {
     // The longest LEB128 encoding for an integer uses 7 bits per byte.
@@ -24,7 +28,7 @@ macro_rules! impl_write_unsigned_leb128 {
                         *out.get_unchecked_mut(i) = value as u8;
                     }
 
-                    i += 1;
+                    i = i.debug_strict_add(1);
                     break;
                 } else {
                     unsafe {
@@ -32,7 +36,7 @@ macro_rules! impl_write_unsigned_leb128 {
                     }
 
                     value >>= 7;
-                    i += 1;
+                    i = i.debug_strict_add(1);
                 }
             }
 
@@ -69,7 +73,7 @@ pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
                 } else {
                     result |= ((byte & 0x7F) as $int_ty) << shift;
                 }
-                shift += 7;
+                shift = shift.debug_strict_add(7);
             }
         }
     };
@@ -101,7 +105,7 @@ macro_rules! impl_write_signed_leb128 {
                    *out.get_unchecked_mut(i) = byte;
                 }
 
-                i += 1;
+                i = i.debug_strict_add(1);
 
                 if !more {
                     break;
@@ -130,7 +134,7 @@ pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
             loop {
                 byte = decoder.read_u8();
                 result |= <$int_ty>::from(byte & 0x7F) << shift;
-                shift += 7;
+                shift = shift.debug_strict_add(7);
 
                 if (byte & 0x80) == 0 {
                     break;
diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs
index b67b7d79d97..5a9403e0a85 100644
--- a/compiler/rustc_serialize/src/lib.rs
+++ b/compiler/rustc_serialize/src/lib.rs
@@ -23,5 +23,6 @@
 
 mod serialize;
 
+pub mod int_overflow;
 pub mod leb128;
 pub mod opaque;
diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs
index cc8d1c25092..eec83c02d35 100644
--- a/compiler/rustc_serialize/src/opaque.rs
+++ b/compiler/rustc_serialize/src/opaque.rs
@@ -7,6 +7,10 @@
 use std::path::Path;
 use std::path::PathBuf;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use crate::int_overflow::DebugStrictAdd;
+
 // -----------------------------------------------------------------------------
 // Encoder
 // -----------------------------------------------------------------------------
@@ -65,7 +69,7 @@ pub fn position(&self) -> usize {
         // Tracking position this way instead of having a `self.position` field
         // means that we only need to update `self.buffered` on a write call,
         // as opposed to updating `self.position` and `self.buffered`.
-        self.flushed + self.buffered
+        self.flushed.debug_strict_add(self.buffered)
     }
 
     #[cold]
@@ -119,7 +123,7 @@ fn write_all(&mut self, buf: &[u8]) {
         }
         if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) {
             dest.copy_from_slice(buf);
-            self.buffered += buf.len();
+            self.buffered = self.buffered.debug_strict_add(buf.len());
         } else {
             self.write_all_cold_path(buf);
         }
@@ -158,7 +162,7 @@ fn write_all(&mut self, buf: &[u8]) {
         if written > N {
             Self::panic_invalid_write::<N>(written);
         }
-        self.buffered += written;
+        self.buffered = self.buffered.debug_strict_add(written);
     }
 
     #[cold]
diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs
index e162695a13b..788a52faf56 100644
--- a/compiler/rustc_span/src/span_encoding.rs
+++ b/compiler/rustc_span/src/span_encoding.rs
@@ -5,6 +5,10 @@
 
 use rustc_data_structures::fx::FxIndexSet;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use rustc_serialize::int_overflow::DebugStrictAdd;
+
 /// A compressed span.
 ///
 /// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
@@ -166,7 +170,7 @@ pub fn data_untracked(self) -> SpanData {
                 debug_assert!(len <= MAX_LEN);
                 SpanData {
                     lo: BytePos(self.lo_or_index),
-                    hi: BytePos(self.lo_or_index + len),
+                    hi: BytePos(self.lo_or_index.debug_strict_add(len)),
                     ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
                     parent: None,
                 }
@@ -179,7 +183,7 @@ pub fn data_untracked(self) -> SpanData {
                 };
                 SpanData {
                     lo: BytePos(self.lo_or_index),
-                    hi: BytePos(self.lo_or_index + len),
+                    hi: BytePos(self.lo_or_index.debug_strict_add(len)),
                     ctxt: SyntaxContext::root(),
                     parent: Some(parent),
                 }
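
Note (not part of the patch): for readers unfamiliar with the pattern, the sketch below shows in isolation what `debug_strict_add` does. The trait and method names match the new `int_overflow` module above; the `usize`-only impl and the `sum_bytes` caller are made-up illustrations, not rustc code. The real module implements both traits for every integer type via the macros shown in the diff.

// Overflow checks run only when debug assertions are enabled; dist builds fall
// back to wrapping arithmetic instead of paying for `-Coverflow-checks` in hot
// loops. Overflow is still a bug, it is just only caught in debug builds.
pub trait DebugStrictAdd {
    fn debug_strict_add(self, other: Self) -> Self;
}

// Trimmed-down impl for `usize` only; the patch generates this for all integer
// types with `impl_debug_strict_add!`.
impl DebugStrictAdd for usize {
    fn debug_strict_add(self, other: Self) -> Self {
        if cfg!(debug_assertions) { self + other } else { self.wrapping_add(other) }
    }
}

// Hypothetical hot-loop caller, written the way the patch rewrites `i += 1`
// as `i = i.debug_strict_add(1)`.
fn sum_bytes(bytes: &[u8]) -> usize {
    let mut i = 0;
    let mut total = 0;
    while i < bytes.len() {
        total = total.debug_strict_add(bytes[i] as usize);
        i = i.debug_strict_add(1);
    }
    total
}

fn main() {
    assert_eq!(sum_bytes(&[1, 2, 3]), 6);
}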