From 5039160c5bd16a9d4908c71b8941ff95e866125f Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Thu, 28 Mar 2024 20:27:12 +0100 Subject: [PATCH] Add add/sub methods that only panic with debug assertions to rustc This mitigates the perf impact of enabling overflow checks on rustc. The change to use overflow checks will be done in a later PR. --- compiler/rustc_data_structures/src/sip128.rs | 38 +++++++----- compiler/rustc_serialize/src/int_overflow.rs | 65 ++++++++++++++++++++ compiler/rustc_serialize/src/leb128.rs | 14 +++-- compiler/rustc_serialize/src/lib.rs | 1 + compiler/rustc_serialize/src/opaque.rs | 10 ++- compiler/rustc_span/src/span_encoding.rs | 8 ++- 6 files changed, 109 insertions(+), 27 deletions(-) create mode 100644 compiler/rustc_serialize/src/int_overflow.rs diff --git a/compiler/rustc_data_structures/src/sip128.rs b/compiler/rustc_data_structures/src/sip128.rs index 4a0ed87f77c..4c9acfe0f71 100644 --- a/compiler/rustc_data_structures/src/sip128.rs +++ b/compiler/rustc_data_structures/src/sip128.rs @@ -1,5 +1,8 @@ //! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes. +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use rustc_serialize::int_overflow::{DebugStrictAdd, DebugStrictSub}; use std::hash::Hasher; use std::mem::{self, MaybeUninit}; use std::ptr; @@ -103,19 +106,19 @@ unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) } let mut i = 0; - if i + 3 < count { + if i.debug_strict_add(3) < count { ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); - i += 4; + i = i.debug_strict_add(4); } - if i + 1 < count { + if i.debug_strict_add(1) < count { ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); - i += 2 + i = i.debug_strict_add(2) } if i < count { *dst.add(i) = *src.add(i); - i += 1; + i = i.debug_strict_add(1); } debug_assert_eq!(i, count); @@ -211,14 +214,14 @@ pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 { debug_assert!(nbuf < BUFFER_SIZE); debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); - if nbuf + LEN < BUFFER_SIZE { + if nbuf.debug_strict_add(LEN) < BUFFER_SIZE { unsafe { // The memcpy call is optimized away because the size is known. let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); } - self.nbuf = nbuf + LEN; + self.nbuf = nbuf.debug_strict_add(LEN); return; } @@ -265,8 +268,9 @@ pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 { // This function should only be called when the write fills the buffer. // Therefore, when LEN == 1, the new `self.nbuf` must be zero. // LEN is statically known, so the branch is optimized away. 
- self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE }; - self.processed += BUFFER_SIZE; + self.nbuf = + if LEN == 1 { 0 } else { nbuf.debug_strict_add(LEN).debug_strict_sub(BUFFER_SIZE) }; + self.processed = self.processed.debug_strict_add(BUFFER_SIZE); } } @@ -277,7 +281,7 @@ fn slice_write(&mut self, msg: &[u8]) { let nbuf = self.nbuf; debug_assert!(nbuf < BUFFER_SIZE); - if nbuf + length < BUFFER_SIZE { + if nbuf.debug_strict_add(length) < BUFFER_SIZE { unsafe { let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); @@ -289,7 +293,7 @@ fn slice_write(&mut self, msg: &[u8]) { } } - self.nbuf = nbuf + length; + self.nbuf = nbuf.debug_strict_add(length); return; } @@ -315,7 +319,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { // This function should only be called when the write fills the buffer, // so we know that there is enough input to fill the current element. let valid_in_elem = nbuf % ELEM_SIZE; - let needed_in_elem = ELEM_SIZE - valid_in_elem; + let needed_in_elem = ELEM_SIZE.debug_strict_sub(valid_in_elem); let src = msg.as_ptr(); let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); @@ -327,7 +331,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0. // We know that is true, because last step ensured we have a full // element in the buffer. - let last = nbuf / ELEM_SIZE + 1; + let last = (nbuf / ELEM_SIZE).debug_strict_add(1); for i in 0..last { let elem = self.buf.get_unchecked(i).assume_init().to_le(); @@ -338,7 +342,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { // Process the remaining element-sized chunks of input. let mut processed = needed_in_elem; - let input_left = length - processed; + let input_left = length.debug_strict_sub(processed); let elems_left = input_left / ELEM_SIZE; let extra_bytes_left = input_left % ELEM_SIZE; @@ -347,7 +351,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { self.state.v3 ^= elem; Sip13Rounds::c_rounds(&mut self.state); self.state.v0 ^= elem; - processed += ELEM_SIZE; + processed = processed.debug_strict_add(ELEM_SIZE); } // Copy remaining input into start of buffer. @@ -356,7 +360,7 @@ unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { copy_nonoverlapping_small(src, dst, extra_bytes_left); self.nbuf = extra_bytes_left; - self.processed += nbuf + processed; + self.processed = self.processed.debug_strict_add(nbuf.debug_strict_add(processed)); } } @@ -394,7 +398,7 @@ pub fn finish128(mut self) -> (u64, u64) { }; // Finalize the hash. - let length = self.processed + self.nbuf; + let length = self.processed.debug_strict_add(self.nbuf); let b: u64 = ((length as u64 & 0xff) << 56) | elem; state.v3 ^= b; diff --git a/compiler/rustc_serialize/src/int_overflow.rs b/compiler/rustc_serialize/src/int_overflow.rs new file mode 100644 index 00000000000..f2aac2ef711 --- /dev/null +++ b/compiler/rustc_serialize/src/int_overflow.rs @@ -0,0 +1,65 @@ +// This would belong to `rustc_data_structures`, but `rustc_serialize` needs it too. + +/// Addition, but only overflow checked when `cfg(debug_assertions)` is set +/// instead of respecting `-Coverflow-checks`. +/// +/// This exists for performance reasons, as we ship rustc with overflow checks. +/// While overflow checks are perf neutral in almost all of the compiler, there +/// are a few particularly hot areas where we don't want overflow checks in our +/// dist builds. 
Overflow is still a bug there, so we want overflow check for +/// builds with debug assertions. +/// +/// That's a long way to say that this should be used in areas where overflow +/// is a bug but overflow checking is too slow. +pub trait DebugStrictAdd { + /// See [`DebugStrictAdd`]. + fn debug_strict_add(self, other: Self) -> Self; +} + +macro_rules! impl_debug_strict_add { + ($( $ty:ty )*) => { + $( + impl DebugStrictAdd for $ty { + fn debug_strict_add(self, other: Self) -> Self { + if cfg!(debug_assertions) { + self + other + } else { + self.wrapping_add(other) + } + } + } + )* + }; +} + +/// See [`DebugStrictAdd`]. +pub trait DebugStrictSub { + /// See [`DebugStrictAdd`]. + fn debug_strict_sub(self, other: Self) -> Self; +} + +macro_rules! impl_debug_strict_sub { + ($( $ty:ty )*) => { + $( + impl DebugStrictSub for $ty { + fn debug_strict_sub(self, other: Self) -> Self { + if cfg!(debug_assertions) { + self - other + } else { + self.wrapping_sub(other) + } + } + } + )* + }; +} + +impl_debug_strict_add! { + u8 u16 u32 u64 u128 usize + i8 i16 i32 i64 i128 isize +} + +impl_debug_strict_sub! { + u8 u16 u32 u64 u128 usize + i8 i16 i32 i64 i128 isize +} diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs index ca661bac78c..44324804468 100644 --- a/compiler/rustc_serialize/src/leb128.rs +++ b/compiler/rustc_serialize/src/leb128.rs @@ -1,6 +1,10 @@ use crate::opaque::MemDecoder; use crate::serialize::Decoder; +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use crate::int_overflow::DebugStrictAdd; + /// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type pub const fn max_leb128_len() -> usize { // The longest LEB128 encoding for an integer uses 7 bits per byte. @@ -24,7 +28,7 @@ macro_rules! impl_write_unsigned_leb128 { *out.get_unchecked_mut(i) = value as u8; } - i += 1; + i = i.debug_strict_add(1); break; } else { unsafe { @@ -32,7 +36,7 @@ macro_rules! impl_write_unsigned_leb128 { } value >>= 7; - i += 1; + i = i.debug_strict_add(1); } } @@ -69,7 +73,7 @@ pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty { } else { result |= ((byte & 0x7F) as $int_ty) << shift; } - shift += 7; + shift = shift.debug_strict_add(7); } } }; @@ -101,7 +105,7 @@ macro_rules! impl_write_signed_leb128 { *out.get_unchecked_mut(i) = byte; } - i += 1; + i = i.debug_strict_add(1); if !more { break; @@ -130,7 +134,7 @@ pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty { loop { byte = decoder.read_u8(); result |= <$int_ty>::from(byte & 0x7F) << shift; - shift += 7; + shift = shift.debug_strict_add(7); if (byte & 0x80) == 0 { break; diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs index b67b7d79d97..5a9403e0a85 100644 --- a/compiler/rustc_serialize/src/lib.rs +++ b/compiler/rustc_serialize/src/lib.rs @@ -23,5 +23,6 @@ mod serialize; +pub mod int_overflow; pub mod leb128; pub mod opaque; diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index cc8d1c25092..eec83c02d35 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -7,6 +7,10 @@ use std::path::Path; use std::path::PathBuf; +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. 
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use crate::int_overflow::DebugStrictAdd; + // ----------------------------------------------------------------------------- // Encoder // ----------------------------------------------------------------------------- @@ -65,7 +69,7 @@ pub fn position(&self) -> usize { // Tracking position this way instead of having a `self.position` field // means that we only need to update `self.buffered` on a write call, // as opposed to updating `self.position` and `self.buffered`. - self.flushed + self.buffered + self.flushed.debug_strict_add(self.buffered) } #[cold] @@ -119,7 +123,7 @@ fn write_all(&mut self, buf: &[u8]) { } if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) { dest.copy_from_slice(buf); - self.buffered += buf.len(); + self.buffered = self.buffered.debug_strict_add(buf.len()); } else { self.write_all_cold_path(buf); } @@ -158,7 +162,7 @@ fn write_all(&mut self, buf: &[u8]) { if written > N { Self::panic_invalid_write::(written); } - self.buffered += written; + self.buffered = self.buffered.debug_strict_add(written); } #[cold] diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index e162695a13b..788a52faf56 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -5,6 +5,10 @@ use rustc_data_structures::fx::FxIndexSet; +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use rustc_serialize::int_overflow::DebugStrictAdd; + /// A compressed span. /// /// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only @@ -166,7 +170,7 @@ pub fn data_untracked(self) -> SpanData { debug_assert!(len <= MAX_LEN); SpanData { lo: BytePos(self.lo_or_index), - hi: BytePos(self.lo_or_index + len), + hi: BytePos(self.lo_or_index.debug_strict_add(len)), ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32), parent: None, } @@ -179,7 +183,7 @@ pub fn data_untracked(self) -> SpanData { }; SpanData { lo: BytePos(self.lo_or_index), - hi: BytePos(self.lo_or_index + len), + hi: BytePos(self.lo_or_index.debug_strict_add(len)), ctxt: SyntaxContext::root(), parent: Some(parent), }
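
For reference, a minimal standalone sketch of how the new `debug_strict_add` helper is meant to behave, reduced to a single `usize` impl so it compiles on its own; the `main` function and the values in it are illustrative and not part of the patch.

// Standalone sketch of the DebugStrictAdd trait introduced in
// compiler/rustc_serialize/src/int_overflow.rs above, implemented only for
// usize here. With debug assertions the plain `+` is used, so overflow is
// caught by the usual checks (which default to on in such builds); without
// debug assertions (e.g. rustc dist builds, which ship with overflow checks
// but not debug assertions) the addition wraps and no check lands on the
// hot path.
pub trait DebugStrictAdd {
    fn debug_strict_add(self, other: Self) -> Self;
}

impl DebugStrictAdd for usize {
    fn debug_strict_add(self, other: Self) -> Self {
        if cfg!(debug_assertions) {
            self + other
        } else {
            self.wrapping_add(other)
        }
    }
}

fn main() {
    // Non-overflowing case: identical to `2 + 40` in every build.
    assert_eq!(2usize.debug_strict_add(40), 42);

    if !cfg!(debug_assertions) {
        // Without debug assertions the overflowing case wraps instead of
        // panicking; with debug assertions (and the overflow checks they
        // enable by default) it panics like a normal `+`.
        assert_eq!(usize::MAX.debug_strict_add(1), 0);
    }
    println!("debug_strict_add behaved as expected");
}

Note that `cfg!(debug_assertions)` is resolved at compile time, so the branch is optimized away entirely and each build configuration sees only one of the two additions.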