From bd93b7718efc4267e1106abb42b19c84ab0d5a86 Mon Sep 17 00:00:00 2001
From: Santiago Pastorino
Date: Fri, 27 Dec 2019 08:56:52 -0300
Subject: [PATCH 1/2] Avoid memory copy logic for ZSTs

Closes #67539
---
 src/librustc_mir/interpret/memory.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 71e6d3e8ca1..3d59b33c15d 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -845,7 +845,15 @@ pub fn copy_repeatedly(
         let src_bytes =
             self.get_raw(src.alloc_id)?.get_bytes_with_undef_and_ptr(&tcx, src, size)?.as_ptr();
         let dest_bytes =
-            self.get_raw_mut(dest.alloc_id)?.get_bytes_mut(&tcx, dest, size * length)?.as_mut_ptr();
+            self.get_raw_mut(dest.alloc_id)?.get_bytes_mut(&tcx, dest, size * length)?;
+
+        // If `dest_bytes` is empty, the copy is a no-op; skip all the work below for ZSTs.
+        // See #67539
+        if dest_bytes.is_empty() {
+            return Ok(());
+        }
+
+        let dest_bytes = dest_bytes.as_mut_ptr();
 
         // SAFE: The above indexing would have panicked if there weren't at least `size` bytes
         // behind `src` and `dest`. Also, we use the overlapping-safe `ptr::copy` if `src` and

From 250a636217977ece9bbfcf21e5af7600ee57b5c5 Mon Sep 17 00:00:00 2001
From: Santiago Pastorino
Date: Fri, 27 Dec 2019 15:50:56 -0300
Subject: [PATCH 2/2] Avoid copying some undef memory in MIR

During MIR interpretation it may happen that a place containing
uninitialized bytes is copied. This would read the current
representation of those bytes and write it to the destination, even
though by definition they must not matter to the execution. This elides
that representation change when no bytes are defined in such a copy,
saving some CPU cycles. In that case the memory of the target
allocation is not touched at all, which also means that sometimes no
physical page backing the memory allocation of the representation needs
to be provided by the OS at all, reducing memory pressure on the
system.
---
 src/librustc/mir/interpret/allocation.rs |  8 ++++
 src/librustc_mir/interpret/memory.rs     | 52 +++++++++++++-----------
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index 67f1c8072d6..a06b23367e6 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -594,6 +594,14 @@ pub struct AllocationDefinedness {
     ranges: smallvec::SmallVec<[u64; 1]>,
 }
 
+impl AllocationDefinedness {
+    pub fn all_bytes_undef(&self) -> bool {
+        // The `ranges` are run-length encoded and of alternating definedness.
+        // So if `ranges.len() > 1` then the second block is a range of defined bytes.
+        self.initial == false && self.ranges.len() == 1
+    }
+}
+
 /// Transferring the definedness mask to other allocations.
 impl Allocation {
     /// Creates a run-length encoding of the undef mask.
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 3d59b33c15d..cb676821fd4 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -841,6 +841,9 @@ pub fn copy_repeatedly(
 
         let tcx = self.tcx.tcx;
 
+        // The bits have to be saved locally before writing to dest in case src and dest overlap.
+        assert_eq!(size.bytes() as usize as u64, size.bytes());
+
         // This checks relocation edges on the src.
         let src_bytes =
             self.get_raw(src.alloc_id)?.get_bytes_with_undef_and_ptr(&tcx, src, size)?.as_ptr();
@@ -855,6 +858,22 @@ pub fn copy_repeatedly(
 
         let dest_bytes = dest_bytes.as_mut_ptr();
 
+        // Prepare a copy of the undef mask.
+        let compressed = self.get_raw(src.alloc_id)?.compress_undef_range(src, size);
+
+        if compressed.all_bytes_undef() {
+            // Fast path: If all bytes are `undef` then there is nothing to copy. The target range
+            // is marked as undef but we otherwise skip changing the byte representation, which may
+            // be arbitrary for undef bytes.
+            // This also avoids writing to the target bytes so that the backing allocation is never
+            // touched if the bytes stay undef for the whole interpreter execution. On contemporary
+            // operating systems this can avoid physically allocating the page.
+            let dest_alloc = self.get_raw_mut(dest.alloc_id)?;
+            dest_alloc.mark_definedness(dest, size * length, false);
+            dest_alloc.mark_relocation_range(relocations);
+            return Ok(());
+        }
+
         // SAFE: The above indexing would have panicked if there weren't at least `size` bytes
         // behind `src` and `dest`. Also, we use the overlapping-safe `ptr::copy` if `src` and
         // `dest` could possibly overlap.
@@ -889,8 +908,14 @@ pub fn copy_repeatedly(
             }
         }
 
-        // copy definedness to the destination
-        self.copy_undef_mask(src, dest, size, length)?;
+        // now fill in all the data
+        self.get_raw_mut(dest.alloc_id)?.mark_compressed_undef_range(
+            &compressed,
+            dest,
+            size,
+            length,
+        );
+
         // copy the relocations to the destination
         self.get_raw_mut(dest.alloc_id)?.mark_relocation_range(relocations);
 
@@ -898,29 +923,8 @@ pub fn copy_repeatedly(
     }
 }
 
-/// Undefined bytes
+/// Machine pointer introspection.
 impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
-    // FIXME: Add a fast version for the common, nonoverlapping case
-    fn copy_undef_mask(
-        &mut self,
-        src: Pointer,
-        dest: Pointer,
-        size: Size,
-        repeat: u64,
-    ) -> InterpResult<'tcx> {
-        // The bits have to be saved locally before writing to dest in case src and dest overlap.
-        assert_eq!(size.bytes() as usize as u64, size.bytes());
-
-        let src_alloc = self.get_raw(src.alloc_id)?;
-        let compressed = src_alloc.compress_undef_range(src, size);
-
-        // now fill in all the data
-        let dest_allocation = self.get_raw_mut(dest.alloc_id)?;
-        dest_allocation.mark_compressed_undef_range(&compressed, dest, size, repeat);
-
-        Ok(())
-    }
-
     pub fn force_ptr(
         &self,
         scalar: Scalar,
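
The fast path added above works because the definedness mask is run-length
encoded: `compress_undef_range` summarizes the copied range as alternating
runs, so `all_bytes_undef` only has to inspect the first run. The following
standalone sketch illustrates that encoding and check with simplified,
hypothetical types (`Definedness`, `compress`); it is not the compiler's own
`AllocationDefinedness` implementation.

    // Standalone sketch with hypothetical names; in rustc the mask lives in
    // `AllocationDefinedness` and is produced by `compress_undef_range`.

    struct Definedness {
        /// Definedness of the first run.
        initial: bool,
        /// Lengths of the runs; definedness alternates starting from `initial`.
        ranges: Vec<u64>,
    }

    impl Definedness {
        /// Compress a per-byte mask (`true` = defined) into alternating run lengths.
        fn compress(mask: &[bool]) -> Self {
            let initial = mask.first().copied().unwrap_or(false);
            let mut ranges = Vec::new();
            let mut current = initial;
            let mut len = 0u64;
            for &defined in mask {
                if defined == current {
                    len += 1;
                } else {
                    ranges.push(len);
                    current = defined;
                    len = 1;
                }
            }
            ranges.push(len);
            Definedness { initial, ranges }
        }

        /// A single run that starts undefined means every byte is undefined,
        /// mirroring `all_bytes_undef` in the patch above.
        fn all_bytes_undef(&self) -> bool {
            !self.initial && self.ranges.len() == 1
        }
    }

    fn main() {
        // All eight bytes undefined: a copy could take the fast path, marking the
        // destination undefined without touching its byte representation.
        assert!(Definedness::compress(&[false; 8]).all_bytes_undef());

        // As soon as any byte is defined, the regular copy path is needed.
        assert!(!Definedness::compress(&[false, false, true, false]).all_bytes_undef());
    }

With this representation the all-undef case is detectable in constant time,
which is what lets `copy_repeatedly` skip both the byte copy and the write to
the destination's backing memory.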