From 83aa6d4109f94730b62e275df30247091c629ce9 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 8 Jul 2021 18:37:59 -0400 Subject: [PATCH 1/5] Carefully remove bounds checks from some chunk iterators --- library/core/src/slice/iter.rs | 87 +++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index ad1d6b8b846..1b9f64ff215 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1474,7 +1474,24 @@ fn next_back(&mut self) -> Option<&'a [T]> { } else { let remainder = self.v.len() % self.chunk_size; let chunksz = if remainder != 0 { remainder } else { self.chunk_size }; - let (fst, snd) = self.v.split_at(self.v.len() - chunksz); + // SAFETY: split_at_unchecked requires the argument be less than or + // equal to the length. This is guaranteed, but subtle: We need the + // expression `self.v.len() - sz` not to overflow, which means we + // need `sz >= tmp_len`. + // + // `sz` will always either be `self.v.len() % self.chunk_size`, + // which will always evaluate to strictly less than `self.v.len()` + // (or panic, in the case that `self.chunk_size` is zero), or it can + // be `self.chunk_size`, in the case that the length is exactly + // divisible by the chunk size. + // + // While it seems like using `self.chunk_size` in this case could + // lead to a value greater than `self.v.len()`, it cannot: if + // `self.chunk_size` were greater than `self.v.len()`, then + // `self.v.len() % self.chunk_size` would have returned non-zero + // (note that in this branch of the `if`, we already know that + // `self.v` is non-empty). + let (fst, snd) = unsafe { self.v.split_at_unchecked(self.v.len() - chunksz) }; self.v = fst; Some(snd) } @@ -1639,7 +1656,26 @@ fn next_back(&mut self) -> Option<&'a mut [T]> { let sz = if remainder != 0 { remainder } else { self.chunk_size }; let tmp = mem::replace(&mut self.v, &mut []); let tmp_len = tmp.len(); - let (head, tail) = tmp.split_at_mut(tmp_len - sz); + // SAFETY: split_at_unchecked requires the argument be less than or + // equal to the length. This is guaranteed, but subtle: We need the + // expression `tmp_len - sz` not to overflow, which means we need + // `sz >= tmp_len`. + // + // `sz` will always either be `tmp_or_v.len() % self.chunk_size` + // (where `tmp_or_v` is the slice that at the time was `self.v` but + // now is `tmp`, and thus `tmp_len` and `tmp_or_v.len()` are the + // same), which will always evaluate to strictly less than + // `tmp_or_v.len()` (or panic, in the case that `self.chunk_size` is + // zero), or it can be `self.chunk_size`, in the case that the + // length is exactly divisible by the chunk size. + // + // While it seems like using `self.chunk_size` in this case could + // lead to a value greater than `tmp_len`, it cannot: if + // `self.chunk_size` were greater than `tmp_len`, then + // `tmp_or_v.len() % self.chunk_size` would have returned non-zero + // (note that in this branch of the `if`, we already know that + // `self.v` is non-empty). + let (head, tail) = unsafe { tmp.split_at_mut_unchecked(tmp_len - sz) }; self.v = head; Some(tail) } @@ -2409,7 +2445,12 @@ fn next(&mut self) -> Option<&'a [T]> { None } else { let chunksz = cmp::min(self.v.len(), self.chunk_size); - let (fst, snd) = self.v.split_at(self.v.len() - chunksz); + // SAFETY: split_at_unchecked just requires the argument be less + // than the length. This could only happen if the expression + // `self.v.len() - chunksz` overflows. This could only happen if + // `chunksz > self.v.len()`, which is impossible as we initialize it + // as the `min` of `self.v.len()` and `self.chunk_size`. + let (fst, snd) = unsafe { self.v.split_at_unchecked(self.v.len() - chunksz) }; self.v = fst; Some(snd) } @@ -2483,7 +2524,21 @@ fn next_back(&mut self) -> Option<&'a [T]> { } else { let remainder = self.v.len() % self.chunk_size; let chunksz = if remainder != 0 { remainder } else { self.chunk_size }; - let (fst, snd) = self.v.split_at(chunksz); + // SAFETY: split_at_unchecked requires the argument be less than or + // equal to the length. This is guaranteed, but subtle: `chunksz` + // will always either be `self.v.len() % self.chunk_size`, which + // will always evaluate to strictly less than `self.v.len()` (or + // panic, in the case that `self.chunk_size` is zero), or it can be + // `self.chunk_size`, in the case that the length is exactly + // divisible by the chunk size. + // + // While it seems like using `self.chunk_size` in this case could + // lead to a value greater than `self.v.len()`, it cannot: if + // `self.chunk_size` were greater than `self.v.len()`, then + // `self.v.len() % self.chunk_size` would return nonzero (note that + // in this branch of the `if`, we already know that `self.v` is + // non-empty). + let (fst, snd) = unsafe { self.v.split_at_unchecked(chunksz) }; self.v = snd; Some(fst) } @@ -2569,7 +2624,12 @@ fn next(&mut self) -> Option<&'a mut [T]> { let sz = cmp::min(self.v.len(), self.chunk_size); let tmp = mem::replace(&mut self.v, &mut []); let tmp_len = tmp.len(); - let (head, tail) = tmp.split_at_mut(tmp_len - sz); + // SAFETY: split_at_mut_unchecked just requires the argument be less + // than the length. This could only happen if the expression + // `tmp_len - sz` overflows. This could only happen if `sz > + // tmp_len`, which is impossible as we initialize it as the `min` of + // `self.v.len()` (e.g. `tmp_len`) and `self.chunk_size`. + let (head, tail) = unsafe { tmp.split_at_mut_unchecked(tmp_len - sz) }; self.v = head; Some(tail) } @@ -2647,7 +2707,22 @@ fn next_back(&mut self) -> Option<&'a mut [T]> { let remainder = self.v.len() % self.chunk_size; let sz = if remainder != 0 { remainder } else { self.chunk_size }; let tmp = mem::replace(&mut self.v, &mut []); - let (head, tail) = tmp.split_at_mut(sz); + // SAFETY: split_at_mut_unchecked requires the argument be less than + // or equal to the length. This is guaranteed, but subtle: `chunksz` + // will always either be `tmp_or_v.len() % self.chunk_size` (where + // `tmp_or_v` is the slice that at the time was `self.v` but now is + // `tmp`), which will always evaluate to strictly less than + // `tmp_or_v.len()` (or panic, in the case that `self.chunk_size` is + // zero), or it can be `self.chunk_size`, in the case that the + // length is exactly divisible by the chunk size. + // + // While it seems like using `self.chunk_size` in this case could + // lead to a value greater than `tmp_or_v.len()`, it cannot: if + // `self.chunk_size` were greater than `tmp_or_v.len()`, then + // `tmp_or_v.len() % self.chunk_size` would return nonzero (note + // that in this branch of the `if`, we already know that `tmp_or_v` + // is non-empty). + let (head, tail) = unsafe { tmp.split_at_mut_unchecked(sz) }; self.v = tail; Some(head) } From e81fefaa5096b22c79d13df70eb59d2d66cc536c Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Fri, 29 Oct 2021 19:44:29 -0700 Subject: [PATCH 2/5] Address some issues in chunk iterator safety comments Co-authored-by: the8472 --- library/core/src/slice/iter.rs | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index 1b9f64ff215..18bf61aeb16 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1475,22 +1475,19 @@ fn next_back(&mut self) -> Option<&'a [T]> { let remainder = self.v.len() % self.chunk_size; let chunksz = if remainder != 0 { remainder } else { self.chunk_size }; // SAFETY: split_at_unchecked requires the argument be less than or - // equal to the length. This is guaranteed, but subtle: We need the - // expression `self.v.len() - sz` not to overflow, which means we - // need `sz >= tmp_len`. - // - // `sz` will always either be `self.v.len() % self.chunk_size`, - // which will always evaluate to strictly less than `self.v.len()` - // (or panic, in the case that `self.chunk_size` is zero), or it can - // be `self.chunk_size`, in the case that the length is exactly + // equal to the length. This is guaranteed, but subtle: `chunksz` + // will always either be `self.v.len() % self.chunk_size`, which + // will always evaluate to strictly less than `self.v.len()` (or + // panic, in the case that `self.chunk_size` is zero), or it can be + // `self.chunk_size`, in the case that the length is exactly // divisible by the chunk size. // // While it seems like using `self.chunk_size` in this case could // lead to a value greater than `self.v.len()`, it cannot: if // `self.chunk_size` were greater than `self.v.len()`, then - // `self.v.len() % self.chunk_size` would have returned non-zero - // (note that in this branch of the `if`, we already know that - // `self.v` is non-empty). + // `self.v.len() % self.chunk_size` would return nonzero (note that + // in this branch of the `if`, we already know that `self.v` is + // non-empty). let (fst, snd) = unsafe { self.v.split_at_unchecked(self.v.len() - chunksz) }; self.v = fst; Some(snd) @@ -2524,20 +2521,7 @@ fn next_back(&mut self) -> Option<&'a [T]> { } else { let remainder = self.v.len() % self.chunk_size; let chunksz = if remainder != 0 { remainder } else { self.chunk_size }; - // SAFETY: split_at_unchecked requires the argument be less than or - // equal to the length. This is guaranteed, but subtle: `chunksz` - // will always either be `self.v.len() % self.chunk_size`, which - // will always evaluate to strictly less than `self.v.len()` (or - // panic, in the case that `self.chunk_size` is zero), or it can be - // `self.chunk_size`, in the case that the length is exactly - // divisible by the chunk size. - // - // While it seems like using `self.chunk_size` in this case could - // lead to a value greater than `self.v.len()`, it cannot: if - // `self.chunk_size` were greater than `self.v.len()`, then - // `self.v.len() % self.chunk_size` would return nonzero (note that - // in this branch of the `if`, we already know that `self.v` is - // non-empty). + // SAFETY: similar to Chunks::next_back let (fst, snd) = unsafe { self.v.split_at_unchecked(chunksz) }; self.v = snd; Some(fst) From b54381640d0cd844f6a739812eaaa202f08aeb03 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Fri, 29 Oct 2021 20:08:27 -0700 Subject: [PATCH 3/5] Reference Chunks::next_back in more of the chunk iterators safety comments --- library/core/src/slice/iter.rs | 36 ++-------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index 18bf61aeb16..27fcb8b5332 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1653,25 +1653,7 @@ fn next_back(&mut self) -> Option<&'a mut [T]> { let sz = if remainder != 0 { remainder } else { self.chunk_size }; let tmp = mem::replace(&mut self.v, &mut []); let tmp_len = tmp.len(); - // SAFETY: split_at_unchecked requires the argument be less than or - // equal to the length. This is guaranteed, but subtle: We need the - // expression `tmp_len - sz` not to overflow, which means we need - // `sz >= tmp_len`. - // - // `sz` will always either be `tmp_or_v.len() % self.chunk_size` - // (where `tmp_or_v` is the slice that at the time was `self.v` but - // now is `tmp`, and thus `tmp_len` and `tmp_or_v.len()` are the - // same), which will always evaluate to strictly less than - // `tmp_or_v.len()` (or panic, in the case that `self.chunk_size` is - // zero), or it can be `self.chunk_size`, in the case that the - // length is exactly divisible by the chunk size. - // - // While it seems like using `self.chunk_size` in this case could - // lead to a value greater than `tmp_len`, it cannot: if - // `self.chunk_size` were greater than `tmp_len`, then - // `tmp_or_v.len() % self.chunk_size` would have returned non-zero - // (note that in this branch of the `if`, we already know that - // `self.v` is non-empty). + // SAFETY: Similar to `Chunks::next_back` let (head, tail) = unsafe { tmp.split_at_mut_unchecked(tmp_len - sz) }; self.v = head; Some(tail) @@ -2691,21 +2673,7 @@ fn next_back(&mut self) -> Option<&'a mut [T]> { let remainder = self.v.len() % self.chunk_size; let sz = if remainder != 0 { remainder } else { self.chunk_size }; let tmp = mem::replace(&mut self.v, &mut []); - // SAFETY: split_at_mut_unchecked requires the argument be less than - // or equal to the length. This is guaranteed, but subtle: `chunksz` - // will always either be `tmp_or_v.len() % self.chunk_size` (where - // `tmp_or_v` is the slice that at the time was `self.v` but now is - // `tmp`), which will always evaluate to strictly less than - // `tmp_or_v.len()` (or panic, in the case that `self.chunk_size` is - // zero), or it can be `self.chunk_size`, in the case that the - // length is exactly divisible by the chunk size. - // - // While it seems like using `self.chunk_size` in this case could - // lead to a value greater than `tmp_or_v.len()`, it cannot: if - // `self.chunk_size` were greater than `tmp_or_v.len()`, then - // `tmp_or_v.len() % self.chunk_size` would return nonzero (note - // that in this branch of the `if`, we already know that `tmp_or_v` - // is non-empty). + // SAFETY: Similar to `Chunks::next_back` let (head, tail) = unsafe { tmp.split_at_mut_unchecked(sz) }; self.v = tail; Some(head) From b8abd550bce859c7e13905c19cc638a16cfbc906 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Fri, 29 Oct 2021 20:18:41 -0700 Subject: [PATCH 4/5] Pull `self.v.len()` out in `RChunks::next` as suggested in review comments --- library/core/src/slice/iter.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index 27fcb8b5332..44ba494340a 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -2423,13 +2423,14 @@ fn next(&mut self) -> Option<&'a [T]> { if self.v.is_empty() { None } else { - let chunksz = cmp::min(self.v.len(), self.chunk_size); + let len = self.v.len(); + let chunksz = cmp::min(len, self.chunk_size); // SAFETY: split_at_unchecked just requires the argument be less - // than the length. This could only happen if the expression - // `self.v.len() - chunksz` overflows. This could only happen if - // `chunksz > self.v.len()`, which is impossible as we initialize it - // as the `min` of `self.v.len()` and `self.chunk_size`. - let (fst, snd) = unsafe { self.v.split_at_unchecked(self.v.len() - chunksz) }; + // than the length. This could only happen if the expression `len - + // chunksz` overflows. This could only happen if `chunksz > len`, + // which is impossible as we initialize it as the `min` of `len` and + // `self.chunk_size`. + let (fst, snd) = unsafe { self.v.split_at_unchecked(len - chunksz) }; self.v = fst; Some(snd) } From 9c62455e2f5c6b5a35a8e567dab673e725196162 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Mon, 31 Jan 2022 17:35:19 -0800 Subject: [PATCH 5/5] Improve test coverage of {Chunks,RChunks,RChunksMut}::{next,next_back} --- library/core/tests/slice.rs | 102 ++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/library/core/tests/slice.rs b/library/core/tests/slice.rs index 8d05e47edf4..d6cee973da6 100644 --- a/library/core/tests/slice.rs +++ b/library/core/tests/slice.rs @@ -249,6 +249,40 @@ fn test_chunks_nth() { assert_eq!(c2.next(), None); } +#[test] +fn test_chunks_next() { + let v = [0, 1, 2, 3, 4, 5]; + let mut c = v.chunks(2); + assert_eq!(c.next().unwrap(), &[0, 1]); + assert_eq!(c.next().unwrap(), &[2, 3]); + assert_eq!(c.next().unwrap(), &[4, 5]); + assert_eq!(c.next(), None); + + let v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.chunks(3); + assert_eq!(c.next().unwrap(), &[0, 1, 2]); + assert_eq!(c.next().unwrap(), &[3, 4, 5]); + assert_eq!(c.next().unwrap(), &[6, 7]); + assert_eq!(c.next(), None); +} + +#[test] +fn test_chunks_next_back() { + let v = [0, 1, 2, 3, 4, 5]; + let mut c = v.chunks(2); + assert_eq!(c.next_back().unwrap(), &[4, 5]); + assert_eq!(c.next_back().unwrap(), &[2, 3]); + assert_eq!(c.next_back().unwrap(), &[0, 1]); + assert_eq!(c.next_back(), None); + + let v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.chunks(3); + assert_eq!(c.next_back().unwrap(), &[6, 7]); + assert_eq!(c.next_back().unwrap(), &[3, 4, 5]); + assert_eq!(c.next_back().unwrap(), &[0, 1, 2]); + assert_eq!(c.next_back(), None); +} + #[test] fn test_chunks_nth_back() { let v: &[i32] = &[0, 1, 2, 3, 4, 5]; @@ -807,6 +841,40 @@ fn test_rchunks_nth_back() { assert_eq!(c2.next_back(), None); } +#[test] +fn test_rchunks_next() { + let v = [0, 1, 2, 3, 4, 5]; + let mut c = v.rchunks(2); + assert_eq!(c.next().unwrap(), &[4, 5]); + assert_eq!(c.next().unwrap(), &[2, 3]); + assert_eq!(c.next().unwrap(), &[0, 1]); + assert_eq!(c.next(), None); + + let v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.rchunks(3); + assert_eq!(c.next().unwrap(), &[5, 6, 7]); + assert_eq!(c.next().unwrap(), &[2, 3, 4]); + assert_eq!(c.next().unwrap(), &[0, 1]); + assert_eq!(c.next(), None); +} + +#[test] +fn test_rchunks_next_back() { + let v = [0, 1, 2, 3, 4, 5]; + let mut c = v.rchunks(2); + assert_eq!(c.next_back().unwrap(), &[0, 1]); + assert_eq!(c.next_back().unwrap(), &[2, 3]); + assert_eq!(c.next_back().unwrap(), &[4, 5]); + assert_eq!(c.next_back(), None); + + let v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.rchunks(3); + assert_eq!(c.next_back().unwrap(), &[0, 1]); + assert_eq!(c.next_back().unwrap(), &[2, 3, 4]); + assert_eq!(c.next_back().unwrap(), &[5, 6, 7]); + assert_eq!(c.next_back(), None); +} + #[test] fn test_rchunks_last() { let v: &[i32] = &[0, 1, 2, 3, 4, 5]; @@ -872,6 +940,40 @@ fn test_rchunks_mut_nth_back() { assert_eq!(c2.next_back(), None); } +#[test] +fn test_rchunks_mut_next() { + let mut v = [0, 1, 2, 3, 4, 5]; + let mut c = v.rchunks_mut(2); + assert_eq!(c.next().unwrap(), &mut [4, 5]); + assert_eq!(c.next().unwrap(), &mut [2, 3]); + assert_eq!(c.next().unwrap(), &mut [0, 1]); + assert_eq!(c.next(), None); + + let mut v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.rchunks_mut(3); + assert_eq!(c.next().unwrap(), &mut [5, 6, 7]); + assert_eq!(c.next().unwrap(), &mut [2, 3, 4]); + assert_eq!(c.next().unwrap(), &mut [0, 1]); + assert_eq!(c.next(), None); +} + +#[test] +fn test_rchunks_mut_next_back() { + let mut v = [0, 1, 2, 3, 4, 5]; + let mut c = v.rchunks_mut(2); + assert_eq!(c.next_back().unwrap(), &mut [0, 1]); + assert_eq!(c.next_back().unwrap(), &mut [2, 3]); + assert_eq!(c.next_back().unwrap(), &mut [4, 5]); + assert_eq!(c.next_back(), None); + + let mut v = [0, 1, 2, 3, 4, 5, 6, 7]; + let mut c = v.rchunks_mut(3); + assert_eq!(c.next_back().unwrap(), &mut [0, 1]); + assert_eq!(c.next_back().unwrap(), &mut [2, 3, 4]); + assert_eq!(c.next_back().unwrap(), &mut [5, 6, 7]); + assert_eq!(c.next_back(), None); +} + #[test] fn test_rchunks_mut_last() { let v: &mut [i32] = &mut [0, 1, 2, 3, 4, 5];