rust/src/range_map.rs

301 lines
12 KiB
Rust
Raw Normal View History

2017-12-14 04:03:55 -06:00
//! Implements a map from integer indices to data.
//! Rather than storing data for every index, internally, this maps entire ranges to the data.
//! To this end, the APIs all work on ranges, not on individual integers. Ranges are split as
//! necessary (e.g., when [0,5) is first associated with X, and then [1,2) is mutated).
//! Users must not depend on whether a range is coalesced or not, even though this is observable
//! via the iteration APIs.
2017-12-14 04:03:55 -06:00
use std::ops;
2020-04-02 17:05:35 -05:00
use rustc_target::abi::Size;
2018-10-16 11:01:50 -05:00
/// One entry of a `RangeMap`: a half-open range of offsets together with the data that is
/// associated with every offset in that range.
#[derive(Clone, Debug)]
struct Elem<T> {
    /// The range covered by this element; never empty.
    range: ops::Range<u64>,
    /// The data stored for this element.
    data: T,
}
#[derive(Clone, Debug)]
pub struct RangeMap<T> {
v: Vec<Elem<T>>,
2017-12-14 04:03:55 -06:00
}
impl<T> RangeMap<T> {
2019-02-15 19:29:38 -06:00
/// Creates a new `RangeMap` for the given size, and with the given initial value used for
/// the entire range.
2018-10-16 11:01:50 -05:00
#[inline(always)]
pub fn new(size: Size, init: T) -> RangeMap<T> {
let size = size.bytes();
let mut map = RangeMap { v: Vec::new() };
if size > 0 {
2019-12-23 05:56:23 -06:00
map.v.push(Elem { range: 0..size, data: init });
}
map
2017-12-14 04:03:55 -06:00
}
2019-02-15 19:29:38 -06:00
/// Finds the index containing the given offset.
fn find_offset(&self, offset: u64) -> usize {
2019-02-15 19:29:38 -06:00
// We do a binary search.
let mut left = 0usize; // inclusive
let mut right = self.v.len(); // exclusive
loop {
debug_assert!(left < right, "find_offset: offset {} is out-of-bounds", offset);
let candidate = left.checked_add(right).unwrap() / 2;
let elem = &self.v[candidate];
if offset < elem.range.start {
2019-02-26 12:37:05 -06:00
// We are too far right (offset is further left).
debug_assert!(candidate < right); // we are making progress
right = candidate;
} else if offset >= elem.range.end {
2019-02-26 12:37:05 -06:00
// We are too far left (offset is further right).
debug_assert!(candidate >= left); // we are making progress
2019-12-23 05:56:23 -06:00
left = candidate + 1;
} else {
// This is it!
return candidate;
}
}
2017-12-14 04:03:55 -06:00
}
2019-02-15 19:29:38 -06:00
/// Provides read-only iteration over everything in the given range. This does
/// *not* split items if they overlap with the edges. Do not use this to mutate
/// through interior mutability.
///
/// The iterator also provides the offset of the given element.
Resolve clippy::needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:66:5 | 66 | pub fn iter<'a>(&'a self, offset: Size, len: Size) -> impl Iterator<Item = (Size, &'a T)> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `-D clippy::needless-lifetimes` implied by `-D clippy::all` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:86:5 | 86 | pub fn iter_mut_all<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:122:5 | 122 | / pub fn iter_mut<'a>( 123 | | &'a mut self, 124 | | offset: Size, 125 | | len: Size, 126 | | ) -> impl Iterator<Item = (Size, &'a mut T)> + 'a | |_____________________________________________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/shims/intrinsics.rs:1391:1 | 1391 | fn simd_element_to_bool<'tcx>(elem: ImmTy<'tcx, Tag>) -> InterpResult<'tcx, bool> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes
2022-04-29 17:43:36 -05:00
pub fn iter(&self, offset: Size, len: Size) -> impl Iterator<Item = (Size, &T)> {
let offset = offset.bytes();
let len = len.bytes();
2019-02-15 19:29:38 -06:00
// Compute a slice starting with the elements we care about.
let slice: &[Elem<T>] = if len == 0 {
2019-12-23 05:56:23 -06:00
// We just need any empty iterator. We don't even want to
// yield the element that surrounds this position.
&[]
} else {
let first_idx = self.find_offset(offset);
&self.v[first_idx..]
};
2019-02-15 19:29:38 -06:00
// The first offset that is not included any more.
let end = offset + len;
2022-07-14 13:27:24 -05:00
assert!(
end <= self.v.last().unwrap().range.end,
"iterating beyond the bounds of this RangeMap"
);
2021-05-16 04:28:01 -05:00
slice
.iter()
.take_while(move |elem| elem.range.start < end)
.map(|elem| (Size::from_bytes(elem.range.start), &elem.data))
2017-12-14 04:03:55 -06:00
}
Resolve clippy::needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:66:5 | 66 | pub fn iter<'a>(&'a self, offset: Size, len: Size) -> impl Iterator<Item = (Size, &'a T)> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `-D clippy::needless-lifetimes` implied by `-D clippy::all` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:86:5 | 86 | pub fn iter_mut_all<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:122:5 | 122 | / pub fn iter_mut<'a>( 123 | | &'a mut self, 124 | | offset: Size, 125 | | len: Size, 126 | | ) -> impl Iterator<Item = (Size, &'a mut T)> + 'a | |_____________________________________________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/shims/intrinsics.rs:1391:1 | 1391 | fn simd_element_to_bool<'tcx>(elem: ImmTy<'tcx, Tag>) -> InterpResult<'tcx, bool> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes
2022-04-29 17:43:36 -05:00
pub fn iter_mut_all(&mut self) -> impl Iterator<Item = &mut T> {
self.v.iter_mut().map(|elem| &mut elem.data)
}
2019-02-15 19:29:38 -06:00
// Splits the element situated at the given `index`, such that the 2nd one starts at offset
// `split_offset`. Do nothing if the element already starts there.
// Returns whether a split was necessary.
fn split_index(&mut self, index: usize, split_offset: u64) -> bool
2017-12-14 04:03:55 -06:00
where
T: Clone,
{
let elem = &mut self.v[index];
if split_offset == elem.range.start || split_offset == elem.range.end {
2019-02-15 19:29:38 -06:00
// Nothing to do.
return false;
2017-12-14 04:03:55 -06:00
}
2019-12-23 05:56:23 -06:00
debug_assert!(
elem.range.contains(&split_offset),
"the `split_offset` is not in the element to be split"
);
2019-02-15 19:29:38 -06:00
// Now we really have to split. Reduce length of first element.
let second_range = split_offset..elem.range.end;
elem.range.end = split_offset;
2019-02-15 19:29:38 -06:00
// Copy the data, and insert second element.
2019-12-23 05:56:23 -06:00
let second = Elem { range: second_range, data: elem.data.clone() };
self.v.insert(index + 1, second);
Resolve clippy::needless_return error: unneeded `return` statement --> src/helpers.rs:734:13 | 734 | return Ok(()); | ^^^^^^^^^^^^^^ help: remove `return`: `Ok(())` | = note: `-D clippy::needless-return` implied by `-D clippy::all` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/range_map.rs:113:9 | 113 | return true; | ^^^^^^^^^^^^ help: remove `return`: `true` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/shims/posix/fs.rs:648:25 | 648 | None => return this.handle_not_found(), | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove `return`: `this.handle_not_found()` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/shims/panic.rs:62:9 | 62 | return Ok(()); | ^^^^^^^^^^^^^^ help: remove `return`: `Ok(())` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/shims/panic.rs:115:9 | 115 | return Ok(()); | ^^^^^^^^^^^^^^ help: remove `return`: `Ok(())` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/thread.rs:477:9 | 477 | return free_tls_statics; | ^^^^^^^^^^^^^^^^^^^^^^^^ help: remove `return`: `free_tls_statics` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return error: unneeded `return` statement --> src/thread.rs:459:17 | 459 | return false; | ^^^^^^^^^^^^^ help: remove `return`: `false` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_return
2022-04-29 17:48:24 -05:00
true
2017-12-14 04:03:55 -06:00
}
2019-02-15 19:29:38 -06:00
/// Provides mutable iteration over everything in the given range. As a side-effect,
2017-12-14 04:03:55 -06:00
/// this will split entries in the map that are only partially hit by the given range,
/// to make sure that when they are mutated, the effect is constrained to the given range.
2019-01-07 12:41:18 -06:00
/// Moreover, this will opportunistically merge neighbouring equal blocks.
///
/// The iterator also provides the offset of the given element.
Resolve clippy::needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:66:5 | 66 | pub fn iter<'a>(&'a self, offset: Size, len: Size) -> impl Iterator<Item = (Size, &'a T)> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `-D clippy::needless-lifetimes` implied by `-D clippy::all` = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:86:5 | 86 | pub fn iter_mut_all<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> + 'a { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/range_map.rs:122:5 | 122 | / pub fn iter_mut<'a>( 123 | | &'a mut self, 124 | | offset: Size, 125 | | len: Size, 126 | | ) -> impl Iterator<Item = (Size, &'a mut T)> + 'a | |_____________________________________________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes error: explicit lifetimes given in parameter types where they could be elided (or replaced with `'_` if needed by type declaration) --> src/shims/intrinsics.rs:1391:1 | 1391 | fn simd_element_to_bool<'tcx>(elem: ImmTy<'tcx, Tag>) -> InterpResult<'tcx, bool> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_lifetimes
2022-04-29 17:43:36 -05:00
pub fn iter_mut(&mut self, offset: Size, len: Size) -> impl Iterator<Item = (Size, &mut T)>
2017-12-14 04:03:55 -06:00
where
T: Clone + PartialEq,
2017-12-14 04:03:55 -06:00
{
2018-10-16 11:01:50 -05:00
let offset = offset.bytes();
let len = len.bytes();
// Compute a slice containing exactly the elements we care about
let slice: &mut [Elem<T>] = if len == 0 {
2019-12-23 05:56:23 -06:00
// We just need any empty iterator. We don't even want to
// yield the element that surrounds this position, nor do
// any splitting.
&mut []
} else {
// Make sure we got a clear beginning
let mut first_idx = self.find_offset(offset);
if self.split_index(first_idx, offset) {
// The newly created 2nd element is ours
first_idx += 1;
}
// No more mutation.
let first_idx = first_idx;
// Find our end. Linear scan, but that's ok because the iteration
// is doing the same linear scan anyway -- no increase in complexity.
// We combine this scan with a scan for duplicates that we can merge, to reduce
// the number of elements.
// We stop searching after the first "block" of size 1, to avoid spending excessive
// amounts of time on the merging.
let mut equal_since_idx = first_idx;
// Once we see too many non-mergeable blocks, we stop.
// The initial value is chosen via... magic. Benchmarking and magic.
let mut successful_merge_count = 3usize;
// When the loop is done, this is the first excluded element.
let mut end_idx = first_idx;
loop {
// Compute if `end` is the last element we need to look at.
let done = self.v[end_idx].range.end >= offset + len;
// We definitely need to include `end`, so move the index.
end_idx += 1;
debug_assert!(
done || end_idx < self.v.len(),
"iter_mut: end-offset {} is out-of-bounds",
offset + len
);
// see if we want to merge everything in `equal_since..end` (exclusive at the end!)
if successful_merge_count > 0 {
if done || self.v[end_idx].data != self.v[equal_since_idx].data {
// Everything in `equal_since..end` was equal. Make them just one element covering
// the entire range.
let removed_elems = end_idx - equal_since_idx - 1; // number of elements that we would remove
if removed_elems > 0 {
// Adjust the range of the first element to cover all of them.
let equal_until = self.v[end_idx - 1].range.end; // end of range of last of the equal elements
self.v[equal_since_idx].range.end = equal_until;
// Delete the rest of them.
self.v.splice(equal_since_idx + 1..end_idx, std::iter::empty());
// Adjust `end_idx` because we made the list shorter.
end_idx -= removed_elems;
// Adjust the count for the cutoff.
successful_merge_count += removed_elems;
} else {
// Adjust the count for the cutoff.
successful_merge_count -= 1;
}
2019-12-23 05:56:23 -06:00
// Go on scanning for the next block starting here.
equal_since_idx = end_idx;
}
2017-12-14 04:03:55 -06:00
}
2019-12-23 05:56:23 -06:00
// Leave loop if this is the last element.
if done {
break;
}
}
// Move to last included instead of first excluded index.
let end_idx = end_idx - 1;
// We need to split the end as well. Even if this performs a
// split, we don't have to adjust our index as we only care about
// the first part of the split.
self.split_index(end_idx, offset + len);
// Now we yield the slice. `end` is inclusive.
&mut self.v[first_idx..=end_idx]
};
slice.iter_mut().map(|elem| (Size::from_bytes(elem.range.start), &mut elem.data))
2017-12-14 04:03:55 -06:00
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Query the map at every offset in the range and collect the results.
    fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
        // A `Range` already is an iterator, no conversion needed.
        (offset..offset + len)
            .map(|i| {
                map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
            })
            .collect()
    }

    /// Writing a single byte splits the initial element into three; zero-sized accesses
    /// neither write nor split.
    #[test]
    fn basic_insert() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        // Insert.
        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
            *x = 42;
        }
        // Check.
        assert_eq!(to_vec(&map, 10, 1), vec![42]);
        assert_eq!(map.v.len(), 3);

        // Insert with size 0.
        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(0)) {
            *x = 19;
        }
        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(0)) {
            *x = 19;
        }
        assert_eq!(to_vec(&map, 10, 2), vec![42, -1]);
        assert_eq!(map.v.len(), 3);
    }

    /// Exercises splitting around isolated writes and the opportunistic merging of
    /// neighbouring equal blocks.
    #[test]
    fn gaps() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
            *x = 42;
        }
        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(1)) {
            *x = 43;
        }
        assert_eq!(map.v.len(), 5);
        assert_eq!(to_vec(&map, 10, 10), vec![-1, 42, -1, -1, -1, 43, -1, -1, -1, -1]);

        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(10)) {
            if *x < 42 {
                *x = 23;
            }
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 43, 23, 23, 23, 23]);
        assert_eq!(to_vec(&map, 13, 5), vec![23, 23, 43, 23, 23]);

        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {
            *x = 19;
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
        // Should be seeing two blocks with 19.
        assert_eq!(
            map.iter(Size::from_bytes(15), Size::from_bytes(2))
                .map(|(_, &t)| t)
                .collect::<Vec<_>>(),
            vec![19, 19]
        );

        // A NOP `iter_mut` should trigger merging.
        for _ in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {}
        assert_eq!(map.v.len(), 5);
        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
    }

    /// A zero-sized mutable access on a map without elements yields nothing.
    #[test]
    fn empty_map_iter_mut() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(0), -1);
        assert_eq!(map.iter_mut(Size::from_bytes(0), Size::from_bytes(0)).count(), 0);
        assert_eq!(map.v.len(), 0);
    }

    #[test]
    #[should_panic]
    fn out_of_range_iter_mut() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
    }

    #[test]
    #[should_panic(expected = "iterating beyond the bounds of this RangeMap")]
    fn out_of_range_iter() {
        let map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
    }
}