2017-12-14 04:03:55 -06:00
|
|
|
//! Implements a map from integer indices to data.
|
|
|
|
//! Rather than storing data for every index, internally, this maps entire ranges to the data.
|
|
|
|
//! To this end, the APIs all work on ranges, not on individual integers. Ranges are split as
|
2019-02-15 19:29:38 -06:00
|
|
|
//! necessary (e.g., when [0,5) is first associated with X, and then [1,2) is mutated).
|
2017-12-14 04:03:55 -06:00
|
|
|
//! Users must not depend on whether a range is coalesced or not, even though this is observable
|
|
|
|
//! via the iteration APIs.
|
2019-01-05 05:59:33 -06:00
|
|
|
|
2017-12-14 04:03:55 -06:00
|
|
|
use std::ops;
|
|
|
|
|
2018-10-16 11:01:50 -05:00
|
|
|
use rustc::ty::layout::Size;
|
|
|
|
|
2019-01-05 05:59:33 -06:00
|
|
|
/// One entry of a `RangeMap`: a non-empty range of offsets together with the
/// data associated with every offset in that range.
#[derive(Clone, Debug)]
struct Elem<T> {
    /// The range covered by this element; never empty.
    range: ops::Range<u64>,
    /// The data stored for this element.
    data: T,
}
|
2019-01-05 05:59:33 -06:00
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
pub struct RangeMap<T> {
|
|
|
|
v: Vec<Elem<T>>,
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<T> RangeMap<T> {
|
2019-02-15 19:29:38 -06:00
|
|
|
/// Creates a new `RangeMap` for the given size, and with the given initial value used for
|
2018-11-15 06:29:55 -06:00
|
|
|
/// the entire range.
|
2018-10-16 11:01:50 -05:00
|
|
|
#[inline(always)]
|
2018-11-15 06:29:55 -06:00
|
|
|
pub fn new(size: Size, init: T) -> RangeMap<T> {
|
2019-01-05 05:59:33 -06:00
|
|
|
let size = size.bytes();
|
|
|
|
let mut map = RangeMap { v: Vec::new() };
|
|
|
|
if size > 0 {
|
|
|
|
map.v.push(Elem {
|
2019-01-05 08:26:16 -06:00
|
|
|
range: 0..size,
|
2019-01-05 05:59:33 -06:00
|
|
|
data: init
|
|
|
|
});
|
2018-11-15 06:29:55 -06:00
|
|
|
}
|
|
|
|
map
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
|
2019-02-15 19:29:38 -06:00
|
|
|
/// Finds the index containing the given offset.
|
2019-01-05 05:59:33 -06:00
|
|
|
fn find_offset(&self, offset: u64) -> usize {
|
2019-02-15 19:29:38 -06:00
|
|
|
// We do a binary search.
|
2019-01-05 05:59:33 -06:00
|
|
|
let mut left = 0usize; // inclusive
|
|
|
|
let mut right = self.v.len(); // exclusive
|
|
|
|
loop {
|
2019-01-05 09:16:08 -06:00
|
|
|
debug_assert!(left < right, "find_offset: offset {} is out-of-bounds", offset);
|
2019-01-05 05:59:33 -06:00
|
|
|
let candidate = left.checked_add(right).unwrap() / 2;
|
|
|
|
let elem = &self.v[candidate];
|
2019-01-05 08:26:16 -06:00
|
|
|
if offset < elem.range.start {
|
2019-02-26 12:37:05 -06:00
|
|
|
// We are too far right (offset is further left).
|
2019-01-05 05:59:33 -06:00
|
|
|
debug_assert!(candidate < right); // we are making progress
|
|
|
|
right = candidate;
|
2019-01-05 08:26:16 -06:00
|
|
|
} else if offset >= elem.range.end {
|
2019-02-26 12:37:05 -06:00
|
|
|
// We are too far left (offset is further right).
|
2019-01-05 05:59:33 -06:00
|
|
|
debug_assert!(candidate >= left); // we are making progress
|
|
|
|
left = candidate+1;
|
|
|
|
} else {
|
|
|
|
// This is it!
|
|
|
|
return candidate;
|
|
|
|
}
|
|
|
|
}
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
|
2019-02-15 19:29:38 -06:00
|
|
|
/// Provides read-only iteration over everything in the given range. This does
|
|
|
|
/// *not* split items if they overlap with the edges. Do not use this to mutate
|
2018-11-15 06:29:55 -06:00
|
|
|
/// through interior mutability.
|
2018-10-16 11:01:50 -05:00
|
|
|
pub fn iter<'a>(&'a self, offset: Size, len: Size) -> impl Iterator<Item = &'a T> + 'a {
|
2019-01-05 05:59:33 -06:00
|
|
|
let offset = offset.bytes();
|
|
|
|
let len = len.bytes();
|
2019-02-15 19:29:38 -06:00
|
|
|
// Compute a slice starting with the elements we care about.
|
2019-01-05 05:59:33 -06:00
|
|
|
let slice: &[Elem<T>] = if len == 0 {
|
2019-02-15 19:29:38 -06:00
|
|
|
// We just need any empty iterator. We don't even want to
|
2019-01-05 05:59:33 -06:00
|
|
|
// yield the element that surrounds this position.
|
|
|
|
&[]
|
|
|
|
} else {
|
2019-01-05 08:26:16 -06:00
|
|
|
let first_idx = self.find_offset(offset);
|
|
|
|
&self.v[first_idx..]
|
2019-01-05 05:59:33 -06:00
|
|
|
};
|
2019-02-15 19:29:38 -06:00
|
|
|
// The first offset that is not included any more.
|
|
|
|
let end = offset + len;
|
2019-01-05 05:59:33 -06:00
|
|
|
slice.iter()
|
2019-01-05 08:26:16 -06:00
|
|
|
.take_while(move |elem| elem.range.start < end)
|
2019-01-05 05:59:33 -06:00
|
|
|
.map(|elem| &elem.data)
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
|
2018-10-17 08:15:53 -05:00
|
|
|
pub fn iter_mut_all<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> + 'a {
|
2019-01-05 05:59:33 -06:00
|
|
|
self.v.iter_mut().map(|elem| &mut elem.data)
|
2018-10-17 08:15:53 -05:00
|
|
|
}
|
|
|
|
|
2019-02-15 19:29:38 -06:00
|
|
|
// Splits the element situated at the given `index`, such that the 2nd one starts at offset
|
|
|
|
// `split_offset`. Do nothing if the element already starts there.
|
|
|
|
// Returns whether a split was necessary.
|
2019-01-05 05:59:33 -06:00
|
|
|
fn split_index(&mut self, index: usize, split_offset: u64) -> bool
|
2017-12-14 04:03:55 -06:00
|
|
|
where
|
|
|
|
T: Clone,
|
|
|
|
{
|
2019-01-05 05:59:33 -06:00
|
|
|
let elem = &mut self.v[index];
|
2019-01-05 08:26:16 -06:00
|
|
|
if split_offset == elem.range.start || split_offset == elem.range.end {
|
2019-02-15 19:29:38 -06:00
|
|
|
// Nothing to do.
|
2019-01-05 05:59:33 -06:00
|
|
|
return false;
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
2019-01-05 08:26:16 -06:00
|
|
|
debug_assert!(elem.range.contains(&split_offset),
|
2019-02-15 19:29:38 -06:00
|
|
|
"the `split_offset` is not in the element to be split");
|
2019-01-05 05:59:33 -06:00
|
|
|
|
2019-02-15 19:29:38 -06:00
|
|
|
// Now we really have to split. Reduce length of first element.
|
2019-01-05 08:26:16 -06:00
|
|
|
let second_range = split_offset..elem.range.end;
|
|
|
|
elem.range.end = split_offset;
|
2019-02-15 19:29:38 -06:00
|
|
|
// Copy the data, and insert second element.
|
2019-01-05 05:59:33 -06:00
|
|
|
let second = Elem {
|
2019-01-05 08:26:16 -06:00
|
|
|
range: second_range,
|
2019-01-05 05:59:33 -06:00
|
|
|
data: elem.data.clone(),
|
|
|
|
};
|
|
|
|
self.v.insert(index+1, second);
|
|
|
|
return true;
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
|
2019-02-15 19:29:38 -06:00
|
|
|
/// Provides mutable iteration over everything in the given range. As a side-effect,
|
2017-12-14 04:03:55 -06:00
|
|
|
/// this will split entries in the map that are only partially hit by the given range,
|
|
|
|
/// to make sure that when they are mutated, the effect is constrained to the given range.
|
2019-01-07 12:41:18 -06:00
|
|
|
/// Moreover, this will opportunistically merge neighbouring equal blocks.
|
2018-11-15 06:29:55 -06:00
|
|
|
pub fn iter_mut<'a>(
|
2017-12-14 04:03:55 -06:00
|
|
|
&'a mut self,
|
2018-10-16 11:01:50 -05:00
|
|
|
offset: Size,
|
|
|
|
len: Size,
|
2017-12-14 04:03:55 -06:00
|
|
|
) -> impl Iterator<Item = &'a mut T> + 'a
|
|
|
|
where
|
2019-01-05 08:26:16 -06:00
|
|
|
T: Clone + PartialEq,
|
2017-12-14 04:03:55 -06:00
|
|
|
{
|
2018-10-16 11:01:50 -05:00
|
|
|
let offset = offset.bytes();
|
|
|
|
let len = len.bytes();
|
2019-01-05 05:59:33 -06:00
|
|
|
// Compute a slice containing exactly the elements we care about
|
|
|
|
let slice: &mut [Elem<T>] = if len == 0 {
|
2019-02-15 19:29:38 -06:00
|
|
|
// We just need any empty iterator. We don't even want to
|
2019-01-05 05:59:33 -06:00
|
|
|
// yield the element that surrounds this position, nor do
|
|
|
|
// any splitting.
|
|
|
|
&mut []
|
|
|
|
} else {
|
|
|
|
// Make sure we got a clear beginning
|
2019-01-05 08:26:16 -06:00
|
|
|
let mut first_idx = self.find_offset(offset);
|
|
|
|
if self.split_index(first_idx, offset) {
|
2019-01-05 05:59:33 -06:00
|
|
|
// The newly created 2nd element is ours
|
2019-01-05 08:26:16 -06:00
|
|
|
first_idx += 1;
|
2019-01-05 05:59:33 -06:00
|
|
|
}
|
2019-01-05 08:26:16 -06:00
|
|
|
let first_idx = first_idx; // no more mutation
|
2019-02-15 19:29:38 -06:00
|
|
|
// Find our end. Linear scan, but that's ok because the iteration
|
2019-01-05 05:59:33 -06:00
|
|
|
// is doing the same linear scan anyway -- no increase in complexity.
|
2019-01-05 08:26:16 -06:00
|
|
|
// We combine this scan with a scan for duplicates that we can merge, to reduce
|
|
|
|
// the number of elements.
|
2019-01-07 12:36:25 -06:00
|
|
|
// We stop searching after the first "block" of size 1, to avoid spending excessive
|
|
|
|
// amounts of time on the merging.
|
2019-01-05 08:26:16 -06:00
|
|
|
let mut equal_since_idx = first_idx;
|
2019-01-07 12:36:25 -06:00
|
|
|
// Once we see too many non-mergeable blocks, we stop.
|
2019-02-15 19:29:38 -06:00
|
|
|
// The initial value is chosen via... magic. Benchmarking and magic.
|
2019-01-07 12:36:25 -06:00
|
|
|
let mut successful_merge_count = 3usize;
|
2019-01-05 08:26:16 -06:00
|
|
|
let mut end_idx = first_idx; // when the loop is done, this is the first excluded element.
|
2019-01-05 05:59:33 -06:00
|
|
|
loop {
|
2019-01-05 08:26:16 -06:00
|
|
|
// Compute if `end` is the last element we need to look at.
|
2019-06-29 07:15:05 -05:00
|
|
|
let done = self.v[end_idx].range.end >= offset+len;
|
2019-01-05 08:26:16 -06:00
|
|
|
// We definitely need to include `end`, so move the index.
|
|
|
|
end_idx += 1;
|
|
|
|
debug_assert!(done || end_idx < self.v.len(), "iter_mut: end-offset {} is out-of-bounds", offset+len);
|
|
|
|
// see if we want to merge everything in `equal_since..end` (exclusive at the end!)
|
2019-01-07 12:36:25 -06:00
|
|
|
if successful_merge_count > 0 {
|
|
|
|
if done || self.v[end_idx].data != self.v[equal_since_idx].data {
|
2019-02-15 19:29:38 -06:00
|
|
|
// Everything in `equal_since..end` was equal. Make them just one element covering
|
2019-01-07 12:36:25 -06:00
|
|
|
// the entire range.
|
|
|
|
let removed_elems = end_idx - equal_since_idx - 1; // number of elements that we would remove
|
|
|
|
if removed_elems > 0 {
|
|
|
|
// Adjust the range of the first element to cover all of them.
|
|
|
|
let equal_until = self.v[end_idx - 1].range.end; // end of range of last of the equal elements
|
|
|
|
self.v[equal_since_idx].range.end = equal_until;
|
|
|
|
// Delete the rest of them.
|
|
|
|
self.v.splice(equal_since_idx+1..end_idx, std::iter::empty());
|
|
|
|
// Adjust `end_idx` because we made the list shorter.
|
|
|
|
end_idx -= removed_elems;
|
2019-02-15 19:29:38 -06:00
|
|
|
// Adjust the count for the cutoff.
|
2019-01-07 12:36:25 -06:00
|
|
|
successful_merge_count += removed_elems;
|
|
|
|
} else {
|
2019-02-15 19:29:38 -06:00
|
|
|
// Adjust the count for the cutoff.
|
2019-01-07 12:36:25 -06:00
|
|
|
successful_merge_count -= 1;
|
|
|
|
}
|
|
|
|
// Go on scanning for the next block starting here.
|
|
|
|
equal_since_idx = end_idx;
|
2019-01-05 08:26:16 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Leave loop if this is the last element.
|
|
|
|
if done {
|
2019-01-05 05:59:33 -06:00
|
|
|
break;
|
|
|
|
}
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
2019-02-15 19:29:38 -06:00
|
|
|
// Move to last included instead of first excluded index.
|
|
|
|
let end_idx = end_idx-1;
|
|
|
|
// We need to split the end as well. Even if this performs a
|
2019-01-05 05:59:33 -06:00
|
|
|
// split, we don't have to adjust our index as we only care about
|
|
|
|
// the first part of the split.
|
2019-01-05 08:26:16 -06:00
|
|
|
self.split_index(end_idx, offset+len);
|
2019-01-05 05:59:33 -06:00
|
|
|
// Now we yield the slice. `end` is inclusive.
|
2019-01-05 08:26:16 -06:00
|
|
|
&mut self.v[first_idx..=end_idx]
|
2019-01-05 05:59:33 -06:00
|
|
|
};
|
|
|
|
slice.iter_mut().map(|elem| &mut elem.data)
|
2017-12-14 04:03:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Query the map at every offset in the range and collect the results.
    fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
        (offset..offset + len)
            .map(|i| {
                map.iter(Size::from_bytes(i), Size::from_bytes(1))
                    .next()
                    .copied()
                    .unwrap()
            })
            .collect()
    }

    /// Mutating a single offset splits the surrounding element, and
    /// zero-length mutable iteration changes nothing.
    #[test]
    fn basic_insert() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        // Insert.
        for x in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
            *x = 42;
        }
        // Check.
        assert_eq!(to_vec(&map, 10, 1), vec![42]);
        assert_eq!(map.v.len(), 3);

        // Insert with size 0.
        for x in map.iter_mut(Size::from_bytes(10), Size::from_bytes(0)) {
            *x = 19;
        }
        for x in map.iter_mut(Size::from_bytes(11), Size::from_bytes(0)) {
            *x = 19;
        }
        assert_eq!(to_vec(&map, 10, 2), vec![42, -1]);
        assert_eq!(map.v.len(), 3);
    }

    /// Exercises splitting around gaps and the opportunistic merging done by `iter_mut`.
    #[test]
    fn gaps() {
        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
        for x in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
            *x = 42;
        }
        for x in map.iter_mut(Size::from_bytes(15), Size::from_bytes(1)) {
            *x = 43;
        }
        assert_eq!(map.v.len(), 5);
        assert_eq!(
            to_vec(&map, 10, 10),
            vec![-1, 42, -1, -1, -1, 43, -1, -1, -1, -1]
        );

        for x in map.iter_mut(Size::from_bytes(10), Size::from_bytes(10)) {
            if *x < 42 {
                *x = 23;
            }
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(
            to_vec(&map, 10, 10),
            vec![23, 42, 23, 23, 23, 43, 23, 23, 23, 23]
        );
        assert_eq!(to_vec(&map, 13, 5), vec![23, 23, 43, 23, 23]);

        for x in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {
            *x = 19;
        }
        assert_eq!(map.v.len(), 6);
        assert_eq!(
            to_vec(&map, 10, 10),
            vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]
        );
        // Should be seeing two blocks with 19.
        assert_eq!(
            map.iter(Size::from_bytes(15), Size::from_bytes(2))
                .copied()
                .collect::<Vec<_>>(),
            vec![19, 19]
        );

        // A NOP `iter_mut` should trigger merging.
        for _ in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {}
        assert_eq!(map.v.len(), 5);
        assert_eq!(
            to_vec(&map, 10, 10),
            vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]
        );
    }
}
|