Auto merge of #97862 - SparrowLii:superset, r=lcnr
Optimize the `superset` method of `IntervalSet`. Given that the intervals in an `IntervalSet` are sorted and strictly separated (that is, the `end` of the previous interval is never equal to the `start` of the next interval), we can reduce the complexity of the `superset` method from O(NM log N) to O(2N), where N is the number of intervals and M is the length of each interval.
This commit is contained in:
commit
6dc598a01b
@ -1,7 +1,7 @@
|
|||||||
use std::iter::Step;
|
use std::iter::Step;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::ops::Bound;
|
|
||||||
use std::ops::RangeBounds;
|
use std::ops::RangeBounds;
|
||||||
|
use std::ops::{Bound, Range};
|
||||||
|
|
||||||
use crate::vec::Idx;
|
use crate::vec::Idx;
|
||||||
use crate::vec::IndexVec;
|
use crate::vec::IndexVec;
|
||||||
@ -11,6 +11,10 @@
|
|||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
/// Stores a set of intervals on the indices.
|
/// Stores a set of intervals on the indices.
|
||||||
|
///
|
||||||
|
/// The elements in `map` are sorted and non-adjacent, which means
|
||||||
|
/// the second value of the previous element plus one is *less* than the
|
||||||
|
/// first value of the following element.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct IntervalSet<I> {
|
pub struct IntervalSet<I> {
|
||||||
// Start, end
|
// Start, end
|
||||||
@ -84,7 +88,7 @@ pub fn insert_range(&mut self, range: impl RangeBounds<I> + Clone) -> bool {
|
|||||||
// continue to the next range. We're looking here for the first
|
// continue to the next range. We're looking here for the first
|
||||||
// range which starts *non-adjacently* to our end.
|
// range which starts *non-adjacently* to our end.
|
||||||
let next = self.map.partition_point(|r| r.0 <= end + 1);
|
let next = self.map.partition_point(|r| r.0 <= end + 1);
|
||||||
if let Some(right) = next.checked_sub(1) {
|
let result = if let Some(right) = next.checked_sub(1) {
|
||||||
let (prev_start, prev_end) = self.map[right];
|
let (prev_start, prev_end) = self.map[right];
|
||||||
if prev_end + 1 >= start {
|
if prev_end + 1 >= start {
|
||||||
// If the start for the inserted range is adjacent to the
|
// If the start for the inserted range is adjacent to the
|
||||||
@ -99,7 +103,7 @@ pub fn insert_range(&mut self, range: impl RangeBounds<I> + Clone) -> bool {
|
|||||||
if left != right {
|
if left != right {
|
||||||
self.map.drain(left..right);
|
self.map.drain(left..right);
|
||||||
}
|
}
|
||||||
return true;
|
true
|
||||||
} else {
|
} else {
|
||||||
// We overlap with the previous range, increase it to
|
// We overlap with the previous range, increase it to
|
||||||
// include us.
|
// include us.
|
||||||
@ -107,17 +111,17 @@ pub fn insert_range(&mut self, range: impl RangeBounds<I> + Clone) -> bool {
|
|||||||
// Make sure we're actually going to *increase* it though --
|
// Make sure we're actually going to *increase* it though --
|
||||||
// it may be that end is just inside the previously existing
|
// it may be that end is just inside the previously existing
|
||||||
// set.
|
// set.
|
||||||
return if end > prev_end {
|
if end > prev_end {
|
||||||
self.map[right].1 = end;
|
self.map[right].1 = end;
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, we don't overlap, so just insert
|
// Otherwise, we don't overlap, so just insert
|
||||||
self.map.insert(right + 1, (start, end));
|
self.map.insert(right + 1, (start, end));
|
||||||
return true;
|
true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if self.map.is_empty() {
|
if self.map.is_empty() {
|
||||||
@ -127,8 +131,16 @@ pub fn insert_range(&mut self, range: impl RangeBounds<I> + Clone) -> bool {
|
|||||||
} else {
|
} else {
|
||||||
self.map.insert(next, (start, end));
|
self.map.insert(next, (start, end));
|
||||||
}
|
}
|
||||||
return true;
|
true
|
||||||
}
|
};
|
||||||
|
debug_assert!(
|
||||||
|
self.check_invariants(),
|
||||||
|
"wrong intervals after insert {:?}..={:?} to {:?}",
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
self
|
||||||
|
);
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains(&self, needle: I) -> bool {
|
pub fn contains(&self, needle: I) -> bool {
|
||||||
@ -145,9 +157,26 @@ pub fn superset(&self, other: &IntervalSet<I>) -> bool
|
|||||||
where
|
where
|
||||||
I: Step,
|
I: Step,
|
||||||
{
|
{
|
||||||
// FIXME: Performance here is probably not great. We will be doing a lot
|
let mut sup_iter = self.iter_intervals();
|
||||||
// of pointless tree traversals.
|
let mut current = None;
|
||||||
other.iter().all(|elem| self.contains(elem))
|
let contains = |sup: Range<I>, sub: Range<I>, current: &mut Option<Range<I>>| {
|
||||||
|
if sup.end < sub.start {
|
||||||
|
// if `sup.end == sub.start`, the next sup doesn't contain `sub.start`
|
||||||
|
None // continue to the next sup
|
||||||
|
} else if sup.end >= sub.end && sup.start <= sub.start {
|
||||||
|
*current = Some(sup); // save the current sup
|
||||||
|
Some(true)
|
||||||
|
} else {
|
||||||
|
Some(false)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
other.iter_intervals().all(|sub| {
|
||||||
|
current
|
||||||
|
.take()
|
||||||
|
.and_then(|sup| contains(sup, sub.clone(), &mut current))
|
||||||
|
.or_else(|| sup_iter.find_map(|sup| contains(sup, sub.clone(), &mut current)))
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
@ -174,7 +203,10 @@ pub fn last_set_in(&self, range: impl RangeBounds<I> + Clone) -> Option<I> {
|
|||||||
|
|
||||||
pub fn insert_all(&mut self) {
|
pub fn insert_all(&mut self) {
|
||||||
self.clear();
|
self.clear();
|
||||||
self.map.push((0, self.domain.try_into().unwrap()));
|
if let Some(end) = self.domain.checked_sub(1) {
|
||||||
|
self.map.push((0, end.try_into().unwrap()));
|
||||||
|
}
|
||||||
|
debug_assert!(self.check_invariants());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn union(&mut self, other: &IntervalSet<I>) -> bool
|
pub fn union(&mut self, other: &IntervalSet<I>) -> bool
|
||||||
@ -186,8 +218,21 @@ pub fn union(&mut self, other: &IntervalSet<I>) -> bool
|
|||||||
for range in other.iter_intervals() {
|
for range in other.iter_intervals() {
|
||||||
did_insert |= self.insert_range(range);
|
did_insert |= self.insert_range(range);
|
||||||
}
|
}
|
||||||
|
debug_assert!(self.check_invariants());
|
||||||
did_insert
|
did_insert
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check the intervals are valid, sorted and non-adjacent
|
||||||
|
fn check_invariants(&self) -> bool {
|
||||||
|
let mut current: Option<u32> = None;
|
||||||
|
for (start, end) in &self.map {
|
||||||
|
if start > end || current.map_or(false, |x| x + 1 >= *start) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
current = Some(*end);
|
||||||
|
}
|
||||||
|
current.map_or(true, |x| x < self.domain as u32)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This data structure optimizes for cases where the stored bits in each row
|
/// This data structure optimizes for cases where the stored bits in each row
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn insert_collapses() {
|
fn insert_collapses() {
|
||||||
let mut set = IntervalSet::<u32>::new(3000);
|
let mut set = IntervalSet::<u32>::new(10000);
|
||||||
set.insert_range(9831..=9837);
|
set.insert_range(9831..=9837);
|
||||||
set.insert_range(43..=9830);
|
set.insert_range(43..=9830);
|
||||||
assert_eq!(set.iter_intervals().collect::<Vec<_>>(), [43..9838]);
|
assert_eq!(set.iter_intervals().collect::<Vec<_>>(), [43..9838]);
|
||||||
|
Loading…
Reference in New Issue
Block a user