From 4c91bb9571ffbc7ddad52cc98552f5b19b0d44d7 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Tue, 11 Jun 2019 13:40:24 -0400 Subject: [PATCH] introduce a `VecGraph` abstraction that cheaply stores graphs This is perhaps better than the linked list approach I was using before. Lower memory overhead, Theta(N+E) storage. Does require a sort. =) --- src/librustc_data_structures/graph/mod.rs | 5 + src/librustc_data_structures/graph/scc/mod.rs | 21 +++- .../graph/vec_graph/mod.rs | 113 ++++++++++++++++++ .../graph/vec_graph/test.rs | 46 +++++++ src/librustc_data_structures/indexed_vec.rs | 15 ++- 5 files changed, 197 insertions(+), 3 deletions(-) create mode 100644 src/librustc_data_structures/graph/vec_graph/mod.rs create mode 100644 src/librustc_data_structures/graph/vec_graph/test.rs diff --git a/src/librustc_data_structures/graph/mod.rs b/src/librustc_data_structures/graph/mod.rs index 3d47b7d49fb..45e7e5db38f 100644 --- a/src/librustc_data_structures/graph/mod.rs +++ b/src/librustc_data_structures/graph/mod.rs @@ -5,6 +5,7 @@ pub mod iterate; mod reference; pub mod scc; +pub mod vec_graph; #[cfg(test)] mod test; @@ -17,6 +18,10 @@ pub trait WithNumNodes: DirectedGraph { fn num_nodes(&self) -> usize; } +pub trait WithNumEdges: DirectedGraph { + fn num_edges(&self) -> usize; +} + pub trait WithSuccessors: DirectedGraph where Self: for<'graph> GraphSuccessors<'graph, Item = ::Node>, diff --git a/src/librustc_data_structures/graph/scc/mod.rs b/src/librustc_data_structures/graph/scc/mod.rs index 7d29bd63707..78554cda77b 100644 --- a/src/librustc_data_structures/graph/scc/mod.rs +++ b/src/librustc_data_structures/graph/scc/mod.rs @@ -4,7 +4,8 @@ //! O(n) time. use crate::fx::FxHashSet; -use crate::graph::{DirectedGraph, WithNumNodes, WithSuccessors, GraphSuccessors}; +use crate::graph::{DirectedGraph, WithNumNodes, WithNumEdges, WithSuccessors, GraphSuccessors}; +use crate::graph::vec_graph::VecGraph; use crate::indexed_vec::{Idx, IndexVec}; use std::ops::Range; @@ -58,6 +59,18 @@ pub fn scc(&self, r: N) -> S { pub fn successors(&self, scc: S) -> &[S] { self.scc_data.successors(scc) } + + /// Construct the reverse graph of the SCC graph. + pub fn reverse(&self) -> VecGraph { + VecGraph::new( + self.num_sccs(), + self.all_sccs() + .flat_map(|source| self.successors(source).iter().map(move |&target| { + (target, source) + })) + .collect(), + ) + } } impl DirectedGraph for Sccs { @@ -70,6 +83,12 @@ fn num_nodes(&self) -> usize { } } +impl WithNumEdges for Sccs { + fn num_edges(&self) -> usize { + self.scc_data.all_successors.len() + } +} + impl GraphSuccessors<'graph> for Sccs { type Item = S; diff --git a/src/librustc_data_structures/graph/vec_graph/mod.rs b/src/librustc_data_structures/graph/vec_graph/mod.rs new file mode 100644 index 00000000000..6b3349e3e15 --- /dev/null +++ b/src/librustc_data_structures/graph/vec_graph/mod.rs @@ -0,0 +1,113 @@ +use crate::indexed_vec::{Idx, IndexVec}; +use crate::graph::{DirectedGraph, WithNumNodes, WithNumEdges, WithSuccessors, GraphSuccessors}; + +mod test; + +pub struct VecGraph { + /// Maps from a given node to an index where the set of successors + /// for that node starts. The index indexes into the `edges` + /// vector. To find the range for a given node, we look up the + /// start for that node and then the start for the next node + /// (i.e., with an index 1 higher) and get the range between the + /// two. This vector always has an extra entry so that this works + /// even for the max element. + node_starts: IndexVec, + + edge_targets: Vec, +} + +impl VecGraph { + pub fn new( + num_nodes: usize, + mut edge_pairs: Vec<(N, N)>, + ) -> Self { + // Sort the edges by the source -- this is important. + edge_pairs.sort(); + + let num_edges = edge_pairs.len(); + + // Store the *target* of each edge into `edge_targets` + let edge_targets: Vec = edge_pairs.iter().map(|&(_, target)| target).collect(); + + // Create the *edge starts* array. We are iterating over over + // the (sorted) edge pairs. We maintain the invariant that the + // length of the `node_starts` arary is enough to store the + // current source node -- so when we see that the source node + // for an edge is greater than the current length, we grow the + // edge-starts array by just enough. + let mut node_starts = IndexVec::with_capacity(num_edges); + for (index, &(source, _)) in edge_pairs.iter().enumerate() { + // If we have a list like `[(0, x), (2, y)]`: + // + // - Start out with `node_starts` of `[]` + // - Iterate to `(0, x)` at index 0: + // - Push one entry because `node_starts.len()` (0) is <= the source (0) + // - Leaving us with `node_starts` of `[0]` + // - Iterate to `(2, y)` at index 1: + // - Push one entry because `node_starts.len()` (1) is <= the source (2) + // - Push one entry because `node_starts.len()` (2) is <= the source (2) + // - Leaving us with `node_starts` of `[0, 1, 1]` + // - Loop terminates + while node_starts.len() <= source.index() { + node_starts.push(index); + } + } + + // Pad out the `node_starts` array so that it has `num_nodes + + // 1` entries. Continuing our example above, if `num_nodes` is + // be `3`, we would push one more index: `[0, 1, 1, 2]`. + // + // Interpretation of that vector: + // + // [0, 1, 1, 2] + // ---- range for N=2 + // ---- range for N=1 + // ---- range for N=0 + while node_starts.len() <= num_nodes { + node_starts.push(edge_targets.len()); + } + + assert_eq!(node_starts.len(), num_nodes + 1); + + Self { node_starts, edge_targets } + } + + /// Gets the successors for `source` as a slice. + pub fn successors(&self, source: N) -> &[N] { + let start_index = self.node_starts[source]; + let end_index = self.node_starts[source.plus(1)]; + &self.edge_targets[start_index..end_index] + } +} + +impl DirectedGraph for VecGraph { + type Node = N; +} + +impl WithNumNodes for VecGraph { + fn num_nodes(&self) -> usize { + self.node_starts.len() - 1 + } +} + +impl WithNumEdges for VecGraph { + fn num_edges(&self) -> usize { + self.edge_targets.len() + } +} + +impl GraphSuccessors<'graph> for VecGraph { + type Item = N; + + type Iter = std::iter::Cloned>; +} + +impl WithSuccessors for VecGraph { + fn successors<'graph>( + &'graph self, + node: N + ) -> >::Iter { + self.successors(node).iter().cloned() + } +} + diff --git a/src/librustc_data_structures/graph/vec_graph/test.rs b/src/librustc_data_structures/graph/vec_graph/test.rs new file mode 100644 index 00000000000..4a168ee1d44 --- /dev/null +++ b/src/librustc_data_structures/graph/vec_graph/test.rs @@ -0,0 +1,46 @@ +#![cfg(test)] + +use super::*; + +fn create_graph() -> VecGraph { + // Create a simple graph + // + // 5 + // | + // V + // 0 --> 1 --> 2 + // | + // v + // 3 --> 4 + // + // 6 + + VecGraph::new( + 7, + vec![ + (0, 1), + (1, 2), + (1, 3), + (3, 4), + (5, 1), + ], + ) +} + +#[test] +fn num_nodes() { + let graph = create_graph(); + assert_eq!(graph.num_nodes(), 7); +} + +#[test] +fn succesors() { + let graph = create_graph(); + assert_eq!(graph.successors(0), &[1]); + assert_eq!(graph.successors(1), &[2, 3]); + assert_eq!(graph.successors(2), &[]); + assert_eq!(graph.successors(3), &[4]); + assert_eq!(graph.successors(4), &[]); + assert_eq!(graph.successors(5), &[1]); + assert_eq!(graph.successors(6), &[]); +} diff --git a/src/librustc_data_structures/indexed_vec.rs b/src/librustc_data_structures/indexed_vec.rs index 635edbb927e..b3a810a622d 100644 --- a/src/librustc_data_structures/indexed_vec.rs +++ b/src/librustc_data_structures/indexed_vec.rs @@ -19,8 +19,11 @@ pub trait Idx: Copy + 'static + Ord + Debug + Hash { fn index(self) -> usize; fn increment_by(&mut self, amount: usize) { - let v = self.index() + amount; - *self = Self::new(v); + *self = self.plus(amount); + } + + fn plus(self, amount: usize) -> Self { + Self::new(self.index() + amount) } } @@ -167,6 +170,14 @@ impl $type { } } + impl std::ops::Add for $type { + type Output = Self; + + fn add(self, other: usize) -> Self { + Self::new(self.index() + other) + } + } + impl Idx for $type { #[inline] fn new(value: usize) -> Self {