Auto merge of #111673 - cjgillot:dominator-preprocess, r=cjgillot,tmiasko

Preprocess and cache dominator tree Preprocessing dominators has a very strong effect for https://github.com/rust-lang/rust/pull/111344. That pass checks that assignments dominate their uses repeatedly. Using the unprocessed dominator tree caused a quadratic runtime (number of bbs x depth of the dominator tree). This PR also caches the dominator tree and the pre-processed dominators in the MIR cfg cache. Rebase of https://github.com/rust-lang/rust/pull/107157 cc `@tmiasko`
2023-05-24 16:18:21 +00:00 · 2023-05-24 16:18:21 +00:00 · 97d328012b
commit 97d328012b
parent b3cbf7c835 7c8f29f02c
9 changed files with 107 additions and 51 deletions
--- a/compiler/rustc_borrowck/src/invalidation.rs
+++ b/compiler/rustc_borrowck/src/invalidation.rs
@ -46,7 +46,7 @@ struct InvalidationGenerator<'cx, 'tcx> {
    all_facts: &'cx mut AllFacts,
    location_table: &'cx LocationTable,
    body: &'cx Body<'tcx>,
-    dominators: Dominators<BasicBlock>,
+    dominators: &'cx Dominators<BasicBlock>,
    borrow_set: &'cx BorrowSet<'tcx>,
 }

--- a/compiler/rustc_borrowck/src/lib.rs
+++ b/compiler/rustc_borrowck/src/lib.rs
@ -43,7 +43,6 @@

 use either::Either;
 use smallvec::SmallVec;
-use std::cell::OnceCell;
 use std::cell::RefCell;
 use std::collections::BTreeMap;
 use std::ops::Deref;
@ -331,7 +330,6 @@ fn do_mir_borrowck<'tcx>(
                used_mut: Default::default(),
                used_mut_upvars: SmallVec::new(),
                borrow_set: Rc::clone(&borrow_set),
-                dominators: Default::default(),
                upvars: Vec::new(),
                local_names: IndexVec::from_elem(None, &promoted_body.local_decls),
                region_names: RefCell::default(),
@ -360,7 +358,6 @@ fn do_mir_borrowck<'tcx>(
        used_mut: Default::default(),
        used_mut_upvars: SmallVec::new(),
        borrow_set: Rc::clone(&borrow_set),
-        dominators: Default::default(),
        upvars,
        local_names,
        region_names: RefCell::default(),
@ -591,9 +588,6 @@ struct MirBorrowckCtxt<'cx, 'tcx> {
    /// The set of borrows extracted from the MIR
    borrow_set: Rc<BorrowSet<'tcx>>,

-    /// Dominators for MIR
-    dominators: OnceCell<Dominators<BasicBlock>>,
-
    /// Information about upvars not necessarily preserved in types or MIR
    upvars: Vec<Upvar<'tcx>>,

@ -2269,7 +2263,8 @@ fn is_upvar_field_projection(&self, place_ref: PlaceRef<'tcx>) -> Option<FieldId
    }

    fn dominators(&self) -> &Dominators<BasicBlock> {
-        self.dominators.get_or_init(|| self.body.basic_blocks.dominators())
+        // `BasicBlocks` computes dominators on-demand and caches them.
+        self.body.basic_blocks.dominators()
    }
 }

--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@ -84,7 +84,7 @@ fn dominates(self, location: Location, dominators: &Dominators<mir::BasicBlock>)

 struct LocalAnalyzer<'mir, 'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> {
    fx: &'mir FunctionCx<'a, 'tcx, Bx>,
-    dominators: Dominators<mir::BasicBlock>,
+    dominators: &'mir Dominators<mir::BasicBlock>,
    locals: IndexVec<mir::Local, LocalKind>,
 }

--- a/compiler/rustc_data_structures/src/graph/dominators/mod.rs
+++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs
@ -26,7 +26,7 @@ struct PreOrderFrame<Iter> {
    struct PreorderIndex {}
 }

-pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> {
+pub fn dominators<G: ControlFlowGraph>(graph: &G) -> Dominators<G::Node> {
    // compute the post order index (rank) for each node
    let mut post_order_rank = IndexVec::from_elem_n(0, graph.num_nodes());

@ -244,7 +244,10 @@ pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> {

    let start_node = graph.start_node();
    immediate_dominators[start_node] = None;
-    Dominators { start_node, post_order_rank, immediate_dominators }
+
+    let time = compute_access_time(start_node, &immediate_dominators);
+
+    Dominators { start_node, post_order_rank, immediate_dominators, time }
 }

 /// Evaluate the link-eval virtual forest, providing the currently minimum semi
@ -316,6 +319,7 @@ pub struct Dominators<N: Idx> {
    // possible to get its full list of dominators by looking up the dominator
    // of each dominator. (See the `impl Iterator for Iter` definition).
    immediate_dominators: IndexVec<N, Option<N>>,
+    time: IndexVec<N, Time>,
 }

 impl<Node: Idx> Dominators<Node> {
@ -333,12 +337,7 @@ pub fn immediate_dominator(&self, node: Node) -> Option<Node> {
    /// See the `impl Iterator for Iter` definition to understand how this works.
    pub fn dominators(&self, node: Node) -> Iter<'_, Node> {
        assert!(self.is_reachable(node), "node {node:?} is not reachable");
-        Iter { dominators: self, node: Some(node) }
-    }
-
-    pub fn dominates(&self, dom: Node, node: Node) -> bool {
-        // FIXME -- could be optimized by using post-order-rank
-        self.dominators(node).any(|n| n == dom)
+        Iter { dom_tree: self, node: Some(node) }
    }

    /// Provide deterministic ordering of nodes such that, if any two nodes have a dominator
@ -348,10 +347,22 @@ pub fn dominates(&self, dom: Node, node: Node) -> bool {
    pub fn rank_partial_cmp(&self, lhs: Node, rhs: Node) -> Option<Ordering> {
        self.post_order_rank[rhs].partial_cmp(&self.post_order_rank[lhs])
    }
+
+    /// Returns true if `a` dominates `b`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `b` is unreachable.
+    pub fn dominates(&self, a: Node, b: Node) -> bool {
+        let a = self.time[a];
+        let b = self.time[b];
+        assert!(b.start != 0, "node {b:?} is not reachable");
+        a.start <= b.start && b.finish <= a.finish
+    }
 }

 pub struct Iter<'dom, Node: Idx> {
-    dominators: &'dom Dominators<Node>,
+    dom_tree: &'dom Dominators<Node>,
    node: Option<Node>,
 }

@ -360,10 +371,74 @@ impl<'dom, Node: Idx> Iterator for Iter<'dom, Node> {

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(node) = self.node {
-            self.node = self.dominators.immediate_dominator(node);
+            self.node = self.dom_tree.immediate_dominator(node);
            Some(node)
        } else {
            None
        }
    }
 }
+
+/// Describes the number of vertices discovered at the time when processing of a particular vertex
+/// started and when it finished. Both values are zero for unreachable vertices.
+#[derive(Copy, Clone, Default, Debug)]
+struct Time {
+    start: u32,
+    finish: u32,
+}
+
+fn compute_access_time<N: Idx>(
+    start_node: N,
+    immediate_dominators: &IndexSlice<N, Option<N>>,
+) -> IndexVec<N, Time> {
+    // Transpose the dominator tree edges, so that child nodes of vertex v are stored in
+    // node[edges[v].start..edges[v].end].
+    let mut edges: IndexVec<N, std::ops::Range<u32>> =
+        IndexVec::from_elem(0..0, immediate_dominators);
+    for &idom in immediate_dominators.iter() {
+        if let Some(idom) = idom {
+            edges[idom].end += 1;
+        }
+    }
+    let mut m = 0;
+    for e in edges.iter_mut() {
+        m += e.end;
+        e.start = m;
+        e.end = m;
+    }
+    let mut node = IndexVec::from_elem_n(Idx::new(0), m.try_into().unwrap());
+    for (i, &idom) in immediate_dominators.iter_enumerated() {
+        if let Some(idom) = idom {
+            edges[idom].start -= 1;
+            node[edges[idom].start] = i;
+        }
+    }
+
+    // Perform a depth-first search of the dominator tree. Record the number of vertices discovered
+    // when vertex v is discovered first as time[v].start, and when its processing is finished as
+    // time[v].finish.
+    let mut time: IndexVec<N, Time> = IndexVec::from_elem(Time::default(), immediate_dominators);
+    let mut stack = Vec::new();
+
+    let mut discovered = 1;
+    stack.push(start_node);
+    time[start_node].start = discovered;
+
+    while let Some(&i) = stack.last() {
+        let e = &mut edges[i];
+        if e.start == e.end {
+            // Finish processing vertex i.
+            time[i].finish = discovered;
+            stack.pop();
+        } else {
+            let j = node[e.start];
+            e.start += 1;
+            // Start processing vertex j.
+            discovered += 1;
+            time[j].start = discovered;
+            stack.push(j);
+        }
+    }
+
+    time
+}
--- a/compiler/rustc_middle/src/mir/basic_blocks.rs
+++ b/compiler/rustc_middle/src/mir/basic_blocks.rs
@ -27,6 +27,7 @@ struct Cache {
    switch_sources: OnceCell<SwitchSources>,
    is_cyclic: OnceCell<bool>,
    postorder: OnceCell<Vec<BasicBlock>>,
+    dominators: OnceCell<Dominators<BasicBlock>>,
 }

 impl<'tcx> BasicBlocks<'tcx> {
@ -41,8 +42,8 @@ pub fn is_cfg_cyclic(&self) -> bool {
        *self.cache.is_cyclic.get_or_init(|| graph::is_cyclic(self))
    }

-    pub fn dominators(&self) -> Dominators<BasicBlock> {
-        dominators(&self)
+    pub fn dominators(&self) -> &Dominators<BasicBlock> {
+        self.cache.dominators.get_or_init(|| dominators(self))
    }

    /// Returns predecessors for each basic block.
--- a/compiler/rustc_mir_transform/src/coverage/graph.rs
+++ b/compiler/rustc_mir_transform/src/coverage/graph.rs
@ -9,6 +9,7 @@
 use rustc_middle::mir::coverage::*;
 use rustc_middle::mir::{self, BasicBlock, BasicBlockData, Terminator, TerminatorKind};

+use std::cmp::Ordering;
 use std::ops::{Index, IndexMut};

 const ID_SEPARATOR: &str = ",";
@ -212,8 +213,12 @@ pub fn dominates(&self, dom: BasicCoverageBlock, node: BasicCoverageBlock) -> bo
    }

    #[inline(always)]
-    pub fn dominators(&self) -> &Dominators<BasicCoverageBlock> {
-        self.dominators.as_ref().unwrap()
+    pub fn rank_partial_cmp(
+        &self,
+        a: BasicCoverageBlock,
+        b: BasicCoverageBlock,
+    ) -> Option<Ordering> {
+        self.dominators.as_ref().unwrap().rank_partial_cmp(a, b)
    }
 }

@ -650,26 +655,6 @@ pub(super) fn find_loop_backedges(
    let mut backedges = IndexVec::from_elem_n(Vec::<BasicCoverageBlock>::new(), num_bcbs);

    // Identify loops by their backedges.
-    //
-    // The computational complexity is bounded by: n(s) x d where `n` is the number of
-    // `BasicCoverageBlock` nodes (the simplified/reduced representation of the CFG derived from the
-    // MIR); `s` is the average number of successors per node (which is most likely less than 2, and
-    // independent of the size of the function, so it can be treated as a constant);
-    // and `d` is the average number of dominators per node.
-    //
-    // The average number of dominators depends on the size and complexity of the function, and
-    // nodes near the start of the function's control flow graph typically have less dominators
-    // than nodes near the end of the CFG. Without doing a detailed mathematical analysis, I
-    // think the resulting complexity has the characteristics of O(n log n).
-    //
-    // The overall complexity appears to be comparable to many other MIR transform algorithms, and I
-    // don't expect that this function is creating a performance hot spot, but if this becomes an
-    // issue, there may be ways to optimize the `dominates` algorithm (as indicated by an
-    // existing `FIXME` comment in that code), or possibly ways to optimize it's usage here, perhaps
-    // by keeping track of results for visited `BasicCoverageBlock`s if they can be used to short
-    // circuit downstream `dominates` checks.
-    //
-    // For now, that kind of optimization seems unnecessarily complicated.
    for (bcb, _) in basic_coverage_blocks.iter_enumerated() {
        for &successor in &basic_coverage_blocks.successors[bcb] {
            if basic_coverage_blocks.dominates(successor, bcb) {
--- a/compiler/rustc_mir_transform/src/coverage/spans.rs
+++ b/compiler/rustc_mir_transform/src/coverage/spans.rs
@ -344,7 +344,7 @@ fn mir_to_initial_sorted_coverage_spans(&self) -> Vec<CoverageSpan> {
                        // before the dominated equal spans). When later comparing two spans in
                        // order, the first will either dominate the second, or they will have no
                        // dominator relationship.
-                        self.basic_coverage_blocks.dominators().rank_partial_cmp(a.bcb, b.bcb)
+                        self.basic_coverage_blocks.rank_partial_cmp(a.bcb, b.bcb)
                    }
                } else {
                    // Sort hi() in reverse order so shorter spans are attempted after longer spans.
--- a/compiler/rustc_mir_transform/src/ctfe_limit.rs
+++ b/compiler/rustc_mir_transform/src/ctfe_limit.rs
@ -47,7 +47,7 @@ fn has_back_edge(
        return false;
    }
    // Check if any of the dominators of the node are also the node's successor.
-    doms.dominators(node).any(|dom| node_data.terminator().successors().any(|succ| succ == dom))
+    node_data.terminator().successors().any(|succ| doms.dominates(succ, node))
 }

 fn insert_counter(basic_block_data: &mut BasicBlockData<'_>) {
--- a/compiler/rustc_mir_transform/src/ssa.rs
+++ b/compiler/rustc_mir_transform/src/ssa.rs
@ -31,11 +31,11 @@ pub struct SsaLocals {
 /// We often encounter MIR bodies with 1 or 2 basic blocks. In those cases, it's unnecessary to
 /// actually compute dominators, we can just compare block indices because bb0 is always the first
 /// block, and in any body all other blocks are always dominated by bb0.
-struct SmallDominators {
-    inner: Option<Dominators<BasicBlock>>,
+struct SmallDominators<'a> {
+    inner: Option<&'a Dominators<BasicBlock>>,
 }

-impl SmallDominators {
+impl SmallDominators<'_> {
    fn dominates(&self, first: Location, second: Location) -> bool {
        if first.block == second.block {
            first.statement_index <= second.statement_index
@ -198,14 +198,14 @@ enum LocationExtended {
    Arg,
 }

-struct SsaVisitor {
-    dominators: SmallDominators,
+struct SsaVisitor<'a> {
+    dominators: SmallDominators<'a>,
    assignments: IndexVec<Local, Set1<LocationExtended>>,
    assignment_order: Vec<Local>,
    direct_uses: IndexVec<Local, u32>,
 }

-impl<'tcx> Visitor<'tcx> for SsaVisitor {
+impl<'tcx> Visitor<'tcx> for SsaVisitor<'_> {
    fn visit_local(&mut self, local: Local, ctxt: PlaceContext, loc: Location) {
        match ctxt {
            PlaceContext::MutatingUse(MutatingUseContext::Projection)