From 339f4be04627f192ba29c33ab2251a582241a112 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sun, 31 Mar 2024 12:31:48 -0400 Subject: [PATCH 1/2] Only collect mono items from reachable blocks --- compiler/rustc_codegen_ssa/src/mir/mod.rs | 9 +- compiler/rustc_middle/src/mir/mod.rs | 52 ----------- compiler/rustc_middle/src/mir/traversal.rs | 96 +++++++++++++++++++- compiler/rustc_monomorphize/src/collector.rs | 20 ++-- 4 files changed, 110 insertions(+), 67 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index 387a5366b20..1f4473d2ec4 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -257,20 +257,19 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // Apply debuginfo to the newly allocated locals. fx.debug_introduce_locals(&mut start_bx); - let reachable_blocks = mir.reachable_blocks_in_mono(cx.tcx(), instance); - // The builders will be created separately for each basic block at `codegen_block`. // So drop the builder of `start_llbb` to avoid having two at the same time. drop(start_bx); + let reachable_blocks = traversal::mono_reachable_as_bitset(mir, cx.tcx(), instance); + // Codegen the body of each block using reverse postorder for (bb, _) in traversal::reverse_postorder(mir) { if reachable_blocks.contains(bb) { fx.codegen_block(bb); } else { - // This may have references to things we didn't monomorphize, so we - // don't actually codegen the body. We still create the block so - // terminators in other blocks can reference it without worry. + // We want to skip this block, because it's not reachable. But we still create + // the block so terminators in other blocks can reference it. fx.codegen_block_as_unreachable(bb); } } diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 7ecac0c0e78..601bfc770f4 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -30,7 +30,6 @@ use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::graph::dominators::Dominators; -use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_index::bit_set::BitSet; use rustc_index::{Idx, IndexSlice, IndexVec}; use rustc_serialize::{Decodable, Encodable}; @@ -687,57 +686,6 @@ pub fn is_custom_mir(&self) -> bool { self.injection_phase.is_some() } - /// Finds which basic blocks are actually reachable for a specific - /// monomorphization of this body. - /// - /// This is allowed to have false positives; just because this says a block - /// is reachable doesn't mean that's necessarily true. It's thus always - /// legal for this to return a filled set. - /// - /// Regardless, the [`BitSet::domain_size`] of the returned set will always - /// exactly match the number of blocks in the body so that `contains` - /// checks can be done without worrying about panicking. - /// - /// This is mostly useful because it lets us skip lowering the `false` side - /// of `if ::CONST`, as well as `intrinsics::debug_assertions`. - pub fn reachable_blocks_in_mono( - &self, - tcx: TyCtxt<'tcx>, - instance: Instance<'tcx>, - ) -> BitSet { - let mut set = BitSet::new_empty(self.basic_blocks.len()); - self.reachable_blocks_in_mono_from(tcx, instance, &mut set, START_BLOCK); - set - } - - fn reachable_blocks_in_mono_from( - &self, - tcx: TyCtxt<'tcx>, - instance: Instance<'tcx>, - set: &mut BitSet, - bb: BasicBlock, - ) { - if !set.insert(bb) { - return; - } - - let data = &self.basic_blocks[bb]; - - if let Some((bits, targets)) = Self::try_const_mono_switchint(tcx, instance, data) { - let target = targets.target_for_value(bits); - ensure_sufficient_stack(|| { - self.reachable_blocks_in_mono_from(tcx, instance, set, target) - }); - return; - } - - for target in data.terminator().successors() { - ensure_sufficient_stack(|| { - self.reachable_blocks_in_mono_from(tcx, instance, set, target) - }); - } - } - /// If this basic block ends with a [`TerminatorKind::SwitchInt`] for which we can evaluate the /// dimscriminant in monomorphization, we return the discriminant bits and the /// [`SwitchTargets`], just so the caller doesn't also have to match on the terminator. diff --git a/compiler/rustc_middle/src/mir/traversal.rs b/compiler/rustc_middle/src/mir/traversal.rs index 0a938bcd315..245e9096bad 100644 --- a/compiler/rustc_middle/src/mir/traversal.rs +++ b/compiler/rustc_middle/src/mir/traversal.rs @@ -245,7 +245,7 @@ pub fn reachable<'a, 'tcx>( /// Returns a `BitSet` containing all basic blocks reachable from the `START_BLOCK`. pub fn reachable_as_bitset(body: &Body<'_>) -> BitSet { let mut iter = preorder(body); - iter.by_ref().for_each(drop); + while let Some(_) = iter.next() {} iter.visited } @@ -279,3 +279,97 @@ pub fn reverse_postorder<'a, 'tcx>( { body.basic_blocks.reverse_postorder().iter().map(|&bb| (bb, &body.basic_blocks[bb])) } + +/// Traversal of a [`Body`] that tries to avoid unreachable blocks in a monomorphized [`Instance`]. +/// +/// This is allowed to have false positives; blocks may be visited even if they are not actually +/// reachable. +/// +/// Such a traversal is mostly useful because it lets us skip lowering the `false` side +/// of `if ::CONST`, as well as [`NullOp::UbChecks`]. +/// +/// [`NullOp::UbChecks`]: rustc_middle::mir::NullOp::UbChecks +pub fn mono_reachable<'a, 'tcx>( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, +) -> MonoReachable<'a, 'tcx> { + MonoReachable::new(body, tcx, instance) +} + +/// [`MonoReachable`] internally accumulates a [`BitSet`] of visited blocks. This is just a +/// convenience function to run that traversal then extract its set of reached blocks. +pub fn mono_reachable_as_bitset<'a, 'tcx>( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, +) -> BitSet { + let mut iter = mono_reachable(body, tcx, instance); + while let Some(_) = iter.next() {} + iter.visited +} + +pub struct MonoReachable<'a, 'tcx> { + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + visited: BitSet, + // Other traversers track their worklist in a Vec. But we don't care about order, so we can + // store ours in a BitSet and thus save allocations because BitSet has a small size + // optimization. + worklist: BitSet, +} + +impl<'a, 'tcx> MonoReachable<'a, 'tcx> { + pub fn new( + body: &'a Body<'tcx>, + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + ) -> MonoReachable<'a, 'tcx> { + let mut worklist = BitSet::new_empty(body.basic_blocks.len()); + worklist.insert(START_BLOCK); + MonoReachable { + body, + tcx, + instance, + visited: BitSet::new_empty(body.basic_blocks.len()), + worklist, + } + } + + fn add_work(&mut self, blocks: impl IntoIterator) { + for block in blocks.into_iter() { + if !self.visited.contains(block) { + self.worklist.insert(block); + } + } + } +} + +impl<'a, 'tcx> Iterator for MonoReachable<'a, 'tcx> { + type Item = (BasicBlock, &'a BasicBlockData<'tcx>); + + fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> { + while let Some(idx) = self.worklist.iter().next() { + self.worklist.remove(idx); + if !self.visited.insert(idx) { + continue; + } + + let data = &self.body[idx]; + + if let Some((bits, targets)) = + Body::try_const_mono_switchint(self.tcx, self.instance, data) + { + let target = targets.target_for_value(bits); + self.add_work([target]); + } else { + self.add_work(data.terminator().successors()); + } + + return Some((idx, data)); + } + + None + } +} diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs index 0c35f9838ed..ee6c154e6e8 100644 --- a/compiler/rustc_monomorphize/src/collector.rs +++ b/compiler/rustc_monomorphize/src/collector.rs @@ -214,6 +214,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::interpret::{AllocId, ErrorHandled, GlobalAlloc, Scalar}; use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; +use rustc_middle::mir::traversal; use rustc_middle::mir::visit::Visitor as MirVisitor; use rustc_middle::mir::{self, Location, MentionedItem}; use rustc_middle::query::TyCtxtAt; @@ -1414,15 +1415,16 @@ fn collect_items_of_instance<'tcx>( }; if mode == CollectionMode::UsedItems { - // Visit everything. Here we rely on the visitor also visiting `required_consts`, so that we - // evaluate them and abort compilation if any of them errors. - collector.visit_body(body); - } else { - // We only need to evaluate all constants, but can ignore the rest of the MIR. - for const_op in &body.required_consts { - if let Some(val) = collector.eval_constant(const_op) { - collect_const_value(tcx, val, mentioned_items); - } + for (bb, data) in traversal::mono_reachable(body, tcx, instance) { + collector.visit_basic_block_data(bb, data) + } + } + + // Always visit all `required_consts`, so that we evaluate them and abort compilation if any of + // them errors. + for const_op in &body.required_consts { + if let Some(val) = collector.eval_constant(const_op) { + collect_const_value(tcx, val, mentioned_items); } } From b5b49289e1e67a1b333da3ba6cf374ca11b9d326 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sun, 7 Apr 2024 16:46:21 -0400 Subject: [PATCH 2/2] Only traverse mono-reachable blocks in cg_clif --- compiler/rustc_codegen_cranelift/src/base.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index 1307a62a60d..771e5b21958 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -267,7 +267,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { .generic_activity("codegen prelude") .run(|| crate::abi::codegen_fn_prelude(fx, start_block)); - for (bb, bb_data) in fx.mir.basic_blocks.iter_enumerated() { + for (bb, bb_data) in traversal::mono_reachable(fx.mir, fx.tcx, fx.instance) { let block = fx.get_block(bb); fx.bcx.switch_to_block(block);