From c7ae4c2cb6d7e58ad0f9c12047e3d747c26a9d71 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Fri, 23 Oct 2020 00:45:07 -0700 Subject: [PATCH] Splitting transform/instrument_coverage.rs into transform/coverage/... --- .../src/transform/coverage/counters.rs | 54 ++ .../rustc_mir/src/transform/coverage/debug.rs | 70 ++ .../rustc_mir/src/transform/coverage/graph.rs | 274 +++++++ .../rustc_mir/src/transform/coverage/mod.rs | 241 ++++++ .../rustc_mir/src/transform/coverage/query.rs | 113 +++ .../spans.rs} | 738 +----------------- compiler/rustc_mir/src/transform/mod.rs | 6 +- 7 files changed, 766 insertions(+), 730 deletions(-) create mode 100644 compiler/rustc_mir/src/transform/coverage/counters.rs create mode 100644 compiler/rustc_mir/src/transform/coverage/debug.rs create mode 100644 compiler/rustc_mir/src/transform/coverage/graph.rs create mode 100644 compiler/rustc_mir/src/transform/coverage/mod.rs create mode 100644 compiler/rustc_mir/src/transform/coverage/query.rs rename compiler/rustc_mir/src/transform/{instrument_coverage.rs => coverage/spans.rs} (52%) diff --git a/compiler/rustc_mir/src/transform/coverage/counters.rs b/compiler/rustc_mir/src/transform/coverage/counters.rs new file mode 100644 index 00000000000..511ad937c24 --- /dev/null +++ b/compiler/rustc_mir/src/transform/coverage/counters.rs @@ -0,0 +1,54 @@ +use rustc_middle::mir::coverage::*; + +/// Manages the counter and expression indexes/IDs to generate `CoverageKind` components for MIR +/// `Coverage` statements. +pub(crate) struct CoverageCounters { + function_source_hash: u64, + next_counter_id: u32, + num_expressions: u32, +} + +impl CoverageCounters { + pub fn new(function_source_hash: u64) -> Self { + Self { + function_source_hash, + next_counter_id: CounterValueReference::START.as_u32(), + num_expressions: 0, + } + } + + pub fn make_counter(&mut self) -> CoverageKind { + CoverageKind::Counter { + function_source_hash: self.function_source_hash, + id: self.next_counter(), + } + } + + pub fn make_expression( + &mut self, + lhs: ExpressionOperandId, + op: Op, + rhs: ExpressionOperandId, + ) -> CoverageKind { + let id = self.next_expression(); + CoverageKind::Expression { id, lhs, op, rhs } + } + + /// Counter IDs start from one and go up. + fn next_counter(&mut self) -> CounterValueReference { + assert!(self.next_counter_id < u32::MAX - self.num_expressions); + let next = self.next_counter_id; + self.next_counter_id += 1; + CounterValueReference::from(next) + } + + /// Expression IDs start from u32::MAX and go down because a Expression can reference + /// (add or subtract counts) of both Counter regions and Expression regions. The counter + /// expression operand IDs must be unique across both types. + fn next_expression(&mut self) -> InjectedExpressionId { + assert!(self.next_counter_id < u32::MAX - self.num_expressions); + let next = u32::MAX - self.num_expressions; + self.num_expressions += 1; + InjectedExpressionId::from(next) + } +} diff --git a/compiler/rustc_mir/src/transform/coverage/debug.rs b/compiler/rustc_mir/src/transform/coverage/debug.rs new file mode 100644 index 00000000000..0cd2e413da9 --- /dev/null +++ b/compiler/rustc_mir/src/transform/coverage/debug.rs @@ -0,0 +1,70 @@ +use super::graph::BasicCoverageBlocks; +use super::spans::CoverageSpan; + +use crate::util::pretty; +use crate::util::spanview::{self, SpanViewable}; + +use rustc_middle::mir::{self, TerminatorKind}; +use rustc_middle::ty::TyCtxt; + +/// Generates the MIR pass `CoverageSpan`-specific spanview dump file. 
+pub(crate) fn dump_coverage_spanview( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + basic_coverage_blocks: &BasicCoverageBlocks, + pass_name: &str, + coverage_spans: &Vec, +) { + let mir_source = mir_body.source; + let def_id = mir_source.def_id(); + + let span_viewables = span_viewables(tcx, mir_body, basic_coverage_blocks, &coverage_spans); + let mut file = pretty::create_dump_file(tcx, "html", None, pass_name, &0, mir_source) + .expect("Unexpected error creating MIR spanview HTML file"); + let crate_name = tcx.crate_name(def_id.krate); + let item_name = tcx.def_path(def_id).to_filename_friendly_no_crate(); + let title = format!("{}.{} - Coverage Spans", crate_name, item_name); + spanview::write_document(tcx, def_id, span_viewables, &title, &mut file) + .expect("Unexpected IO error dumping coverage spans as HTML"); +} + +/// Converts the computed `BasicCoverageBlock`s into `SpanViewable`s. +fn span_viewables( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + basic_coverage_blocks: &BasicCoverageBlocks, + coverage_spans: &Vec, +) -> Vec { + let mut span_viewables = Vec::new(); + for coverage_span in coverage_spans { + let tooltip = coverage_span.format_coverage_statements(tcx, mir_body); + let CoverageSpan { span, bcb_leader_bb: bb, .. } = coverage_span; + let bcb = &basic_coverage_blocks[*bb]; + let id = bcb.id(); + let leader_bb = bcb.leader_bb(); + span_viewables.push(SpanViewable { bb: leader_bb, span: *span, id, tooltip }); + } + span_viewables +} + +/// Returns a simple string representation of a `TerminatorKind` variant, indenpendent of any +/// values it might hold. +pub(crate) fn term_type(kind: &TerminatorKind<'tcx>) -> &'static str { + match kind { + TerminatorKind::Goto { .. } => "Goto", + TerminatorKind::SwitchInt { .. } => "SwitchInt", + TerminatorKind::Resume => "Resume", + TerminatorKind::Abort => "Abort", + TerminatorKind::Return => "Return", + TerminatorKind::Unreachable => "Unreachable", + TerminatorKind::Drop { .. } => "Drop", + TerminatorKind::DropAndReplace { .. } => "DropAndReplace", + TerminatorKind::Call { .. } => "Call", + TerminatorKind::Assert { .. } => "Assert", + TerminatorKind::Yield { .. } => "Yield", + TerminatorKind::GeneratorDrop => "GeneratorDrop", + TerminatorKind::FalseEdge { .. } => "FalseEdge", + TerminatorKind::FalseUnwind { .. } => "FalseUnwind", + TerminatorKind::InlineAsm { .. } => "InlineAsm", + } +} diff --git a/compiler/rustc_mir/src/transform/coverage/graph.rs b/compiler/rustc_mir/src/transform/coverage/graph.rs new file mode 100644 index 00000000000..c0a698833a1 --- /dev/null +++ b/compiler/rustc_mir/src/transform/coverage/graph.rs @@ -0,0 +1,274 @@ +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{self, BasicBlock, BasicBlockData, TerminatorKind}; + +const ID_SEPARATOR: &str = ","; + +/// A BasicCoverageBlock (BCB) represents the maximal-length sequence of CFG (MIR) BasicBlocks +/// without conditional branches. +/// +/// The BCB allows coverage analysis to be performed on a simplified projection of the underlying +/// MIR CFG, without altering the original CFG. Note that running the MIR `SimplifyCfg` transform, +/// is not sufficient, and therefore not necessary, since the BCB-based CFG projection is a more +/// aggressive simplification. For example: +/// +/// * The BCB CFG projection ignores (trims) branches not relevant to coverage, such as unwind- +/// related code that is injected by the Rust compiler but has no physical source code to +/// count. 
This also means a BasicBlock with a `Call` terminator can be merged into its +/// primary successor target block, in the same BCB. +/// * Some BasicBlock terminators support Rust-specific concerns--like borrow-checking--that are +/// not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as +/// a `Goto`, and merged with its successor into the same BCB. +/// +/// Each BCB with at least one computed `CoverageSpan` will have no more than one `Counter`. +/// In some cases, a BCB's execution count can be computed by `CounterExpression`. Additional +/// disjoint `CoverageSpan`s in a BCB can also be counted by `CounterExpression` (by adding `ZERO` +/// to the BCB's primary counter or expression). +/// +/// Dominator/dominated relationships (which are fundamental to the coverage analysis algorithm) +/// between two BCBs can be computed using the `mir::Body` `dominators()` with any `BasicBlock` +/// member of each BCB. (For consistency, BCB's use the first `BasicBlock`, also referred to as the +/// `bcb_leader_bb`.) +/// +/// The BCB CFG projection is critical to simplifying the coverage analysis by ensuring graph +/// path-based queries (`is_dominated_by()`, `predecessors`, `successors`, etc.) have branch +/// (control flow) significance. +#[derive(Debug, Clone)] +pub(crate) struct BasicCoverageBlock { + pub blocks: Vec, +} + +impl BasicCoverageBlock { + pub fn leader_bb(&self) -> BasicBlock { + self.blocks[0] + } + + pub fn id(&self) -> String { + format!( + "@{}", + self.blocks + .iter() + .map(|bb| bb.index().to_string()) + .collect::>() + .join(ID_SEPARATOR) + ) + } +} + +pub(crate) struct BasicCoverageBlocks { + vec: IndexVec>, +} + +impl BasicCoverageBlocks { + pub fn from_mir(mir_body: &mir::Body<'tcx>) -> Self { + let mut basic_coverage_blocks = + BasicCoverageBlocks { vec: IndexVec::from_elem_n(None, mir_body.basic_blocks().len()) }; + basic_coverage_blocks.extract_from_mir(mir_body); + basic_coverage_blocks + } + + pub fn iter(&self) -> impl Iterator { + self.vec.iter().filter_map(|bcb| bcb.as_ref()) + } + + pub fn num_nodes(&self) -> usize { + self.vec.len() + } + + pub fn extract_from_mir(&mut self, mir_body: &mir::Body<'tcx>) { + // Traverse the CFG but ignore anything following an `unwind` + let cfg_without_unwind = ShortCircuitPreorder::new(&mir_body, |term_kind| { + let mut successors = term_kind.successors(); + match &term_kind { + // SwitchInt successors are never unwind, and all of them should be traversed. + + // NOTE: TerminatorKind::FalseEdge targets from SwitchInt don't appear to be + // helpful in identifying unreachable code. I did test the theory, but the following + // changes were not beneficial. (I assumed that replacing some constants with + // non-deterministic variables might effect which blocks were targeted by a + // `FalseEdge` `imaginary_target`. It did not.) + // + // Also note that, if there is a way to identify BasicBlocks that are part of the + // MIR CFG, but not actually reachable, here are some other things to consider: + // + // Injecting unreachable code regions will probably require computing the set + // difference between the basic blocks found without filtering out unreachable + // blocks, and the basic blocks found with the filter; then computing the + // `CoverageSpans` without the filter; and then injecting `Counter`s or + // `CounterExpression`s for blocks that are not unreachable, or injecting + // `Unreachable` code regions otherwise. This seems straightforward, but not + // trivial. 
+ // + // Alternatively, we might instead want to leave the unreachable blocks in + // (bypass the filter here), and inject the counters. This will result in counter + // values of zero (0) for unreachable code (and, notably, the code will be displayed + // with a red background by `llvm-cov show`). + // + // TerminatorKind::SwitchInt { .. } => { + // let some_imaginary_target = successors.clone().find_map(|&successor| { + // let term = mir_body.basic_blocks()[successor].terminator(); + // if let TerminatorKind::FalseEdge { imaginary_target, .. } = term.kind { + // if mir_body.predecessors()[imaginary_target].len() == 1 { + // return Some(imaginary_target); + // } + // } + // None + // }); + // if let Some(imaginary_target) = some_imaginary_target { + // box successors.filter(move |&&successor| successor != imaginary_target) + // } else { + // box successors + // } + // } + // + // Note this also required changing the closure signature for the + // `ShortCurcuitPreorder` to: + // + // F: Fn(&'tcx TerminatorKind<'tcx>) -> Box + 'a>, + TerminatorKind::SwitchInt { .. } => successors, + + // For all other kinds, return only the first successor, if any, and ignore unwinds + _ => successors.next().into_iter().chain(&[]), + } + }); + + // Walk the CFG using a Preorder traversal, which starts from `START_BLOCK` and follows + // each block terminator's `successors()`. Coverage spans must map to actual source code, + // so compiler generated blocks and paths can be ignored. To that end the CFG traversal + // intentionally omits unwind paths. + let mut blocks = Vec::new(); + for (bb, data) in cfg_without_unwind { + if let Some(last) = blocks.last() { + let predecessors = &mir_body.predecessors()[bb]; + if predecessors.len() > 1 || !predecessors.contains(last) { + // The `bb` has more than one _incoming_ edge, and should start its own + // `BasicCoverageBlock`. (Note, the `blocks` vector does not yet include `bb`; + // it contains a sequence of one or more sequential blocks with no intermediate + // branches in or out. Save these as a new `BasicCoverageBlock` before starting + // the new one.) + self.add_basic_coverage_block(blocks.split_off(0)); + debug!( + " because {}", + if predecessors.len() > 1 { + "predecessors.len() > 1".to_owned() + } else { + format!("bb {} is not in precessors: {:?}", bb.index(), predecessors) + } + ); + } + } + blocks.push(bb); + + let term = data.terminator(); + + match term.kind { + TerminatorKind::Return { .. } + | TerminatorKind::Abort + | TerminatorKind::Assert { .. } + | TerminatorKind::Yield { .. } + | TerminatorKind::SwitchInt { .. } => { + // The `bb` has more than one _outgoing_ edge, or exits the function. Save the + // current sequence of `blocks` gathered to this point, as a new + // `BasicCoverageBlock`. + self.add_basic_coverage_block(blocks.split_off(0)); + debug!(" because term.kind = {:?}", term.kind); + // Note that this condition is based on `TerminatorKind`, even though it + // theoretically boils down to `successors().len() != 1`; that is, either zero + // (e.g., `Return`, `Abort`) or multiple successors (e.g., `SwitchInt`), but + // since the Coverage graph (the BCB CFG projection) ignores things like unwind + // branches (which exist in the `Terminator`s `successors()` list) checking the + // number of successors won't work. + } + TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Unreachable + | TerminatorKind::Drop { .. } + | TerminatorKind::DropAndReplace { .. } + | TerminatorKind::Call { .. 
} + | TerminatorKind::GeneratorDrop + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::InlineAsm { .. } => {} + } + } + + if !blocks.is_empty() { + // process any remaining blocks into a final `BasicCoverageBlock` + self.add_basic_coverage_block(blocks.split_off(0)); + debug!(" because the end of the CFG was reached while traversing"); + } + } + + fn add_basic_coverage_block(&mut self, blocks: Vec) { + let leader_bb = blocks[0]; + let bcb = BasicCoverageBlock { blocks }; + debug!("adding BCB: {:?}", bcb); + self.vec[leader_bb] = Some(bcb); + } +} + +impl std::ops::Index for BasicCoverageBlocks { + type Output = BasicCoverageBlock; + + fn index(&self, index: BasicBlock) -> &Self::Output { + self.vec[index].as_ref().expect("is_some if BasicBlock is a BasicCoverageBlock leader") + } +} + +pub struct ShortCircuitPreorder< + 'a, + 'tcx, + F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>, +> { + body: &'a mir::Body<'tcx>, + visited: BitSet, + worklist: Vec, + filtered_successors: F, +} + +impl<'a, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>> + ShortCircuitPreorder<'a, 'tcx, F> +{ + pub fn new( + body: &'a mir::Body<'tcx>, + filtered_successors: F, + ) -> ShortCircuitPreorder<'a, 'tcx, F> { + let worklist = vec![mir::START_BLOCK]; + + ShortCircuitPreorder { + body, + visited: BitSet::new_empty(body.basic_blocks().len()), + worklist, + filtered_successors, + } + } +} + +impl<'a: 'tcx, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>> Iterator + for ShortCircuitPreorder<'a, 'tcx, F> +{ + type Item = (BasicBlock, &'a BasicBlockData<'tcx>); + + fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> { + while let Some(idx) = self.worklist.pop() { + if !self.visited.insert(idx) { + continue; + } + + let data = &self.body[idx]; + + if let Some(ref term) = data.terminator { + self.worklist.extend((self.filtered_successors)(&term.kind)); + } + + return Some((idx, data)); + } + + None + } + + fn size_hint(&self) -> (usize, Option) { + let size = self.body.basic_blocks().len() - self.visited.count(); + (size, Some(size)) + } +} diff --git a/compiler/rustc_mir/src/transform/coverage/mod.rs b/compiler/rustc_mir/src/transform/coverage/mod.rs new file mode 100644 index 00000000000..9961afba8e7 --- /dev/null +++ b/compiler/rustc_mir/src/transform/coverage/mod.rs @@ -0,0 +1,241 @@ +pub mod query; + +mod counters; +mod debug; +mod graph; +mod spans; + +use counters::CoverageCounters; +use graph::BasicCoverageBlocks; +use spans::{CoverageSpan, CoverageSpans}; + +use crate::transform::MirPass; +use crate::util::pretty; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::Lrc; +use rustc_index::vec::IndexVec; +use rustc_middle::hir; +use rustc_middle::hir::map::blocks::FnLikeNode; +use rustc_middle::ich::StableHashingContext; +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::{self, BasicBlock, Coverage, Statement, StatementKind}; +use rustc_middle::ty::TyCtxt; +use rustc_span::def_id::DefId; +use rustc_span::{CharPos, Pos, SourceFile, Span, Symbol}; + +/// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected +/// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen +/// to construct the coverage map. 
+pub struct InstrumentCoverage; + +impl<'tcx> MirPass<'tcx> for InstrumentCoverage { + fn run_pass(&self, tcx: TyCtxt<'tcx>, mir_body: &mut mir::Body<'tcx>) { + let mir_source = mir_body.source; + + // If the InstrumentCoverage pass is called on promoted MIRs, skip them. + // See: https://github.com/rust-lang/rust/pull/73011#discussion_r438317601 + if mir_source.promoted.is_some() { + trace!( + "InstrumentCoverage skipped for {:?} (already promoted for Miri evaluation)", + mir_source.def_id() + ); + return; + } + + let hir_id = tcx.hir().local_def_id_to_hir_id(mir_source.def_id().expect_local()); + let is_fn_like = FnLikeNode::from_node(tcx.hir().get(hir_id)).is_some(); + + // Only instrument functions, methods, and closures (not constants since they are evaluated + // at compile time by Miri). + // FIXME(#73156): Handle source code coverage in const eval, but note, if and when const + // expressions get coverage spans, we will probably have to "carve out" space for const + // expressions from coverage spans in enclosing MIR's, like we do for closures. (That might + // be tricky if const expressions have no corresponding statements in the enclosing MIR. + // Closures are carved out by their initial `Assign` statement.) + if !is_fn_like { + trace!("InstrumentCoverage skipped for {:?} (not an FnLikeNode)", mir_source.def_id()); + return; + } + // FIXME(richkadel): By comparison, the MIR pass `ConstProp` includes associated constants, + // with functions, methods, and closures. I assume Miri is used for associated constants as + // well. If not, we may need to include them here too. + + trace!("InstrumentCoverage starting for {:?}", mir_source.def_id()); + Instrumentor::new(&self.name(), tcx, mir_body).inject_counters(); + trace!("InstrumentCoverage starting for {:?}", mir_source.def_id()); + } +} + +struct Instrumentor<'a, 'tcx> { + pass_name: &'a str, + tcx: TyCtxt<'tcx>, + mir_body: &'a mut mir::Body<'tcx>, + body_span: Span, + basic_coverage_blocks: BasicCoverageBlocks, + coverage_counters: CoverageCounters, +} + +impl<'a, 'tcx> Instrumentor<'a, 'tcx> { + fn new(pass_name: &'a str, tcx: TyCtxt<'tcx>, mir_body: &'a mut mir::Body<'tcx>) -> Self { + let hir_body = hir_body(tcx, mir_body.source.def_id()); + let body_span = hir_body.value.span; + let function_source_hash = hash_mir_source(tcx, hir_body); + let basic_coverage_blocks = BasicCoverageBlocks::from_mir(mir_body); + Self { + pass_name, + tcx, + mir_body, + body_span, + basic_coverage_blocks, + coverage_counters: CoverageCounters::new(function_source_hash), + } + } + + fn inject_counters(&'a mut self) { + let tcx = self.tcx; + let source_map = tcx.sess.source_map(); + let mir_source = self.mir_body.source; + let def_id = mir_source.def_id(); + let body_span = self.body_span; + + debug!("instrumenting {:?}, span: {}", def_id, source_map.span_to_string(body_span)); + + //////////////////////////////////////////////////// + // Compute `CoverageSpan`s from the `BasicCoverageBlocks`. + let coverage_spans = CoverageSpans::generate_coverage_spans( + &self.mir_body, + body_span, + &self.basic_coverage_blocks, + ); + + if pretty::dump_enabled(tcx, self.pass_name, def_id) { + debug::dump_coverage_spanview( + tcx, + self.mir_body, + &self.basic_coverage_blocks, + self.pass_name, + &coverage_spans, + ); + } + + self.inject_coverage_span_counters(coverage_spans); + } + + /// Inject a counter for each `CoverageSpan`. There can be multiple `CoverageSpan`s for a given + /// BCB, but only one actual counter needs to be incremented per BCB. 
`bcb_counters` maps each + /// `bcb` to its `Counter`, when injected. Subsequent `CoverageSpan`s for a BCB that already has + /// a `Counter` will inject an `Expression` instead, and compute its value by adding `ZERO` to + /// the BCB `Counter` value. + fn inject_coverage_span_counters(&mut self, coverage_spans: Vec) { + let tcx = self.tcx; + let source_map = tcx.sess.source_map(); + let body_span = self.body_span; + let source_file = source_map.lookup_source_file(body_span.lo()); + let file_name = Symbol::intern(&source_file.name.to_string()); + + let mut bb_counters = IndexVec::from_elem_n(None, self.mir_body.basic_blocks().len()); + for CoverageSpan { span, bcb_leader_bb: bb, .. } in coverage_spans { + if let Some(&counter_operand) = bb_counters[bb].as_ref() { + let expression = self.coverage_counters.make_expression( + counter_operand, + Op::Add, + ExpressionOperandId::ZERO, + ); + debug!( + "Injecting counter expression {:?} at: {:?}:\n{}\n==========", + expression, + span, + source_map.span_to_snippet(span).expect("Error getting source for span"), + ); + let code_region = make_code_region(file_name, &source_file, span, body_span); + inject_statement(self.mir_body, expression, bb, Some(code_region)); + } else { + let counter = self.coverage_counters.make_counter(); + debug!( + "Injecting counter {:?} at: {:?}:\n{}\n==========", + counter, + span, + source_map.span_to_snippet(span).expect("Error getting source for span"), + ); + let counter_operand = counter.as_operand_id(); + bb_counters[bb] = Some(counter_operand); + let code_region = make_code_region(file_name, &source_file, span, body_span); + inject_statement(self.mir_body, counter, bb, Some(code_region)); + } + } + } +} + +fn inject_statement( + mir_body: &mut mir::Body<'tcx>, + counter_kind: CoverageKind, + bb: BasicBlock, + some_code_region: Option, +) { + debug!( + " injecting statement {:?} for {:?} at code region: {:?}", + counter_kind, bb, some_code_region + ); + let data = &mut mir_body[bb]; + let source_info = data.terminator().source_info; + let statement = Statement { + source_info, + kind: StatementKind::Coverage(box Coverage { + kind: counter_kind, + code_region: some_code_region, + }), + }; + data.statements.push(statement); +} + +/// Convert the Span into its file name, start line and column, and end line and column +fn make_code_region( + file_name: Symbol, + source_file: &Lrc, + span: Span, + body_span: Span, +) -> CodeRegion { + let (start_line, mut start_col) = source_file.lookup_file_pos(span.lo()); + let (end_line, end_col) = if span.hi() == span.lo() { + let (end_line, mut end_col) = (start_line, start_col); + // Extend an empty span by one character so the region will be counted. 
+ let CharPos(char_pos) = start_col; + if span.hi() == body_span.hi() { + start_col = CharPos(char_pos - 1); + } else { + end_col = CharPos(char_pos + 1); + } + (end_line, end_col) + } else { + source_file.lookup_file_pos(span.hi()) + }; + CodeRegion { + file_name, + start_line: start_line as u32, + start_col: start_col.to_u32() + 1, + end_line: end_line as u32, + end_col: end_col.to_u32() + 1, + } +} + +fn hir_body<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> &'tcx rustc_hir::Body<'tcx> { + let hir_node = tcx.hir().get_if_local(def_id).expect("expected DefId is local"); + let fn_body_id = hir::map::associated_body(hir_node).expect("HIR node is a function with body"); + tcx.hir().body(fn_body_id) +} + +fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx rustc_hir::Body<'tcx>) -> u64 { + let mut hcx = tcx.create_no_span_stable_hashing_context(); + hash(&mut hcx, &hir_body.value).to_smaller_hash() +} + +fn hash( + hcx: &mut StableHashingContext<'tcx>, + node: &impl HashStable>, +) -> Fingerprint { + let mut stable_hasher = StableHasher::new(); + node.hash_stable(hcx, &mut stable_hasher); + stable_hasher.finish() +} diff --git a/compiler/rustc_mir/src/transform/coverage/query.rs b/compiler/rustc_mir/src/transform/coverage/query.rs new file mode 100644 index 00000000000..c17221b78dd --- /dev/null +++ b/compiler/rustc_mir/src/transform/coverage/query.rs @@ -0,0 +1,113 @@ +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::visit::Visitor; +use rustc_middle::mir::{Coverage, CoverageInfo, Location}; +use rustc_middle::ty::query::Providers; +use rustc_middle::ty::TyCtxt; +use rustc_span::def_id::DefId; + +/// The `query` provider for `CoverageInfo`, requested by `codegen_coverage()` (to inject each +/// counter) and `FunctionCoverage::new()` (to extract the coverage map metadata from the MIR). +pub(crate) fn provide(providers: &mut Providers) { + providers.coverageinfo = |tcx, def_id| coverageinfo_from_mir(tcx, def_id); +} + +/// The `num_counters` argument to `llvm.instrprof.increment` is the max counter_id + 1, or in +/// other words, the number of counter value references injected into the MIR (plus 1 for the +/// reserved `ZERO` counter, which uses counter ID `0` when included in an expression). Injected +/// counters have a counter ID from `1..num_counters-1`. +/// +/// `num_expressions` is the number of counter expressions added to the MIR body. +/// +/// Both `num_counters` and `num_expressions` are used to initialize new vectors, during backend +/// code generate, to lookup counters and expressions by simple u32 indexes. +/// +/// MIR optimization may split and duplicate some BasicBlock sequences, or optimize out some code +/// including injected counters. (It is OK if some counters are optimized out, but those counters +/// are still included in the total `num_counters` or `num_expressions`.) Simply counting the +/// calls may not work; but computing the number of counters or expressions by adding `1` to the +/// highest ID (for a given instrumented function) is valid. +/// +/// This visitor runs twice, first with `add_missing_operands` set to `false`, to find the maximum +/// counter ID and maximum expression ID based on their enum variant `id` fields; then, as a +/// safeguard, with `add_missing_operands` set to `true`, to find any other counter or expression +/// IDs referenced by expression operands, if not already seen. 
+/// +/// Ideally, every expression operand in the MIR will have a corresponding Counter or Expression, +/// but since current or future MIR optimizations can theoretically optimize out segments of a +/// MIR, it may not be possible to guarantee this, so the second pass ensures the `CoverageInfo` +/// counts include all referenced IDs. +struct CoverageVisitor { + info: CoverageInfo, + add_missing_operands: bool, +} + +impl CoverageVisitor { + // If an expression operand is encountered with an ID outside the range of known counters and + // expressions, the only way to determine if the ID is a counter ID or an expression ID is to + // assume a maximum possible counter ID value. + const MAX_COUNTER_GUARD: u32 = (u32::MAX / 2) + 1; + + #[inline(always)] + fn update_num_counters(&mut self, counter_id: u32) { + self.info.num_counters = std::cmp::max(self.info.num_counters, counter_id + 1); + } + + #[inline(always)] + fn update_num_expressions(&mut self, expression_id: u32) { + let expression_index = u32::MAX - expression_id; + self.info.num_expressions = std::cmp::max(self.info.num_expressions, expression_index + 1); + } + + fn update_from_expression_operand(&mut self, operand_id: u32) { + if operand_id >= self.info.num_counters { + let operand_as_expression_index = u32::MAX - operand_id; + if operand_as_expression_index >= self.info.num_expressions { + if operand_id <= Self::MAX_COUNTER_GUARD { + self.update_num_counters(operand_id) + } else { + self.update_num_expressions(operand_id) + } + } + } + } +} + +impl Visitor<'_> for CoverageVisitor { + fn visit_coverage(&mut self, coverage: &Coverage, _location: Location) { + if self.add_missing_operands { + match coverage.kind { + CoverageKind::Expression { lhs, rhs, .. } => { + self.update_from_expression_operand(u32::from(lhs)); + self.update_from_expression_operand(u32::from(rhs)); + } + _ => {} + } + } else { + match coverage.kind { + CoverageKind::Counter { id, .. } => { + self.update_num_counters(u32::from(id)); + } + CoverageKind::Expression { id, .. 
} => { + self.update_num_expressions(u32::from(id)); + } + _ => {} + } + } + } +} + +fn coverageinfo_from_mir<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> CoverageInfo { + let mir_body = tcx.optimized_mir(def_id); + + let mut coverage_visitor = CoverageVisitor { + info: CoverageInfo { num_counters: 0, num_expressions: 0 }, + add_missing_operands: false, + }; + + coverage_visitor.visit_body(mir_body); + + coverage_visitor.add_missing_operands = true; + coverage_visitor.visit_body(mir_body); + + coverage_visitor.info +} diff --git a/compiler/rustc_mir/src/transform/instrument_coverage.rs b/compiler/rustc_mir/src/transform/coverage/spans.rs similarity index 52% rename from compiler/rustc_mir/src/transform/instrument_coverage.rs rename to compiler/rustc_mir/src/transform/coverage/spans.rs index cb6798f2ee7..7957b6f3ff5 100644 --- a/compiler/rustc_mir/src/transform/instrument_coverage.rs +++ b/compiler/rustc_mir/src/transform/coverage/spans.rs @@ -1,396 +1,24 @@ -use crate::transform::MirPass; -use crate::util::pretty; -use crate::util::spanview::{self, source_range_no_file, SpanViewable}; +use super::debug::term_type; +use super::graph::{BasicCoverageBlock, BasicCoverageBlocks}; + +use crate::util::spanview::source_range_no_file; -use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::graph::dominators::Dominators; use rustc_data_structures::graph::WithNumNodes; -use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; -use rustc_data_structures::sync::Lrc; use rustc_index::bit_set::BitSet; -use rustc_index::vec::IndexVec; -use rustc_middle::hir; -use rustc_middle::hir::map::blocks::FnLikeNode; -use rustc_middle::ich::StableHashingContext; -use rustc_middle::mir; -use rustc_middle::mir::coverage::*; -use rustc_middle::mir::visit::Visitor; use rustc_middle::mir::{ - AggregateKind, BasicBlock, BasicBlockData, Coverage, CoverageInfo, FakeReadCause, Location, - Rvalue, Statement, StatementKind, Terminator, TerminatorKind, + self, AggregateKind, BasicBlock, FakeReadCause, Rvalue, Statement, StatementKind, Terminator, + TerminatorKind, }; -use rustc_middle::ty::query::Providers; use rustc_middle::ty::TyCtxt; -use rustc_span::def_id::DefId; + use rustc_span::source_map::original_sp; -use rustc_span::{BytePos, CharPos, Pos, SourceFile, Span, Symbol, SyntaxContext}; +use rustc_span::{BytePos, Span, SyntaxContext}; use std::cmp::Ordering; -const ID_SEPARATOR: &str = ","; - -/// The `query` provider for `CoverageInfo`, requested by `codegen_coverage()` (to inject each -/// counter) and `FunctionCoverage::new()` (to extract the coverage map metadata from the MIR). -pub(crate) fn provide(providers: &mut Providers) { - providers.coverageinfo = |tcx, def_id| coverageinfo_from_mir(tcx, def_id); -} - -/// The `num_counters` argument to `llvm.instrprof.increment` is the max counter_id + 1, or in -/// other words, the number of counter value references injected into the MIR (plus 1 for the -/// reserved `ZERO` counter, which uses counter ID `0` when included in an expression). Injected -/// counters have a counter ID from `1..num_counters-1`. -/// -/// `num_expressions` is the number of counter expressions added to the MIR body. -/// -/// Both `num_counters` and `num_expressions` are used to initialize new vectors, during backend -/// code generate, to lookup counters and expressions by simple u32 indexes. -/// -/// MIR optimization may split and duplicate some BasicBlock sequences, or optimize out some code -/// including injected counters. 
(It is OK if some counters are optimized out, but those counters -/// are still included in the total `num_counters` or `num_expressions`.) Simply counting the -/// calls may not work; but computing the number of counters or expressions by adding `1` to the -/// highest ID (for a given instrumented function) is valid. -/// -/// This visitor runs twice, first with `add_missing_operands` set to `false`, to find the maximum -/// counter ID and maximum expression ID based on their enum variant `id` fields; then, as a -/// safeguard, with `add_missing_operands` set to `true`, to find any other counter or expression -/// IDs referenced by expression operands, if not already seen. -/// -/// Ideally, every expression operand in the MIR will have a corresponding Counter or Expression, -/// but since current or future MIR optimizations can theoretically optimize out segments of a -/// MIR, it may not be possible to guarantee this, so the second pass ensures the `CoverageInfo` -/// counts include all referenced IDs. -struct CoverageVisitor { - info: CoverageInfo, - add_missing_operands: bool, -} - -impl CoverageVisitor { - // If an expression operand is encountered with an ID outside the range of known counters and - // expressions, the only way to determine if the ID is a counter ID or an expression ID is to - // assume a maximum possible counter ID value. - const MAX_COUNTER_GUARD: u32 = (u32::MAX / 2) + 1; - - #[inline(always)] - fn update_num_counters(&mut self, counter_id: u32) { - self.info.num_counters = std::cmp::max(self.info.num_counters, counter_id + 1); - } - - #[inline(always)] - fn update_num_expressions(&mut self, expression_id: u32) { - let expression_index = u32::MAX - expression_id; - self.info.num_expressions = std::cmp::max(self.info.num_expressions, expression_index + 1); - } - - fn update_from_expression_operand(&mut self, operand_id: u32) { - if operand_id >= self.info.num_counters { - let operand_as_expression_index = u32::MAX - operand_id; - if operand_as_expression_index >= self.info.num_expressions { - if operand_id <= Self::MAX_COUNTER_GUARD { - self.update_num_counters(operand_id) - } else { - self.update_num_expressions(operand_id) - } - } - } - } -} - -impl Visitor<'_> for CoverageVisitor { - fn visit_coverage(&mut self, coverage: &Coverage, _location: Location) { - if self.add_missing_operands { - match coverage.kind { - CoverageKind::Expression { lhs, rhs, .. } => { - self.update_from_expression_operand(u32::from(lhs)); - self.update_from_expression_operand(u32::from(rhs)); - } - _ => {} - } - } else { - match coverage.kind { - CoverageKind::Counter { id, .. } => { - self.update_num_counters(u32::from(id)); - } - CoverageKind::Expression { id, .. } => { - self.update_num_expressions(u32::from(id)); - } - _ => {} - } - } - } -} - -fn coverageinfo_from_mir<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> CoverageInfo { - let mir_body = tcx.optimized_mir(def_id); - - let mut coverage_visitor = CoverageVisitor { - info: CoverageInfo { num_counters: 0, num_expressions: 0 }, - add_missing_operands: false, - }; - - coverage_visitor.visit_body(mir_body); - - coverage_visitor.add_missing_operands = true; - coverage_visitor.visit_body(mir_body); - - coverage_visitor.info -} -/// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected -/// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen -/// to construct the coverage map. 
-pub struct InstrumentCoverage; - -impl<'tcx> MirPass<'tcx> for InstrumentCoverage { - fn run_pass(&self, tcx: TyCtxt<'tcx>, mir_body: &mut mir::Body<'tcx>) { - let mir_source = mir_body.source; - - // If the InstrumentCoverage pass is called on promoted MIRs, skip them. - // See: https://github.com/rust-lang/rust/pull/73011#discussion_r438317601 - if mir_source.promoted.is_some() { - trace!( - "InstrumentCoverage skipped for {:?} (already promoted for Miri evaluation)", - mir_source.def_id() - ); - return; - } - - let hir_id = tcx.hir().local_def_id_to_hir_id(mir_source.def_id().expect_local()); - let is_fn_like = FnLikeNode::from_node(tcx.hir().get(hir_id)).is_some(); - - // Only instrument functions, methods, and closures (not constants since they are evaluated - // at compile time by Miri). - // FIXME(#73156): Handle source code coverage in const eval, but note, if and when const - // expressions get coverage spans, we will probably have to "carve out" space for const - // expressions from coverage spans in enclosing MIR's, like we do for closures. (That might - // be tricky if const expressions have no corresponding statements in the enclosing MIR. - // Closures are carved out by their initial `Assign` statement.) - if !is_fn_like { - trace!("InstrumentCoverage skipped for {:?} (not an FnLikeNode)", mir_source.def_id()); - return; - } - // FIXME(richkadel): By comparison, the MIR pass `ConstProp` includes associated constants, - // with functions, methods, and closures. I assume Miri is used for associated constants as - // well. If not, we may need to include them here too. - - trace!("InstrumentCoverage starting for {:?}", mir_source.def_id()); - Instrumentor::new(&self.name(), tcx, mir_body).inject_counters(); - trace!("InstrumentCoverage starting for {:?}", mir_source.def_id()); - } -} - -/// A BasicCoverageBlock (BCB) represents the maximal-length sequence of CFG (MIR) BasicBlocks -/// without conditional branches. -/// -/// The BCB allows coverage analysis to be performed on a simplified projection of the underlying -/// MIR CFG, without altering the original CFG. Note that running the MIR `SimplifyCfg` transform, -/// is not sufficient, and therefore not necessary, since the BCB-based CFG projection is a more -/// aggressive simplification. For example: -/// -/// * The BCB CFG projection ignores (trims) branches not relevant to coverage, such as unwind- -/// related code that is injected by the Rust compiler but has no physical source code to -/// count. This also means a BasicBlock with a `Call` terminator can be merged into its -/// primary successor target block, in the same BCB. -/// * Some BasicBlock terminators support Rust-specific concerns--like borrow-checking--that are -/// not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as -/// a `Goto`, and merged with its successor into the same BCB. -/// -/// Each BCB with at least one computed `CoverageSpan` will have no more than one `Counter`. -/// In some cases, a BCB's execution count can be computed by `CounterExpression`. Additional -/// disjoint `CoverageSpan`s in a BCB can also be counted by `CounterExpression` (by adding `ZERO` -/// to the BCB's primary counter or expression). -/// -/// Dominator/dominated relationships (which are fundamental to the coverage analysis algorithm) -/// between two BCBs can be computed using the `mir::Body` `dominators()` with any `BasicBlock` -/// member of each BCB. 
(For consistency, BCB's use the first `BasicBlock`, also referred to as the -/// `bcb_leader_bb`.) -/// -/// The BCB CFG projection is critical to simplifying the coverage analysis by ensuring graph -/// path-based queries (`is_dominated_by()`, `predecessors`, `successors`, etc.) have branch -/// (control flow) significance. -#[derive(Debug, Clone)] -struct BasicCoverageBlock { - pub blocks: Vec, -} - -impl BasicCoverageBlock { - pub fn leader_bb(&self) -> BasicBlock { - self.blocks[0] - } - - pub fn id(&self) -> String { - format!( - "@{}", - self.blocks - .iter() - .map(|bb| bb.index().to_string()) - .collect::>() - .join(ID_SEPARATOR) - ) - } -} - -struct BasicCoverageBlocks { - vec: IndexVec>, -} - -impl BasicCoverageBlocks { - pub fn from_mir(mir_body: &mir::Body<'tcx>) -> Self { - let mut basic_coverage_blocks = - BasicCoverageBlocks { vec: IndexVec::from_elem_n(None, mir_body.basic_blocks().len()) }; - basic_coverage_blocks.extract_from_mir(mir_body); - basic_coverage_blocks - } - - pub fn iter(&self) -> impl Iterator { - self.vec.iter().filter_map(|bcb| bcb.as_ref()) - } - - pub fn num_nodes(&self) -> usize { - self.vec.len() - } - - pub fn extract_from_mir(&mut self, mir_body: &mir::Body<'tcx>) { - // Traverse the CFG but ignore anything following an `unwind` - let cfg_without_unwind = ShortCircuitPreorder::new(&mir_body, |term_kind| { - let mut successors = term_kind.successors(); - match &term_kind { - // SwitchInt successors are never unwind, and all of them should be traversed. - - // NOTE: TerminatorKind::FalseEdge targets from SwitchInt don't appear to be - // helpful in identifying unreachable code. I did test the theory, but the following - // changes were not beneficial. (I assumed that replacing some constants with - // non-deterministic variables might effect which blocks were targeted by a - // `FalseEdge` `imaginary_target`. It did not.) - // - // Also note that, if there is a way to identify BasicBlocks that are part of the - // MIR CFG, but not actually reachable, here are some other things to consider: - // - // Injecting unreachable code regions will probably require computing the set - // difference between the basic blocks found without filtering out unreachable - // blocks, and the basic blocks found with the filter; then computing the - // `CoverageSpans` without the filter; and then injecting `Counter`s or - // `CounterExpression`s for blocks that are not unreachable, or injecting - // `Unreachable` code regions otherwise. This seems straightforward, but not - // trivial. - // - // Alternatively, we might instead want to leave the unreachable blocks in - // (bypass the filter here), and inject the counters. This will result in counter - // values of zero (0) for unreachable code (and, notably, the code will be displayed - // with a red background by `llvm-cov show`). - // - // TerminatorKind::SwitchInt { .. } => { - // let some_imaginary_target = successors.clone().find_map(|&successor| { - // let term = mir_body.basic_blocks()[successor].terminator(); - // if let TerminatorKind::FalseEdge { imaginary_target, .. 
} = term.kind { - // if mir_body.predecessors()[imaginary_target].len() == 1 { - // return Some(imaginary_target); - // } - // } - // None - // }); - // if let Some(imaginary_target) = some_imaginary_target { - // box successors.filter(move |&&successor| successor != imaginary_target) - // } else { - // box successors - // } - // } - // - // Note this also required changing the closure signature for the - // `ShortCurcuitPreorder` to: - // - // F: Fn(&'tcx TerminatorKind<'tcx>) -> Box + 'a>, - TerminatorKind::SwitchInt { .. } => successors, - - // For all other kinds, return only the first successor, if any, and ignore unwinds - _ => successors.next().into_iter().chain(&[]), - } - }); - - // Walk the CFG using a Preorder traversal, which starts from `START_BLOCK` and follows - // each block terminator's `successors()`. Coverage spans must map to actual source code, - // so compiler generated blocks and paths can be ignored. To that end the CFG traversal - // intentionally omits unwind paths. - let mut blocks = Vec::new(); - for (bb, data) in cfg_without_unwind { - if let Some(last) = blocks.last() { - let predecessors = &mir_body.predecessors()[bb]; - if predecessors.len() > 1 || !predecessors.contains(last) { - // The `bb` has more than one _incoming_ edge, and should start its own - // `BasicCoverageBlock`. (Note, the `blocks` vector does not yet include `bb`; - // it contains a sequence of one or more sequential blocks with no intermediate - // branches in or out. Save these as a new `BasicCoverageBlock` before starting - // the new one.) - self.add_basic_coverage_block(blocks.split_off(0)); - debug!( - " because {}", - if predecessors.len() > 1 { - "predecessors.len() > 1".to_owned() - } else { - format!("bb {} is not in precessors: {:?}", bb.index(), predecessors) - } - ); - } - } - blocks.push(bb); - - let term = data.terminator(); - - match term.kind { - TerminatorKind::Return { .. } - | TerminatorKind::Abort - | TerminatorKind::Assert { .. } - | TerminatorKind::Yield { .. } - | TerminatorKind::SwitchInt { .. } => { - // The `bb` has more than one _outgoing_ edge, or exits the function. Save the - // current sequence of `blocks` gathered to this point, as a new - // `BasicCoverageBlock`. - self.add_basic_coverage_block(blocks.split_off(0)); - debug!(" because term.kind = {:?}", term.kind); - // Note that this condition is based on `TerminatorKind`, even though it - // theoretically boils down to `successors().len() != 1`; that is, either zero - // (e.g., `Return`, `Abort`) or multiple successors (e.g., `SwitchInt`), but - // since the Coverage graph (the BCB CFG projection) ignores things like unwind - // branches (which exist in the `Terminator`s `successors()` list) checking the - // number of successors won't work. - } - TerminatorKind::Goto { .. } - | TerminatorKind::Resume - | TerminatorKind::Unreachable - | TerminatorKind::Drop { .. } - | TerminatorKind::DropAndReplace { .. } - | TerminatorKind::Call { .. } - | TerminatorKind::GeneratorDrop - | TerminatorKind::FalseEdge { .. } - | TerminatorKind::FalseUnwind { .. } - | TerminatorKind::InlineAsm { .. 
} => {} - } - } - - if !blocks.is_empty() { - // process any remaining blocks into a final `BasicCoverageBlock` - self.add_basic_coverage_block(blocks.split_off(0)); - debug!(" because the end of the CFG was reached while traversing"); - } - } - - fn add_basic_coverage_block(&mut self, blocks: Vec) { - let leader_bb = blocks[0]; - let bcb = BasicCoverageBlock { blocks }; - debug!("adding BCB: {:?}", bcb); - self.vec[leader_bb] = Some(bcb); - } -} - -impl std::ops::Index for BasicCoverageBlocks { - type Output = BasicCoverageBlock; - - fn index(&self, index: BasicBlock) -> &Self::Output { - self.vec[index].as_ref().expect("is_some if BasicBlock is a BasicCoverageBlock leader") - } -} - #[derive(Debug, Copy, Clone)] -enum CoverageStatement { +pub(crate) enum CoverageStatement { Statement(BasicBlock, Span, usize), Terminator(BasicBlock, Span), } @@ -428,28 +56,6 @@ impl CoverageStatement { } } -/// Returns a simple string representation of a `TerminatorKind` variant, indenpendent of any -/// values it might hold. -fn term_type(kind: &TerminatorKind<'tcx>) -> &'static str { - match kind { - TerminatorKind::Goto { .. } => "Goto", - TerminatorKind::SwitchInt { .. } => "SwitchInt", - TerminatorKind::Resume => "Resume", - TerminatorKind::Abort => "Abort", - TerminatorKind::Return => "Return", - TerminatorKind::Unreachable => "Unreachable", - TerminatorKind::Drop { .. } => "Drop", - TerminatorKind::DropAndReplace { .. } => "DropAndReplace", - TerminatorKind::Call { .. } => "Call", - TerminatorKind::Assert { .. } => "Assert", - TerminatorKind::Yield { .. } => "Yield", - TerminatorKind::GeneratorDrop => "GeneratorDrop", - TerminatorKind::FalseEdge { .. } => "FalseEdge", - TerminatorKind::FalseUnwind { .. } => "FalseUnwind", - TerminatorKind::InlineAsm { .. } => "InlineAsm", - } -} - /// A BCB is deconstructed into one or more `Span`s. Each `Span` maps to a `CoverageSpan` that /// references the originating BCB and one or more MIR `Statement`s and/or `Terminator`s. /// Initially, the `Span`s come from the `Statement`s and `Terminator`s, but subsequent @@ -461,7 +67,7 @@ fn term_type(kind: &TerminatorKind<'tcx>) -> &'static str { /// or is subsumed by the `Span` associated with this `CoverageSpan`, and it's `BasicBlock` /// `is_dominated_by()` the `BasicBlock`s in this `CoverageSpan`. 
#[derive(Debug, Clone)] -struct CoverageSpan { +pub(crate) struct CoverageSpan { pub span: Span, pub bcb_leader_bb: BasicBlock, pub coverage_statements: Vec, @@ -547,220 +153,6 @@ impl CoverageSpan { } } -struct Instrumentor<'a, 'tcx> { - pass_name: &'a str, - tcx: TyCtxt<'tcx>, - mir_body: &'a mut mir::Body<'tcx>, - body_span: Span, - basic_coverage_blocks: BasicCoverageBlocks, - coverage_counters: CoverageCounters, -} - -impl<'a, 'tcx> Instrumentor<'a, 'tcx> { - fn new(pass_name: &'a str, tcx: TyCtxt<'tcx>, mir_body: &'a mut mir::Body<'tcx>) -> Self { - let hir_body = hir_body(tcx, mir_body.source.def_id()); - let body_span = hir_body.value.span; - let function_source_hash = hash_mir_source(tcx, hir_body); - let basic_coverage_blocks = BasicCoverageBlocks::from_mir(mir_body); - Self { - pass_name, - tcx, - mir_body, - body_span, - basic_coverage_blocks, - coverage_counters: CoverageCounters::new(function_source_hash), - } - } - - fn inject_counters(&'a mut self) { - let tcx = self.tcx; - let source_map = tcx.sess.source_map(); - let mir_source = self.mir_body.source; - let def_id = mir_source.def_id(); - let body_span = self.body_span; - - debug!("instrumenting {:?}, span: {}", def_id, source_map.span_to_string(body_span)); - - //////////////////////////////////////////////////// - // Compute `CoverageSpan`s from the `BasicCoverageBlocks`. - let coverage_spans = CoverageSpans::generate_coverage_spans( - &self.mir_body, - body_span, - &self.basic_coverage_blocks, - ); - - if pretty::dump_enabled(tcx, self.pass_name, def_id) { - dump_coverage_spanview( - tcx, - self.mir_body, - &self.basic_coverage_blocks, - self.pass_name, - &coverage_spans, - ); - } - - self.inject_coverage_span_counters(coverage_spans); - } - - /// Inject a counter for each `CoverageSpan`. There can be multiple `CoverageSpan`s for a given - /// BCB, but only one actual counter needs to be incremented per BCB. `bcb_counters` maps each - /// `bcb` to its `Counter`, when injected. Subsequent `CoverageSpan`s for a BCB that already has - /// a `Counter` will inject an `Expression` instead, and compute its value by adding `ZERO` to - /// the BCB `Counter` value. - fn inject_coverage_span_counters(&mut self, coverage_spans: Vec) { - let tcx = self.tcx; - let source_map = tcx.sess.source_map(); - let body_span = self.body_span; - let source_file = source_map.lookup_source_file(body_span.lo()); - let file_name = Symbol::intern(&source_file.name.to_string()); - - let mut bb_counters = IndexVec::from_elem_n(None, self.mir_body.basic_blocks().len()); - for CoverageSpan { span, bcb_leader_bb: bb, .. 
} in coverage_spans { - if let Some(&counter_operand) = bb_counters[bb].as_ref() { - let expression = self.coverage_counters.make_expression( - counter_operand, - Op::Add, - ExpressionOperandId::ZERO, - ); - debug!( - "Injecting counter expression {:?} at: {:?}:\n{}\n==========", - expression, - span, - source_map.span_to_snippet(span).expect("Error getting source for span"), - ); - let code_region = make_code_region(file_name, &source_file, span, body_span); - inject_statement(self.mir_body, expression, bb, Some(code_region)); - } else { - let counter = self.coverage_counters.make_counter(); - debug!( - "Injecting counter {:?} at: {:?}:\n{}\n==========", - counter, - span, - source_map.span_to_snippet(span).expect("Error getting source for span"), - ); - let counter_operand = counter.as_operand_id(); - bb_counters[bb] = Some(counter_operand); - let code_region = make_code_region(file_name, &source_file, span, body_span); - inject_statement(self.mir_body, counter, bb, Some(code_region)); - } - } - } -} - -/// Generates the MIR pass `CoverageSpan`-specific spanview dump file. -fn dump_coverage_spanview( - tcx: TyCtxt<'tcx>, - mir_body: &mir::Body<'tcx>, - basic_coverage_blocks: &BasicCoverageBlocks, - pass_name: &str, - coverage_spans: &Vec, -) { - let mir_source = mir_body.source; - let def_id = mir_source.def_id(); - - let span_viewables = span_viewables(tcx, mir_body, basic_coverage_blocks, &coverage_spans); - let mut file = pretty::create_dump_file(tcx, "html", None, pass_name, &0, mir_source) - .expect("Unexpected error creating MIR spanview HTML file"); - let crate_name = tcx.crate_name(def_id.krate); - let item_name = tcx.def_path(def_id).to_filename_friendly_no_crate(); - let title = format!("{}.{} - Coverage Spans", crate_name, item_name); - spanview::write_document(tcx, def_id, span_viewables, &title, &mut file) - .expect("Unexpected IO error dumping coverage spans as HTML"); -} - -/// Converts the computed `BasicCoverageBlock`s into `SpanViewable`s. -fn span_viewables( - tcx: TyCtxt<'tcx>, - mir_body: &mir::Body<'tcx>, - basic_coverage_blocks: &BasicCoverageBlocks, - coverage_spans: &Vec, -) -> Vec { - let mut span_viewables = Vec::new(); - for coverage_span in coverage_spans { - let tooltip = coverage_span.format_coverage_statements(tcx, mir_body); - let CoverageSpan { span, bcb_leader_bb: bb, .. } = coverage_span; - let bcb = &basic_coverage_blocks[*bb]; - let id = bcb.id(); - let leader_bb = bcb.leader_bb(); - span_viewables.push(SpanViewable { bb: leader_bb, span: *span, id, tooltip }); - } - span_viewables -} - -/// Manages the counter and expression indexes/IDs to generate `CoverageKind` components for MIR -/// `Coverage` statements. -struct CoverageCounters { - function_source_hash: u64, - next_counter_id: u32, - num_expressions: u32, -} - -impl CoverageCounters { - pub fn new(function_source_hash: u64) -> Self { - Self { - function_source_hash, - next_counter_id: CounterValueReference::START.as_u32(), - num_expressions: 0, - } - } - - pub fn make_counter(&mut self) -> CoverageKind { - CoverageKind::Counter { - function_source_hash: self.function_source_hash, - id: self.next_counter(), - } - } - - pub fn make_expression( - &mut self, - lhs: ExpressionOperandId, - op: Op, - rhs: ExpressionOperandId, - ) -> CoverageKind { - let id = self.next_expression(); - CoverageKind::Expression { id, lhs, op, rhs } - } - - /// Counter IDs start from one and go up. 
- fn next_counter(&mut self) -> CounterValueReference { - assert!(self.next_counter_id < u32::MAX - self.num_expressions); - let next = self.next_counter_id; - self.next_counter_id += 1; - CounterValueReference::from(next) - } - - /// Expression IDs start from u32::MAX and go down because a Expression can reference - /// (add or subtract counts) of both Counter regions and Expression regions. The counter - /// expression operand IDs must be unique across both types. - fn next_expression(&mut self) -> InjectedExpressionId { - assert!(self.next_counter_id < u32::MAX - self.num_expressions); - let next = u32::MAX - self.num_expressions; - self.num_expressions += 1; - InjectedExpressionId::from(next) - } -} -fn inject_statement( - mir_body: &mut mir::Body<'tcx>, - counter_kind: CoverageKind, - bb: BasicBlock, - some_code_region: Option, -) { - debug!( - " injecting statement {:?} for {:?} at code region: {:?}", - counter_kind, bb, some_code_region - ); - let data = &mut mir_body[bb]; - let source_info = data.terminator().source_info; - let statement = Statement { - source_info, - kind: StatementKind::Coverage(box Coverage { - kind: counter_kind, - code_region: some_code_region, - }), - }; - data.statements.push(statement); -} - /// Converts the initial set of `CoverageSpan`s (one per MIR `Statement` or `Terminator`) into a /// minimal set of `CoverageSpan`s, using the BCB CFG to determine where it is safe and useful to: /// @@ -818,7 +210,7 @@ pub struct CoverageSpans<'a, 'tcx> { } impl<'a, 'tcx> CoverageSpans<'a, 'tcx> { - fn generate_coverage_spans( + pub(crate) fn generate_coverage_spans( mir_body: &'a mir::Body<'tcx>, body_span: Span, basic_coverage_blocks: &'a BasicCoverageBlocks, @@ -1366,111 +758,3 @@ fn is_goto(term_kind: &TerminatorKind<'tcx>) -> bool { _ => false, } } - -/// Convert the Span into its file name, start line and column, and end line and column -fn make_code_region( - file_name: Symbol, - source_file: &Lrc, - span: Span, - body_span: Span, -) -> CodeRegion { - let (start_line, mut start_col) = source_file.lookup_file_pos(span.lo()); - let (end_line, end_col) = if span.hi() == span.lo() { - let (end_line, mut end_col) = (start_line, start_col); - // Extend an empty span by one character so the region will be counted. 
- let CharPos(char_pos) = start_col; - if span.hi() == body_span.hi() { - start_col = CharPos(char_pos - 1); - } else { - end_col = CharPos(char_pos + 1); - } - (end_line, end_col) - } else { - source_file.lookup_file_pos(span.hi()) - }; - CodeRegion { - file_name, - start_line: start_line as u32, - start_col: start_col.to_u32() + 1, - end_line: end_line as u32, - end_col: end_col.to_u32() + 1, - } -} - -fn hir_body<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> &'tcx rustc_hir::Body<'tcx> { - let hir_node = tcx.hir().get_if_local(def_id).expect("expected DefId is local"); - let fn_body_id = hir::map::associated_body(hir_node).expect("HIR node is a function with body"); - tcx.hir().body(fn_body_id) -} - -fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx rustc_hir::Body<'tcx>) -> u64 { - let mut hcx = tcx.create_no_span_stable_hashing_context(); - hash(&mut hcx, &hir_body.value).to_smaller_hash() -} - -fn hash( - hcx: &mut StableHashingContext<'tcx>, - node: &impl HashStable>, -) -> Fingerprint { - let mut stable_hasher = StableHasher::new(); - node.hash_stable(hcx, &mut stable_hasher); - stable_hasher.finish() -} - -pub struct ShortCircuitPreorder< - 'a, - 'tcx, - F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>, -> { - body: &'a mir::Body<'tcx>, - visited: BitSet, - worklist: Vec, - filtered_successors: F, -} - -impl<'a, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>> - ShortCircuitPreorder<'a, 'tcx, F> -{ - pub fn new( - body: &'a mir::Body<'tcx>, - filtered_successors: F, - ) -> ShortCircuitPreorder<'a, 'tcx, F> { - let worklist = vec![mir::START_BLOCK]; - - ShortCircuitPreorder { - body, - visited: BitSet::new_empty(body.basic_blocks().len()), - worklist, - filtered_successors, - } - } -} - -impl<'a: 'tcx, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>> Iterator - for ShortCircuitPreorder<'a, 'tcx, F> -{ - type Item = (BasicBlock, &'a BasicBlockData<'tcx>); - - fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> { - while let Some(idx) = self.worklist.pop() { - if !self.visited.insert(idx) { - continue; - } - - let data = &self.body[idx]; - - if let Some(ref term) = data.terminator { - self.worklist.extend((self.filtered_successors)(&term.kind)); - } - - return Some((idx, data)); - } - - None - } - - fn size_hint(&self) -> (usize, Option) { - let size = self.body.basic_blocks().len() - self.visited.count(); - (size, Some(size)) - } -} diff --git a/compiler/rustc_mir/src/transform/mod.rs b/compiler/rustc_mir/src/transform/mod.rs index 89db6bb13ca..e3fea2d2701 100644 --- a/compiler/rustc_mir/src/transform/mod.rs +++ b/compiler/rustc_mir/src/transform/mod.rs @@ -22,6 +22,7 @@ pub mod check_packed_ref; pub mod check_unsafety; pub mod cleanup_post_borrowck; pub mod const_prop; +pub mod coverage; pub mod deaggregator; pub mod dest_prop; pub mod dump_mir; @@ -31,7 +32,6 @@ pub mod function_item_references; pub mod generator; pub mod inline; pub mod instcombine; -pub mod instrument_coverage; pub mod match_branches; pub mod multiple_return_terminators; pub mod no_landing_pads; @@ -85,7 +85,7 @@ pub(crate) fn provide(providers: &mut Providers) { }, ..*providers }; - instrument_coverage::provide(providers); + coverage::query::provide(providers); } fn is_mir_available(tcx: TyCtxt<'_>, def_id: DefId) -> bool { @@ -306,7 +306,7 @@ fn mir_promoted( ]; let opt_coverage: &[&dyn MirPass<'tcx>] = if tcx.sess.opts.debugging_opts.instrument_coverage { - &[&instrument_coverage::InstrumentCoverage] + &[&coverage::InstrumentCoverage] } else 
{ &[] };
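
A minimal, standalone sketch (not part of this commit) of the ID-allocation scheme described in coverage/counters.rs: counter IDs count up from 1 while expression IDs count down from u32::MAX, so the operand IDs of the two kinds never collide. `ToyCounters` and the bare `u32` IDs below are illustrative stand-ins for `CoverageCounters`, `CounterValueReference`, and `InjectedExpressionId`, not the actual compiler types.

/// Illustrative stand-in for `CoverageCounters`: counters count up from 1,
/// expressions count down from `u32::MAX`, and the assertion keeps the two
/// ranges from ever overlapping.
struct ToyCounters {
    next_counter_id: u32, // starts at 1 (ID 0 is reserved for `ZERO`)
    num_expressions: u32,
}

impl ToyCounters {
    fn new() -> Self {
        Self { next_counter_id: 1, num_expressions: 0 }
    }

    /// Returns the next counter ID: 1, 2, 3, ...
    fn next_counter(&mut self) -> u32 {
        assert!(self.next_counter_id < u32::MAX - self.num_expressions);
        let next = self.next_counter_id;
        self.next_counter_id += 1;
        next
    }

    /// Returns the next expression ID: u32::MAX, u32::MAX - 1, ...
    fn next_expression(&mut self) -> u32 {
        assert!(self.next_counter_id < u32::MAX - self.num_expressions);
        let next = u32::MAX - self.num_expressions;
        self.num_expressions += 1;
        next
    }
}

fn main() {
    let mut c = ToyCounters::new();
    assert_eq!(c.next_counter(), 1);
    assert_eq!(c.next_counter(), 2);
    assert_eq!(c.next_expression(), u32::MAX);
    assert_eq!(c.next_expression(), u32::MAX - 1);
    // Counter and expression IDs occupy disjoint ranges, so a single
    // expression operand ID value is unambiguous.
}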
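
Likewise, a hedged sketch of the sizing rule documented in coverage/query.rs: `num_counters` is the highest counter ID plus one, and `num_expressions` is derived from how far the lowest expression ID has descended from u32::MAX. The `visit_ids` function and the sample ID lists are hypothetical; they only mirror the arithmetic in `update_num_counters` and `update_num_expressions`.

/// Mirrors the `CoverageVisitor` arithmetic: counter IDs grow upward from 1,
/// expression IDs grow downward from u32::MAX, and the vectors allocated by
/// codegen must be large enough to index either kind directly.
fn visit_ids(counter_ids: &[u32], expression_ids: &[u32]) -> (u32, u32) {
    let mut num_counters = 0;
    let mut num_expressions = 0;
    for &id in counter_ids {
        // max counter ID + 1 (IDs may be sparse if MIR opts removed counters)
        num_counters = num_counters.max(id + 1);
    }
    for &id in expression_ids {
        // an expression's "index" is its distance from u32::MAX
        let index = u32::MAX - id;
        num_expressions = num_expressions.max(index + 1);
    }
    (num_counters, num_expressions)
}

fn main() {
    // Hypothetical IDs from one instrumented function: three counters survive
    // (1, 2, 4 -- counter 3 was optimized out) plus two expressions.
    let (num_counters, num_expressions) =
        visit_ids(&[1, 2, 4], &[u32::MAX, u32::MAX - 1]);
    assert_eq!(num_counters, 5); // indexes 0..=4, with index 0 reserved for ZERO
    assert_eq!(num_expressions, 2);
}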