coverage: Build the global file table ahead of time

This commit is contained in:
Zalathar 2023-10-03 21:40:50 +11:00
parent 86b55cccff
commit e985ae5a45
4 changed files with 51 additions and 25 deletions

View File

@ -3596,6 +3596,7 @@ version = "0.0.0"
dependencies = [
"bitflags 1.3.2",
"cstr",
"itertools",
"libc",
"measureme",
"object",

View File

@ -9,6 +9,7 @@ test = false
[dependencies]
bitflags = "1.0"
cstr = "0.2"
itertools = "0.10.5"
libc = "0.2"
measureme = "10.0.0"
object = { version = "0.32.0", default-features = false, features = [

View File

@ -7,6 +7,7 @@ use rustc_middle::mir::coverage::{
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
};
use rustc_middle::ty::Instance;
use rustc_span::Symbol;
/// Holds all of the coverage mapping data associated with a function instance,
/// collected during traversal of `Coverage` statements in the function's MIR.
@ -189,6 +190,11 @@ impl<'tcx> FunctionCoverage<'tcx> {
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
}
/// Yields the filename of each of this function's coverage mappings, in
/// mapping order. No deduplication is performed here, so a filename shared
/// by several mappings is yielded once per mapping.
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
    let mappings = &self.function_coverage_info.mappings;
    mappings.iter().map(|m| m.code_region.file_name)
}
/// Convert this function's coverage expression data into a form that can be
/// passed through FFI to LLVM.
pub(crate) fn counter_expressions(

View File

@ -4,6 +4,7 @@ use crate::coverageinfo::ffi::CounterMappingRegion;
use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
use crate::llvm;
use itertools::Itertools as _;
use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
use rustc_data_structures::fx::FxIndexSet;
use rustc_hir::def::DefKind;
@ -57,12 +58,18 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
return;
}
let mut global_file_table = GlobalFileTable::new(tcx);
let function_coverage_entries = function_coverage_map
.into_iter()
.map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
.collect::<Vec<_>>();
let all_file_names =
function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names());
let global_file_table = GlobalFileTable::new(all_file_names);
// Encode coverage mappings and generate function records
let mut function_data = Vec::new();
for (instance, function_coverage) in function_coverage_map {
let function_coverage = function_coverage.into_finished();
for (instance, function_coverage) in function_coverage_entries {
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
let mangled_function_name = tcx.symbol_name(instance).name;
@ -70,7 +77,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
let is_used = function_coverage.is_used();
let coverage_mapping_buffer =
encode_mappings_for_function(&mut global_file_table, &function_coverage);
encode_mappings_for_function(&global_file_table, &function_coverage);
if coverage_mapping_buffer.is_empty() {
if function_coverage.is_used() {
@ -88,7 +95,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
}
// Encode all filenames referenced by counters/expressions in this module
let filenames_buffer = global_file_table.into_filenames_buffer();
let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
let filenames_size = filenames_buffer.len();
let filenames_val = cx.const_bytes(&filenames_buffer);
@ -139,37 +146,48 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
coverageinfo::save_cov_data_to_mod(cx, cov_data_val);
}
/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
struct GlobalFileTable {
global_file_table: FxIndexSet<Symbol>,
/// This "raw" table doesn't include the working dir, so a filename's
/// global ID is its index in this set **plus one**.
raw_file_table: FxIndexSet<Symbol>,
}
impl GlobalFileTable {
fn new(tcx: TyCtxt<'_>) -> Self {
let mut global_file_table = FxIndexSet::default();
/// Builds the table from every filename that will later be looked up.
///
/// The resulting entries are unique and ordered by their string contents.
fn new(all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
    // Gather the filenames into a set. They tend to arrive in contiguous
    // runs, so dropping adjacent repeats first spares the set some work.
    let mut raw_file_table: FxIndexSet<Symbol> = all_file_names.into_iter().dedup().collect();

    // Order entries by the text of each filename rather than by the
    // arbitrary ordering of the symbols themselves.
    raw_file_table.sort_unstable_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));

    Self { raw_file_table }
}
/// Returns the global file ID of `file_name` within this table.
///
/// Reports a compiler bug via `bug!` if `file_name` is not present in the
/// table.
fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 {
    match self.raw_file_table.get_index_of(&file_name) {
        // ID 0 belongs to the working dir, which the raw table does not
        // store, so every raw index is shifted up by one.
        Some(raw_index) => (raw_index + 1) as u32,
        None => bug!("file name not found in prepared global file table: {file_name}"),
    }
}
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
// requires setting the first filename to the compilation directory.
// Since rustc generates coverage maps with relative paths, the
// compilation directory can be combined with the relative paths
// to get absolute paths, if needed.
use rustc_session::RemapFileNameExt;
let working_dir =
Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy());
global_file_table.insert(working_dir);
Self { global_file_table }
}
fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 {
let (global_file_id, _) = self.global_file_table.insert_full(file_name);
global_file_id as u32
}
fn into_filenames_buffer(self) -> Vec<u8> {
// This method takes `self` so that the caller can't accidentally
// modify the original file table after encoding it into a buffer.
let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy();
llvm::build_byte_buffer(|buffer| {
coverageinfo::write_filenames_section_to_buffer(
self.global_file_table.iter().map(Symbol::as_str),
// Insert the working dir at index 0, before the other filenames.
std::iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)),
buffer,
);
})
@ -182,7 +200,7 @@ impl GlobalFileTable {
///
/// Filenames are looked up in the already-built global file table; no new entries are added here.
fn encode_mappings_for_function(
global_file_table: &mut GlobalFileTable,
global_file_table: &GlobalFileTable,
function_coverage: &FunctionCoverage<'_>,
) -> Vec<u8> {
let mut counter_regions = function_coverage.counter_regions().collect::<Vec<_>>();
@ -203,7 +221,7 @@ fn encode_mappings_for_function(
for counter_regions_for_file in
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
{
// Look up (or allocate) the global file ID for this filename.
// Look up the global file ID for this filename.
let file_name = counter_regions_for_file[0].1.file_name;
let global_file_id = global_file_table.global_file_id_for_file_name(file_name);