Rollup merge of #67889 - Zoxc:parallel-cgus, r=michaelwoerister
Compile some CGUs in parallel at the start of codegen. This brings the compilation time for `syntex_syntax` from 11.542s to 10.453s with 6 threads in non-incremental debug mode. Just compiling `n` CGUs in parallel at the beginning of codegen seems sufficient to get rid of the staircase effect, at least for `syntex_syntax`. Based on https://github.com/rust-lang/rust/pull/67777. r? @michaelwoerister cc @alexcrichton @Mark-Simulacrum
This commit is contained in:
commit
7b741fb68e
@ -13,7 +13,7 @@
|
||||
//! but one `llvm::Type` corresponds to many `Ty`s; for instance, `tup(int, int,
|
||||
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
|
||||
|
||||
use super::{LlvmCodegenBackend, ModuleLlvm};
|
||||
use super::ModuleLlvm;
|
||||
|
||||
use crate::builder::Builder;
|
||||
use crate::common;
|
||||
@ -29,7 +29,6 @@ use rustc::middle::exported_symbols;
|
||||
use rustc::mir::mono::{Linkage, Visibility};
|
||||
use rustc::session::config::DebugInfo;
|
||||
use rustc::ty::TyCtxt;
|
||||
use rustc_codegen_ssa::back::write::submit_codegened_module_to_llvm;
|
||||
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
|
||||
use rustc_codegen_ssa::mono_item::MonoItemExt;
|
||||
use rustc_codegen_ssa::traits::*;
|
||||
@ -100,8 +99,7 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> {
|
||||
pub fn compile_codegen_unit(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
cgu_name: Symbol,
|
||||
tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
|
||||
) {
|
||||
) -> (ModuleCodegen<ModuleLlvm>, u64) {
|
||||
let prof_timer = tcx.prof.generic_activity("codegen_module");
|
||||
let start_time = Instant::now();
|
||||
|
||||
@ -115,8 +113,6 @@ pub fn compile_codegen_unit(
|
||||
// the time we needed for codegenning it.
|
||||
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
|
||||
|
||||
submit_codegened_module_to_llvm(&LlvmCodegenBackend(()), tx_to_llvm_workers, module, cost);
|
||||
|
||||
fn module_codegen(tcx: TyCtxt<'_>, cgu_name: Symbol) -> ModuleCodegen<ModuleLlvm> {
|
||||
let cgu = tcx.codegen_unit(cgu_name);
|
||||
// Instantiate monomorphizations without filling out definitions yet...
|
||||
@ -164,6 +160,8 @@ pub fn compile_codegen_unit(
|
||||
kind: ModuleKind::Regular,
|
||||
}
|
||||
}
|
||||
|
||||
(module, cost)
|
||||
}
|
||||
|
||||
pub fn set_link_section(llval: &Value, attrs: &CodegenFnAttrs) {
|
||||
|
@ -19,6 +19,7 @@
|
||||
#![feature(link_args)]
|
||||
#![feature(static_nobundle)]
|
||||
#![feature(trusted_len)]
|
||||
#![recursion_limit = "256"]
|
||||
|
||||
use back::write::{create_informational_target_machine, create_target_machine};
|
||||
use rustc_span::symbol::Symbol;
|
||||
@ -108,9 +109,8 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
|
||||
&self,
|
||||
tcx: TyCtxt<'_>,
|
||||
cgu_name: Symbol,
|
||||
tx: &std::sync::mpsc::Sender<Box<dyn Any + Send>>,
|
||||
) {
|
||||
base::compile_codegen_unit(tcx, cgu_name, tx);
|
||||
) -> (ModuleCodegen<ModuleLlvm>, u64) {
|
||||
base::compile_codegen_unit(tcx, cgu_name)
|
||||
}
|
||||
fn target_machine_factory(
|
||||
&self,
|
||||
|
@ -14,8 +14,8 @@
|
||||
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
|
||||
|
||||
use crate::back::write::{
|
||||
start_async_codegen, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm,
|
||||
OngoingCodegen,
|
||||
start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm,
|
||||
submit_pre_lto_module_to_llvm, OngoingCodegen,
|
||||
};
|
||||
use crate::common::{IntPredicate, RealPredicate, TypeKind};
|
||||
use crate::meth;
|
||||
@ -40,6 +40,7 @@ use rustc::ty::{self, Instance, Ty, TyCtxt};
|
||||
use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test};
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::profiling::print_time_passes_entry;
|
||||
use rustc_data_structures::sync::{par_iter, Lock, ParallelIterator};
|
||||
use rustc_hir as hir;
|
||||
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
|
||||
use rustc_index::vec::Idx;
|
||||
@ -606,20 +607,83 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
|
||||
codegen_units
|
||||
};
|
||||
|
||||
let mut total_codegen_time = Duration::new(0, 0);
|
||||
let total_codegen_time = Lock::new(Duration::new(0, 0));
|
||||
|
||||
for cgu in codegen_units.into_iter() {
|
||||
// The non-parallel compiler can only translate codegen units to LLVM IR
|
||||
// on a single thread, leading to a staircase effect where the N LLVM
|
||||
// threads have to wait on the single codegen threads to generate work
|
||||
// for them. The parallel compiler does not have this restriction, so
|
||||
// we can pre-load the LLVM queue in parallel before handing off
|
||||
// coordination to the `OngoingCodegen` scheduler.
|
||||
//
|
||||
// This is likely a temporary measure. Once we no longer have to support the
|
||||
// non-parallel compiler anymore, we can compile CGUs end-to-end in
|
||||
// parallel and get rid of the complicated scheduling logic.
|
||||
let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
|
||||
if cfg!(parallel_compiler) {
|
||||
tcx.sess.time("compile_first_CGU_batch", || {
|
||||
// Try to find one CGU to compile per thread.
|
||||
let cgus: Vec<_> = cgu_reuse
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, reuse)| reuse == &CguReuse::No)
|
||||
.take(tcx.sess.threads())
|
||||
.collect();
|
||||
|
||||
// Compile the found CGUs in parallel.
|
||||
par_iter(cgus)
|
||||
.map(|(i, _)| {
|
||||
let start_time = Instant::now();
|
||||
let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
|
||||
let mut time = total_codegen_time.lock();
|
||||
*time += start_time.elapsed();
|
||||
(i, module)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
}
|
||||
};
|
||||
|
||||
let mut cgu_reuse = Vec::new();
|
||||
let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
|
||||
|
||||
for (i, cgu) in codegen_units.iter().enumerate() {
|
||||
ongoing_codegen.wait_for_signal_to_codegen_item();
|
||||
ongoing_codegen.check_for_errors(tcx.sess);
|
||||
|
||||
let cgu_reuse = determine_cgu_reuse(tcx, &cgu);
|
||||
// Do some setup work in the first iteration
|
||||
if pre_compiled_cgus.is_none() {
|
||||
// Calculate the CGU reuse
|
||||
cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
|
||||
codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
|
||||
});
|
||||
// Pre compile some CGUs
|
||||
pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse));
|
||||
}
|
||||
|
||||
let cgu_reuse = cgu_reuse[i];
|
||||
tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);
|
||||
|
||||
match cgu_reuse {
|
||||
CguReuse::No => {
|
||||
let start_time = Instant::now();
|
||||
backend.compile_codegen_unit(tcx, cgu.name(), &ongoing_codegen.coordinator_send);
|
||||
total_codegen_time += start_time.elapsed();
|
||||
let (module, cost) =
|
||||
if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
|
||||
cgu
|
||||
} else {
|
||||
let start_time = Instant::now();
|
||||
let module = backend.compile_codegen_unit(tcx, cgu.name());
|
||||
let mut time = total_codegen_time.lock();
|
||||
*time += start_time.elapsed();
|
||||
module
|
||||
};
|
||||
submit_codegened_module_to_llvm(
|
||||
&backend,
|
||||
&ongoing_codegen.coordinator_send,
|
||||
module,
|
||||
cost,
|
||||
);
|
||||
false
|
||||
}
|
||||
CguReuse::PreLto => {
|
||||
@ -652,7 +716,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
|
||||
|
||||
// Since the main thread is sometimes blocked during codegen, we keep track
|
||||
// of the `-Ztime-passes` output manually.
|
||||
print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time);
|
||||
print_time_passes_entry(
|
||||
tcx.sess.time_passes(),
|
||||
"codegen_to_LLVM_IR",
|
||||
total_codegen_time.into_inner(),
|
||||
);
|
||||
|
||||
::rustc_incremental::assert_module_sources::assert_module_sources(tcx);
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
use super::write::WriteBackendMethods;
|
||||
use super::CodegenObject;
|
||||
use crate::ModuleCodegen;
|
||||
|
||||
use rustc::middle::cstore::EncodedMetadata;
|
||||
use rustc::session::{config, Session};
|
||||
@ -10,7 +11,6 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend;
|
||||
use rustc_span::symbol::Symbol;
|
||||
use syntax::expand::allocator::AllocatorKind;
|
||||
|
||||
use std::sync::mpsc;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub trait BackendTypes {
|
||||
@ -34,7 +34,7 @@ impl<'tcx, T> Backend<'tcx> for T where
|
||||
{
|
||||
}
|
||||
|
||||
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send {
|
||||
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync {
|
||||
fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module;
|
||||
fn write_compressed_metadata<'tcx>(
|
||||
&self,
|
||||
@ -48,12 +48,13 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
|
||||
mods: &mut Self::Module,
|
||||
kind: AllocatorKind,
|
||||
);
|
||||
/// This generates the codegen unit and returns it along with
|
||||
/// a `u64` giving an estimate of the unit's processing cost.
|
||||
fn compile_codegen_unit(
|
||||
&self,
|
||||
tcx: TyCtxt<'_>,
|
||||
cgu_name: Symbol,
|
||||
tx_to_llvm_workers: &mpsc::Sender<Box<dyn std::any::Any + Send>>,
|
||||
);
|
||||
) -> (ModuleCodegen<Self::Module>, u64);
|
||||
// If `find_features` is true, this won't access `sess.crate_types` by assuming
|
||||
// that `is_pie_binary` is false. When we discover LLVM target features
|
||||
// `sess.crate_types` is uninitialized so we cannot access it.
|
||||
|
Loading…
x
Reference in New Issue
Block a user