Rollup merge of #67889 - Zoxc:parallel-cgus, r=michaelwoerister

Compile some CGUs in parallel at the start of codegen

This brings the compilation time for `syntex_syntax` from 11.542s to 10.453s with 6 threads in non-incremental debug mode. Just compiling `n` CGUs in parallel at the beginning of codegen seems sufficient to get rid of the staircase effect, at least for `syntex_syntax`.

Based on https://github.com/rust-lang/rust/pull/67777.

r? @michaelwoerister
cc @alexcrichton @Mark-Simulacrum
This commit is contained in:
Mazdak Farrokhzad 2020-01-11 12:36:06 +01:00 committed by GitHub
commit 7b741fb68e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 89 additions and 22 deletions

View File

@ -13,7 +13,7 @@
//! but one `llvm::Type` corresponds to many `Ty`s; for instance, `tup(int, int,
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
use super::{LlvmCodegenBackend, ModuleLlvm};
use super::ModuleLlvm;
use crate::builder::Builder;
use crate::common;
@ -29,7 +29,6 @@ use rustc::middle::exported_symbols;
use rustc::mir::mono::{Linkage, Visibility};
use rustc::session::config::DebugInfo;
use rustc::ty::TyCtxt;
use rustc_codegen_ssa::back::write::submit_codegened_module_to_llvm;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
@ -100,8 +99,7 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> {
pub fn compile_codegen_unit(
tcx: TyCtxt<'tcx>,
cgu_name: Symbol,
tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
) {
) -> (ModuleCodegen<ModuleLlvm>, u64) {
let prof_timer = tcx.prof.generic_activity("codegen_module");
let start_time = Instant::now();
@ -115,8 +113,6 @@ pub fn compile_codegen_unit(
// the time we needed for codegenning it.
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
submit_codegened_module_to_llvm(&LlvmCodegenBackend(()), tx_to_llvm_workers, module, cost);
fn module_codegen(tcx: TyCtxt<'_>, cgu_name: Symbol) -> ModuleCodegen<ModuleLlvm> {
let cgu = tcx.codegen_unit(cgu_name);
// Instantiate monomorphizations without filling out definitions yet...
@ -164,6 +160,8 @@ pub fn compile_codegen_unit(
kind: ModuleKind::Regular,
}
}
(module, cost)
}
pub fn set_link_section(llval: &Value, attrs: &CodegenFnAttrs) {

View File

@ -19,6 +19,7 @@
#![feature(link_args)]
#![feature(static_nobundle)]
#![feature(trusted_len)]
#![recursion_limit = "256"]
use back::write::{create_informational_target_machine, create_target_machine};
use rustc_span::symbol::Symbol;
@ -108,9 +109,8 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
&self,
tcx: TyCtxt<'_>,
cgu_name: Symbol,
tx: &std::sync::mpsc::Sender<Box<dyn Any + Send>>,
) {
base::compile_codegen_unit(tcx, cgu_name, tx);
) -> (ModuleCodegen<ModuleLlvm>, u64) {
base::compile_codegen_unit(tcx, cgu_name)
}
fn target_machine_factory(
&self,

View File

@ -14,8 +14,8 @@
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
use crate::back::write::{
start_async_codegen, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm,
OngoingCodegen,
start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm,
submit_pre_lto_module_to_llvm, OngoingCodegen,
};
use crate::common::{IntPredicate, RealPredicate, TypeKind};
use crate::meth;
@ -40,6 +40,7 @@ use rustc::ty::{self, Instance, Ty, TyCtxt};
use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::profiling::print_time_passes_entry;
use rustc_data_structures::sync::{par_iter, Lock, ParallelIterator};
use rustc_hir as hir;
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
use rustc_index::vec::Idx;
@ -606,20 +607,83 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
codegen_units
};
let mut total_codegen_time = Duration::new(0, 0);
let total_codegen_time = Lock::new(Duration::new(0, 0));
for cgu in codegen_units.into_iter() {
// The non-parallel compiler can only translate codegen units to LLVM IR
// on a single thread, leading to a staircase effect where the N LLVM
// threads have to wait on the single codegen threads to generate work
// for them. The parallel compiler does not have this restriction, so
// we can pre-load the LLVM queue in parallel before handing off
// coordination to the OnGoingCodegen scheduler.
//
// This likely is a temporary measure. Once we don't have to support the
// non-parallel compiler anymore, we can compile CGUs end-to-end in
// parallel and get rid of the complicated scheduling logic.
let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
if cfg!(parallel_compiler) {
tcx.sess.time("compile_first_CGU_batch", || {
// Try to find one CGU to compile per thread.
let cgus: Vec<_> = cgu_reuse
.iter()
.enumerate()
.filter(|&(_, reuse)| reuse == &CguReuse::No)
.take(tcx.sess.threads())
.collect();
// Compile the found CGUs in parallel.
par_iter(cgus)
.map(|(i, _)| {
let start_time = Instant::now();
let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
let mut time = total_codegen_time.lock();
*time += start_time.elapsed();
(i, module)
})
.collect()
})
} else {
FxHashMap::default()
}
};
let mut cgu_reuse = Vec::new();
let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
for (i, cgu) in codegen_units.iter().enumerate() {
ongoing_codegen.wait_for_signal_to_codegen_item();
ongoing_codegen.check_for_errors(tcx.sess);
let cgu_reuse = determine_cgu_reuse(tcx, &cgu);
// Do some setup work in the first iteration
if pre_compiled_cgus.is_none() {
// Calculate the CGU reuse
cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
});
// Pre compile some CGUs
pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse));
}
let cgu_reuse = cgu_reuse[i];
tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);
match cgu_reuse {
CguReuse::No => {
let start_time = Instant::now();
backend.compile_codegen_unit(tcx, cgu.name(), &ongoing_codegen.coordinator_send);
total_codegen_time += start_time.elapsed();
let (module, cost) =
if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
cgu
} else {
let start_time = Instant::now();
let module = backend.compile_codegen_unit(tcx, cgu.name());
let mut time = total_codegen_time.lock();
*time += start_time.elapsed();
module
};
submit_codegened_module_to_llvm(
&backend,
&ongoing_codegen.coordinator_send,
module,
cost,
);
false
}
CguReuse::PreLto => {
@ -652,7 +716,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
// Since the main thread is sometimes blocked during codegen, we keep track
// -Ztime-passes output manually.
print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time);
print_time_passes_entry(
tcx.sess.time_passes(),
"codegen_to_LLVM_IR",
total_codegen_time.into_inner(),
);
::rustc_incremental::assert_module_sources::assert_module_sources(tcx);

View File

@ -1,5 +1,6 @@
use super::write::WriteBackendMethods;
use super::CodegenObject;
use crate::ModuleCodegen;
use rustc::middle::cstore::EncodedMetadata;
use rustc::session::{config, Session};
@ -10,7 +11,6 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend;
use rustc_span::symbol::Symbol;
use syntax::expand::allocator::AllocatorKind;
use std::sync::mpsc;
use std::sync::Arc;
pub trait BackendTypes {
@ -34,7 +34,7 @@ impl<'tcx, T> Backend<'tcx> for T where
{
}
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send {
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync {
fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module;
fn write_compressed_metadata<'tcx>(
&self,
@ -48,12 +48,13 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
mods: &mut Self::Module,
kind: AllocatorKind,
);
/// This generates the codegen unit and returns it along with
/// a `u64` giving an estimate of the unit's processing cost.
fn compile_codegen_unit(
&self,
tcx: TyCtxt<'_>,
cgu_name: Symbol,
tx_to_llvm_workers: &mpsc::Sender<Box<dyn std::any::Any + Send>>,
);
) -> (ModuleCodegen<Self::Module>, u64);
// If find_features is true this won't access `sess.crate_types` by assuming
// that `is_pie_binary` is false. When we discover LLVM target features
// `sess.crate_types` is uninitialized so we cannot access it.