diff --git a/src/librustc_codegen_llvm/base.rs b/src/librustc_codegen_llvm/base.rs index cb44a56d075..d3b524c1a1e 100644 --- a/src/librustc_codegen_llvm/base.rs +++ b/src/librustc_codegen_llvm/base.rs @@ -13,7 +13,7 @@ //! but one `llvm::Type` corresponds to many `Ty`s; for instance, `tup(int, int, //! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`. -use super::{LlvmCodegenBackend, ModuleLlvm}; +use super::ModuleLlvm; use crate::builder::Builder; use crate::common; @@ -29,7 +29,6 @@ use rustc::middle::exported_symbols; use rustc::mir::mono::{Linkage, Visibility}; use rustc::session::config::DebugInfo; use rustc::ty::TyCtxt; -use rustc_codegen_ssa::back::write::submit_codegened_module_to_llvm; use rustc_codegen_ssa::base::maybe_create_entry_wrapper; use rustc_codegen_ssa::mono_item::MonoItemExt; use rustc_codegen_ssa::traits::*; @@ -100,8 +99,7 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> { pub fn compile_codegen_unit( tcx: TyCtxt<'tcx>, cgu_name: Symbol, - tx_to_llvm_workers: &std::sync::mpsc::Sender>, -) { +) -> (ModuleCodegen, u64) { let prof_timer = tcx.prof.generic_activity("codegen_module"); let start_time = Instant::now(); @@ -115,8 +113,6 @@ pub fn compile_codegen_unit( // the time we needed for codegenning it. let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64; - submit_codegened_module_to_llvm(&LlvmCodegenBackend(()), tx_to_llvm_workers, module, cost); - fn module_codegen(tcx: TyCtxt<'_>, cgu_name: Symbol) -> ModuleCodegen { let cgu = tcx.codegen_unit(cgu_name); // Instantiate monomorphizations without filling out definitions yet... 
@@ -164,6 +160,8 @@ pub fn compile_codegen_unit( kind: ModuleKind::Regular, } } + + (module, cost) } pub fn set_link_section(llval: &Value, attrs: &CodegenFnAttrs) { diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index 35c71a66756..a6168128c4d 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -19,6 +19,7 @@ #![feature(link_args)] #![feature(static_nobundle)] #![feature(trusted_len)] +#![recursion_limit = "256"] use back::write::{create_informational_target_machine, create_target_machine}; use rustc_span::symbol::Symbol; @@ -108,9 +109,8 @@ impl ExtraBackendMethods for LlvmCodegenBackend { &self, tcx: TyCtxt<'_>, cgu_name: Symbol, - tx: &std::sync::mpsc::Sender>, - ) { - base::compile_codegen_unit(tcx, cgu_name, tx); + ) -> (ModuleCodegen, u64) { + base::compile_codegen_unit(tcx, cgu_name) } fn target_machine_factory( &self, diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs index ab5d67e5783..efd56007120 100644 --- a/src/librustc_codegen_ssa/base.rs +++ b/src/librustc_codegen_ssa/base.rs @@ -14,8 +14,8 @@ //! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`. 
use crate::back::write::{ - start_async_codegen, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, - OngoingCodegen, + start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm, + submit_pre_lto_module_to_llvm, OngoingCodegen, }; use crate::common::{IntPredicate, RealPredicate, TypeKind}; use crate::meth; @@ -40,6 +40,7 @@ use rustc::ty::{self, Instance, Ty, TyCtxt}; use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::profiling::print_time_passes_entry; +use rustc_data_structures::sync::{par_iter, Lock, ParallelIterator}; use rustc_hir as hir; use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_index::vec::Idx; @@ -606,20 +607,83 @@ pub fn codegen_crate( codegen_units }; - let mut total_codegen_time = Duration::new(0, 0); + let total_codegen_time = Lock::new(Duration::new(0, 0)); - for cgu in codegen_units.into_iter() { + // The non-parallel compiler can only translate codegen units to LLVM IR + // on a single thread, leading to a staircase effect where the N LLVM + // threads have to wait on the single codegen thread to generate work + // for them. The parallel compiler does not have this restriction, so + // we can pre-load the LLVM queue in parallel before handing off + // coordination to the OngoingCodegen scheduler. + // + // This is likely a temporary measure. Once we don't have to support the + // non-parallel compiler anymore, we can compile CGUs end-to-end in + // parallel and get rid of the complicated scheduling logic. + let pre_compile_cgus = |cgu_reuse: &[CguReuse]| { + if cfg!(parallel_compiler) { + tcx.sess.time("compile_first_CGU_batch", || { + // Try to find one CGU to compile per thread. + let cgus: Vec<_> = cgu_reuse + .iter() + .enumerate() + .filter(|&(_, reuse)| reuse == &CguReuse::No) + .take(tcx.sess.threads()) + .collect(); + + // Compile the found CGUs in parallel.
+ par_iter(cgus) + .map(|(i, _)| { + let start_time = Instant::now(); + let module = backend.compile_codegen_unit(tcx, codegen_units[i].name()); + let mut time = total_codegen_time.lock(); + *time += start_time.elapsed(); + (i, module) + }) + .collect() + }) + } else { + FxHashMap::default() + } + }; + + let mut cgu_reuse = Vec::new(); + let mut pre_compiled_cgus: Option> = None; + + for (i, cgu) in codegen_units.iter().enumerate() { ongoing_codegen.wait_for_signal_to_codegen_item(); ongoing_codegen.check_for_errors(tcx.sess); - let cgu_reuse = determine_cgu_reuse(tcx, &cgu); + // Do some setup work in the first iteration + if pre_compiled_cgus.is_none() { + // Calculate the CGU reuse + cgu_reuse = tcx.sess.time("find_cgu_reuse", || { + codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect() + }); + // Pre compile some CGUs + pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse)); + } + + let cgu_reuse = cgu_reuse[i]; tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse); match cgu_reuse { CguReuse::No => { - let start_time = Instant::now(); - backend.compile_codegen_unit(tcx, cgu.name(), &ongoing_codegen.coordinator_send); - total_codegen_time += start_time.elapsed(); + let (module, cost) = + if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) { + cgu + } else { + let start_time = Instant::now(); + let module = backend.compile_codegen_unit(tcx, cgu.name()); + let mut time = total_codegen_time.lock(); + *time += start_time.elapsed(); + module + }; + submit_codegened_module_to_llvm( + &backend, + &ongoing_codegen.coordinator_send, + module, + cost, + ); false } CguReuse::PreLto => { @@ -652,7 +716,11 @@ pub fn codegen_crate( // Since the main thread is sometimes blocked during codegen, we keep track // -Ztime-passes output manually. 
- print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time); + print_time_passes_entry( + tcx.sess.time_passes(), + "codegen_to_LLVM_IR", + total_codegen_time.into_inner(), + ); ::rustc_incremental::assert_module_sources::assert_module_sources(tcx); diff --git a/src/librustc_codegen_ssa/traits/backend.rs b/src/librustc_codegen_ssa/traits/backend.rs index e0d0a2f32f3..bc3a75250bf 100644 --- a/src/librustc_codegen_ssa/traits/backend.rs +++ b/src/librustc_codegen_ssa/traits/backend.rs @@ -1,5 +1,6 @@ use super::write::WriteBackendMethods; use super::CodegenObject; +use crate::ModuleCodegen; use rustc::middle::cstore::EncodedMetadata; use rustc::session::{config, Session}; @@ -10,7 +11,6 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend; use rustc_span::symbol::Symbol; use syntax::expand::allocator::AllocatorKind; -use std::sync::mpsc; use std::sync::Arc; pub trait BackendTypes { @@ -34,7 +34,7 @@ impl<'tcx, T> Backend<'tcx> for T where { } -pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send { +pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync { fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module; fn write_compressed_metadata<'tcx>( &self, @@ -48,12 +48,13 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se mods: &mut Self::Module, kind: AllocatorKind, ); + /// This generates the codegen unit and returns it along with + /// a `u64` giving an estimate of the unit's processing cost. fn compile_codegen_unit( &self, tcx: TyCtxt<'_>, cgu_name: Symbol, - tx_to_llvm_workers: &mpsc::Sender>, - ); + ) -> (ModuleCodegen, u64); // If find_features is true this won't access `sess.crate_types` by assuming // that `is_pie_binary` is false. When we discover LLVM target features // `sess.crate_types` is uninitialized so we cannot access it.