From 1c1f16c3e3222d9e8d0b17285244e6155fc69db3 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Fri, 13 May 2022 13:04:48 +0000
Subject: [PATCH 1/2] Move cgu_reuse a bit earlier

There is no reason it needs to be lazily computed at the first iteration
of the cgu loop.
---
 compiler/rustc_codegen_ssa/src/base.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 7b7e09208a2..4bee2749016 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -607,6 +607,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         second_half.iter().rev().interleave(first_half).copied().collect()
     };
 
+    // Calculate the CGU reuse
+    let cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
+        codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect::<Vec<_>>()
+    });
+
     // The non-parallel compiler can only translate codegen units to LLVM IR
     // on a single thread, leading to a staircase effect where the N LLVM
     // threads have to wait on the single codegen threads to generate work
@@ -618,7 +623,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
     // non-parallel compiler anymore, we can compile CGUs end-to-end in
     // parallel and get rid of the complicated scheduling logic.
     #[cfg(parallel_compiler)]
-    let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
+    let pre_compile_cgus = || {
         tcx.sess.time("compile_first_CGU_batch", || {
             // Try to find one CGU to compile per thread.
             let cgus: Vec<_> = cgu_reuse
@@ -643,9 +648,8 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
     };
 
     #[cfg(not(parallel_compiler))]
-    let pre_compile_cgus = |_: &[CguReuse]| (FxHashMap::default(), Duration::new(0, 0));
+    let pre_compile_cgus = || (FxHashMap::default(), Duration::new(0, 0));
 
-    let mut cgu_reuse = Vec::new();
     let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
     let mut total_codegen_time = Duration::new(0, 0);
     let start_rss = tcx.sess.time_passes().then(|| get_resident_set_size());
@@ -656,12 +660,8 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
         // Do some setup work in the first iteration
        if pre_compiled_cgus.is_none() {
-            // Calculate the CGU reuse
-            cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
-                codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
-            });
             // Pre compile some CGUs
-            let (compiled_cgus, codegen_time) = pre_compile_cgus(&cgu_reuse);
+            let (compiled_cgus, codegen_time) = pre_compile_cgus();
             pre_compiled_cgus = Some(compiled_cgus);
             total_codegen_time += codegen_time;
         }

From a06deb519173b07932b4718cdbd90e47feca17cd Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Fri, 13 May 2022 13:17:07 +0000
Subject: [PATCH 2/2] Compute pre_compiled_cgus more eagerly

This reduces the complexity of this code a lot.
---
 compiler/rustc_codegen_ssa/src/base.rs | 47 +++++++++++---------------
 1 file changed, 19 insertions(+), 28 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 4bee2749016..d11f1534153 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -15,8 +15,9 @@
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::profiling::{get_resident_set_size, print_time_passes_entry};
+use rustc_data_structures::sync::par_iter;
 #[cfg(parallel_compiler)]
-use rustc_data_structures::sync::{par_iter, ParallelIterator};
+use rustc_data_structures::sync::ParallelIterator;
 use rustc_hir as hir;
 use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_hir::lang_items::LangItem;
@@ -612,6 +613,9 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect::<Vec<_>>()
     });
 
+    let mut total_codegen_time = Duration::new(0, 0);
+    let start_rss = tcx.sess.time_passes().then(|| get_resident_set_size());
+
     // The non-parallel compiler can only translate codegen units to LLVM IR
     // on a single thread, leading to a staircase effect where the N LLVM
     // threads have to wait on the single codegen threads to generate work
@@ -622,8 +626,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
     // This likely is a temporary measure. Once we don't have to support the
     // non-parallel compiler anymore, we can compile CGUs end-to-end in
     // parallel and get rid of the complicated scheduling logic.
-    #[cfg(parallel_compiler)]
-    let pre_compile_cgus = || {
+    let mut pre_compiled_cgus = if cfg!(parallel_compiler) {
         tcx.sess.time("compile_first_CGU_batch", || {
             // Try to find one CGU to compile per thread.
             let cgus: Vec<_> = cgu_reuse
@@ -643,43 +646,31 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                 })
                 .collect();
 
-            (pre_compiled_cgus, start_time.elapsed())
+            total_codegen_time += start_time.elapsed();
+
+            pre_compiled_cgus
         })
+    } else {
+        FxHashMap::default()
     };
 
-    #[cfg(not(parallel_compiler))]
-    let pre_compile_cgus = || (FxHashMap::default(), Duration::new(0, 0));
-
-    let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
-    let mut total_codegen_time = Duration::new(0, 0);
-    let start_rss = tcx.sess.time_passes().then(|| get_resident_set_size());
-
     for (i, cgu) in codegen_units.iter().enumerate() {
         ongoing_codegen.wait_for_signal_to_codegen_item();
         ongoing_codegen.check_for_errors(tcx.sess);
 
-        // Do some setup work in the first iteration
-        if pre_compiled_cgus.is_none() {
-            // Pre compile some CGUs
-            let (compiled_cgus, codegen_time) = pre_compile_cgus();
-            pre_compiled_cgus = Some(compiled_cgus);
-            total_codegen_time += codegen_time;
-        }
-
         let cgu_reuse = cgu_reuse[i];
         tcx.sess.cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse);
 
         match cgu_reuse {
             CguReuse::No => {
-                let (module, cost) =
-                    if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
-                        cgu
-                    } else {
-                        let start_time = Instant::now();
-                        let module = backend.compile_codegen_unit(tcx, cgu.name());
-                        total_codegen_time += start_time.elapsed();
-                        module
-                    };
+                let (module, cost) = if let Some(cgu) = pre_compiled_cgus.remove(&i) {
+                    cgu
+                } else {
+                    let start_time = Instant::now();
+                    let module = backend.compile_codegen_unit(tcx, cgu.name());
+                    total_codegen_time += start_time.elapsed();
+                    module
+                };
                 // This will unwind if there are errors, which triggers our `AbortCodegenOnDrop`
                 // guard. Unfortunately, just skipping the `submit_codegened_module_to_llvm` makes
                 // compilation hang on post-monomorphization errors.
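
The pattern both patches apply is worth seeing in isolation: compute per-unit
metadata once before the loop (patch 1), and build the map of pre-compiled
units eagerly instead of inside the loop's first iteration (patch 2). The
sketch below is a minimal, self-contained illustration of that scheduling
idea, not rustc code: the compile function, the unit names, and the reuse
rule are hypothetical stand-ins, and std::thread::scope stands in for
rustc_data_structures' par_iter.

use std::collections::HashMap;
use std::thread;
use std::time::{Duration, Instant};

// Hypothetical stand-in for real per-unit codegen work.
fn compile(unit: &str) -> String {
    format!("object({unit})")
}

fn main() {
    let units = ["cgu0", "cgu1", "cgu2", "cgu3"];
    let threads = 2; // pretend we have two codegen threads

    // Patch 1's idea: decide reuse for every unit once, up front.
    // (Made-up rule: "cgu2" is reusable from a previous session.)
    let reuse: Vec<bool> = units.iter().map(|u| *u == "cgu2").collect();

    let mut total_codegen_time = Duration::ZERO;

    // Patch 2's idea: eagerly pre-compile one not-reused unit per thread,
    // so downstream consumers have work immediately instead of waiting on
    // the serial loop below (the "staircase effect").
    let start_time = Instant::now();
    let mut pre_compiled: HashMap<usize, String> = thread::scope(|s| {
        let handles: Vec<_> = units
            .iter()
            .enumerate()
            .filter(|&(i, _)| !reuse[i])
            .take(threads)
            .map(|(i, u)| (i, s.spawn(move || compile(u))))
            .collect();
        handles.into_iter().map(|(i, h)| (i, h.join().unwrap())).collect()
    });
    total_codegen_time += start_time.elapsed();

    // The loop itself only looks things up; no first-iteration setup.
    for (i, unit) in units.iter().enumerate() {
        if reuse[i] {
            println!("unit {i}: reused");
            continue;
        }
        let module = pre_compiled.remove(&i).unwrap_or_else(|| {
            let start_time = Instant::now();
            let module = compile(unit);
            total_codegen_time += start_time.elapsed();
            module
        });
        println!("unit {i}: {module}");
    }
    println!("total codegen time: {total_codegen_time:?}");
}

As in patch 2, dropping the lazy setup removes the Option wrapper and the
as_mut().unwrap() dance: the loop body either takes a pre-compiled module
out of the map or compiles on the spot, charging the elapsed time to the
same counter either way.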