From 50b34279c3bb84ac5287626d8ee2bc4414e5d07a Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 11 May 2024 17:39:51 +0000 Subject: [PATCH 1/2] Split cgus into todo and done before the main module codegen loop --- src/concurrency_limiter.rs | 13 -------- src/driver/aot.rs | 67 +++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 46 deletions(-) diff --git a/src/concurrency_limiter.rs b/src/concurrency_limiter.rs index 9678969134a..c7f543cf7cd 100644 --- a/src/concurrency_limiter.rs +++ b/src/concurrency_limiter.rs @@ -78,11 +78,6 @@ pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLim } } - pub(super) fn job_already_done(&mut self) { - let mut state = self.state.lock().unwrap(); - state.job_already_done(); - } - pub(crate) fn finished(mut self) { self.helper_thread.take(); @@ -190,14 +185,6 @@ pub(super) fn job_finished(&mut self) { self.assert_invariants(); } - pub(super) fn job_already_done(&mut self) { - self.assert_invariants(); - self.pending_jobs -= 1; - self.assert_invariants(); - self.drop_excess_capacity(); - self.assert_invariants(); - } - pub(super) fn poison(&mut self, error: String) { self.poisoned = true; self.stored_error = Some(error); diff --git a/src/driver/aot.rs b/src/driver/aot.rs index e8c96486041..2466cbed299 100644 --- a/src/driver/aot.rs +++ b/src/driver/aot.rs @@ -604,40 +604,41 @@ pub(crate) fn run_aot( let global_asm_config = Arc::new(crate::global_asm::GlobalAsmConfig::new(tcx)); - let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, cgus.len()); + let (todo_cgus, done_cgus) = + cgus.into_iter().enumerate().partition::, _>(|&(i, _)| match cgu_reuse[i] { + _ if backend_config.disable_incr_cache => true, + CguReuse::No => true, + CguReuse::PreLto | CguReuse::PostLto => false, + }); - let modules = tcx.sess.time("codegen mono items", || { - cgus.iter() - .enumerate() - .map(|(i, cgu)| { - let cgu_reuse = - if backend_config.disable_incr_cache { CguReuse::No } else { cgu_reuse[i] }; - match cgu_reuse { - CguReuse::No => { - let dep_node = cgu.codegen_dep_node(tcx); - tcx.dep_graph - .with_task( - dep_node, - tcx, - ( - backend_config.clone(), - global_asm_config.clone(), - cgu.name(), - concurrency_limiter.acquire(tcx.dcx()), - ), - module_codegen, - Some(rustc_middle::dep_graph::hash_result), - ) - .0 - } - CguReuse::PreLto | CguReuse::PostLto => { - concurrency_limiter.job_already_done(); - OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu)) - } - } - }) - .collect::>() - }); + let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, todo_cgus.len()); + + let modules = + tcx.sess.time("codegen mono items", || { + todo_cgus + .into_iter() + .map(|(_, cgu)| { + let dep_node = cgu.codegen_dep_node(tcx); + tcx.dep_graph + .with_task( + dep_node, + tcx, + ( + backend_config.clone(), + global_asm_config.clone(), + cgu.name(), + concurrency_limiter.acquire(tcx.dcx()), + ), + module_codegen, + Some(rustc_middle::dep_graph::hash_result), + ) + .0 + }) + .chain(done_cgus.into_iter().map(|(_, cgu)| { + OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu)) + })) + .collect::>() + }); let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string()); let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true); From a167142946224a81ad4cdae45795a75474572fce Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sat, 11 May 2024 18:51:59 +0000 Subject: [PATCH 2/2] Translate MIR to clif ir in parallel with parallel rustc On dev-desktop the advantage of cg_clif over cg_llvm on simple-raytracer is 15% when parallel rustc is disabled. With -Zthreads=16 the advantage goes from 5% to 22% with this change. --- src/concurrency_limiter.rs | 8 +++--- src/driver/aot.rs | 54 +++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/concurrency_limiter.rs b/src/concurrency_limiter.rs index c7f543cf7cd..a73860cf18b 100644 --- a/src/concurrency_limiter.rs +++ b/src/concurrency_limiter.rs @@ -6,7 +6,7 @@ // FIXME don't panic when a worker thread panics pub(super) struct ConcurrencyLimiter { - helper_thread: Option, + helper_thread: Option>, state: Arc>, available_token_condvar: Arc, finished: bool, @@ -39,14 +39,14 @@ pub(super) fn new(sess: &Session, pending_jobs: usize) -> Self { }) .unwrap(); ConcurrencyLimiter { - helper_thread: Some(helper_thread), + helper_thread: Some(Mutex::new(helper_thread)), state, available_token_condvar, finished: false, } } - pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken { + pub(super) fn acquire(&self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken { let mut state = self.state.lock().unwrap(); loop { state.assert_invariants(); @@ -73,7 +73,7 @@ pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLim } } - self.helper_thread.as_mut().unwrap().request_token(); + self.helper_thread.as_ref().unwrap().lock().unwrap().request_token(); state = self.available_token_condvar.wait(state).unwrap(); } } diff --git a/src/driver/aot.rs b/src/driver/aot.rs index 2466cbed299..2651e56cac4 100644 --- a/src/driver/aot.rs +++ b/src/driver/aot.rs @@ -15,6 +15,7 @@ use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind}; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::{par_map, IntoDynSyncSend}; use rustc_metadata::fs::copy_to_stdout; use rustc_metadata::EncodedMetadata; use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; @@ -611,34 +612,33 @@ pub(crate) fn run_aot( CguReuse::PreLto | CguReuse::PostLto => false, }); - let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, todo_cgus.len()); + let concurrency_limiter = IntoDynSyncSend(ConcurrencyLimiter::new(tcx.sess, todo_cgus.len())); - let modules = - tcx.sess.time("codegen mono items", || { - todo_cgus - .into_iter() - .map(|(_, cgu)| { - let dep_node = cgu.codegen_dep_node(tcx); - tcx.dep_graph - .with_task( - dep_node, - tcx, - ( - backend_config.clone(), - global_asm_config.clone(), - cgu.name(), - concurrency_limiter.acquire(tcx.dcx()), - ), - module_codegen, - Some(rustc_middle::dep_graph::hash_result), - ) - .0 - }) - .chain(done_cgus.into_iter().map(|(_, cgu)| { - OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu)) - })) - .collect::>() + let modules = tcx.sess.time("codegen mono items", || { + let mut modules: Vec<_> = par_map(todo_cgus, |(_, cgu)| { + let dep_node = cgu.codegen_dep_node(tcx); + tcx.dep_graph + .with_task( + dep_node, + tcx, + ( + backend_config.clone(), + global_asm_config.clone(), + cgu.name(), + concurrency_limiter.acquire(tcx.dcx()), + ), + module_codegen, + Some(rustc_middle::dep_graph::hash_result), + ) + .0 }); + modules.extend( + done_cgus + .into_iter() + .map(|(_, cgu)| OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))), + ); + modules + }); let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string()); let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true); @@ -706,6 +706,6 @@ pub(crate) fn run_aot( metadata_module, metadata, crate_info: CrateInfo::new(tcx, target_cpu), - concurrency_limiter, + concurrency_limiter: concurrency_limiter.0, }) }