rustc: Wait for all codegen threads to exit
This commit updates rustc to wait for all codegen threads to exit before allowing the main thread to exit. This is a stab in the dark to fix the mysterious segfaults appearing on #55238, and hopefully we'll see whether this actually fixes things in practice...
This commit is contained in:
parent
016eaf88f5
commit
14c6835e03
@ -15,17 +15,6 @@ dependencies = [
|
||||
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc_jemalloc"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"build_helper 0.1.0",
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"compiler_builtins 0.0.0",
|
||||
"core 0.0.0",
|
||||
"libc 0.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc_system"
|
||||
version = "0.0.0"
|
||||
@ -2696,7 +2685,6 @@ name = "std"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"alloc 0.0.0",
|
||||
"alloc_jemalloc 0.0.0",
|
||||
"alloc_system 0.0.0",
|
||||
"build_helper 0.1.0",
|
||||
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1508,6 +1508,7 @@ enum Message {
|
||||
},
|
||||
CodegenComplete,
|
||||
CodegenItem,
|
||||
CodegenAborted,
|
||||
}
|
||||
|
||||
struct Diagnostic {
|
||||
@ -1788,6 +1789,7 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
let mut needs_lto = Vec::new();
|
||||
let mut lto_import_only_modules = Vec::new();
|
||||
let mut started_lto = false;
|
||||
let mut codegen_aborted = false;
|
||||
|
||||
// This flag tracks whether all items have gone through codegens
|
||||
let mut codegen_done = false;
|
||||
@ -1805,13 +1807,19 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
let mut llvm_start_time = None;
|
||||
|
||||
// Run the message loop while there's still anything that needs message
|
||||
// processing:
|
||||
// processing. Note that as soon as codegen is aborted we simply want to
|
||||
// wait for all existing work to finish, so many of the conditions here
|
||||
// only apply if codegen hasn't been aborted as they represent pending
|
||||
// work to be done.
|
||||
while !codegen_done ||
|
||||
work_items.len() > 0 ||
|
||||
running > 0 ||
|
||||
needs_lto.len() > 0 ||
|
||||
lto_import_only_modules.len() > 0 ||
|
||||
main_thread_worker_state != MainThreadWorkerState::Idle {
|
||||
(!codegen_aborted && (
|
||||
work_items.len() > 0 ||
|
||||
needs_lto.len() > 0 ||
|
||||
lto_import_only_modules.len() > 0 ||
|
||||
main_thread_worker_state != MainThreadWorkerState::Idle
|
||||
))
|
||||
{
|
||||
|
||||
// While there are still CGUs to be codegened, the coordinator has
|
||||
// to decide how to utilize the compiler processes implicit Token:
|
||||
@ -1840,6 +1848,9 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
spawn_work(cgcx, item);
|
||||
}
|
||||
}
|
||||
} else if codegen_aborted {
|
||||
// don't queue up any more work if codegen was aborted, we're
|
||||
// just waiting for our existing children to finish
|
||||
} else {
|
||||
// If we've finished everything related to normal codegen
|
||||
// then it must be the case that we've got some LTO work to do.
|
||||
@ -1904,7 +1915,7 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
|
||||
// Spin up what work we can, only doing this while we've got available
|
||||
// parallelism slots and work left to spawn.
|
||||
while work_items.len() > 0 && running < tokens.len() {
|
||||
while !codegen_aborted && work_items.len() > 0 && running < tokens.len() {
|
||||
let (item, _) = work_items.pop().unwrap();
|
||||
|
||||
maybe_start_llvm_timer(cgcx.config(item.module_kind()),
|
||||
@ -1969,6 +1980,7 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
if !cgcx.opts.debugging_opts.no_parallel_llvm {
|
||||
helper.request_token();
|
||||
}
|
||||
assert!(!codegen_aborted);
|
||||
assert_eq!(main_thread_worker_state,
|
||||
MainThreadWorkerState::Codegenning);
|
||||
main_thread_worker_state = MainThreadWorkerState::Idle;
|
||||
@ -1976,11 +1988,26 @@ fn start_executing_work(tcx: TyCtxt,
|
||||
|
||||
Message::CodegenComplete => {
|
||||
codegen_done = true;
|
||||
assert!(!codegen_aborted);
|
||||
assert_eq!(main_thread_worker_state,
|
||||
MainThreadWorkerState::Codegenning);
|
||||
main_thread_worker_state = MainThreadWorkerState::Idle;
|
||||
}
|
||||
|
||||
// If codegen is aborted that means translation was aborted due
|
||||
// to some normal-ish compiler error. In this situation we want
|
||||
// to exit as soon as possible, but we want to make sure all
|
||||
// existing work has finished. Flag codegen as being done, and
|
||||
// then conditions above will ensure no more work is spawned but
|
||||
// we'll keep executing this loop until `running` hits 0.
|
||||
Message::CodegenAborted => {
|
||||
assert!(!codegen_aborted);
|
||||
codegen_done = true;
|
||||
codegen_aborted = true;
|
||||
assert_eq!(main_thread_worker_state,
|
||||
MainThreadWorkerState::Codegenning);
|
||||
}
|
||||
|
||||
// If a thread exits successfully then we drop a token associated
|
||||
// with that worker and update our `running` count. We may later
|
||||
// re-acquire a token to continue running more work. We may also not
|
||||
@ -2446,6 +2473,19 @@ impl OngoingCodegen {
|
||||
drop(self.coordinator_send.send(Box::new(Message::CodegenComplete)));
|
||||
}
|
||||
|
||||
/// Consume this context indicating that codegen was entirely aborted, and
|
||||
/// we need to exit as quickly as possible.
|
||||
///
|
||||
/// This method blocks the current thread until all worker threads have
|
||||
/// finished, and all worker threads should have exited or be real close to
|
||||
/// exiting at this point.
|
||||
pub fn codegen_aborted(self) {
|
||||
// Signal to the coordinator it should spawn no more work and start
|
||||
// shutdown.
|
||||
drop(self.coordinator_send.send(Box::new(Message::CodegenAborted)));
|
||||
drop(self.future.join());
|
||||
}
|
||||
|
||||
pub fn check_for_errors(&self, sess: &Session) {
|
||||
self.shared_emitter_main.check(sess, false);
|
||||
}
|
||||
@ -2464,6 +2504,11 @@ impl OngoingCodegen {
|
||||
}
|
||||
}
|
||||
|
||||
// impl Drop for OngoingCodegen {
|
||||
// fn drop(&mut self) {
|
||||
// }
|
||||
// }
|
||||
|
||||
pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt,
|
||||
module: ModuleCodegen,
|
||||
cost: u64) {
|
||||
|
@ -76,12 +76,13 @@ use rustc_data_structures::small_c_str::SmallCStr;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
|
||||
use std::any::Any;
|
||||
use std::ffi::CString;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Instant, Duration};
|
||||
use std::i32;
|
||||
use std::cmp;
|
||||
use std::ffi::CString;
|
||||
use std::i32;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::sync::Arc;
|
||||
use std::sync::mpsc;
|
||||
use std::time::{Instant, Duration};
|
||||
use syntax_pos::Span;
|
||||
use syntax_pos::symbol::InternedString;
|
||||
use syntax::attr;
|
||||
@ -820,6 +821,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
||||
metadata,
|
||||
rx,
|
||||
codegen_units.len());
|
||||
let ongoing_codegen = AbortCodegenOnDrop(Some(ongoing_codegen));
|
||||
|
||||
// Codegen an allocator shim, if necessary.
|
||||
//
|
||||
@ -949,7 +951,54 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
||||
ongoing_codegen.check_for_errors(tcx.sess);
|
||||
|
||||
assert_and_save_dep_graph(tcx);
|
||||
ongoing_codegen
|
||||
ongoing_codegen.into_inner()
|
||||
}
|
||||
|
||||
/// A curious wrapper structure whose only purpose is to call `codegen_aborted`
|
||||
/// when it's dropped abnormally.
|
||||
///
|
||||
/// In the process of working on rust-lang/rust#55238 a mysterious segfault was
|
||||
/// stumbled upon. The segfault was never reproduced locally, but it was
|
||||
/// suspected to be releated to the fact that codegen worker threads were
|
||||
/// sticking around by the time the main thread was exiting, causing issues.
|
||||
///
|
||||
/// This structure is an attempt to fix that issue where the `codegen_aborted`
|
||||
/// message will block until all workers have finished. This should ensure that
|
||||
/// even if the main codegen thread panics we'll wait for pending work to
|
||||
/// complete before returning from the main thread, hopefully avoiding
|
||||
/// segfaults.
|
||||
///
|
||||
/// If you see this comment in the code, then it means that this workaround
|
||||
/// worked! We may yet one day track down the mysterious cause of that
|
||||
/// segfault...
|
||||
struct AbortCodegenOnDrop(Option<OngoingCodegen>);
|
||||
|
||||
impl AbortCodegenOnDrop {
|
||||
fn into_inner(mut self) -> OngoingCodegen {
|
||||
self.0.take().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for AbortCodegenOnDrop {
|
||||
type Target = OngoingCodegen;
|
||||
|
||||
fn deref(&self) -> &OngoingCodegen {
|
||||
self.0.as_ref().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for AbortCodegenOnDrop {
|
||||
fn deref_mut(&mut self) -> &mut OngoingCodegen {
|
||||
self.0.as_mut().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for AbortCodegenOnDrop {
|
||||
fn drop(&mut self) {
|
||||
if let Some(codegen) = self.0.take() {
|
||||
codegen.codegen_aborted();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_and_save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user