Auto merge of #90617 - tmiasko:time-trace-threads, r=wesleywiser

Initialize LLVM time trace profiler on each code generation thread. In LLVM 11 (https://reviews.llvm.org/D71059), the time trace profiler was extended to support multiple threads. `timeTraceProfilerInitialize` creates a thread-local profiler instance. When a thread finishes, `timeTraceProfilerFinishThread` moves its thread-local instance into a global collection of instances. Finally, when all codegen work is complete, `timeTraceProfilerWrite` writes data from the current thread-local instance and from the instances in the global collection. Previously, the profiler was initialized on a single thread only. Since this thread performs no code generation on its own, the resulting profile was empty. Update LLVM codegen to initialize and finish the time trace profiler on each code generation thread. cc `@tmandry` r? `@wesleywiser`
2021-11-06 09:55:50 +00:00 · 2021-11-06 09:55:50 +00:00 · 3cd3bbecc5
commit 3cd3bbecc5
parent 7276a6a117 5a09e12135
6 changed files with 128 additions and 56 deletions
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@ -76,6 +76,27 @@ mod value;
 #[derive(Clone)]
 pub struct LlvmCodegenBackend(());

+struct TimeTraceProfiler {
+    enabled: bool,
+}
+
+impl TimeTraceProfiler {
+    fn new(enabled: bool) -> Self {
+        if enabled {
+            unsafe { llvm::LLVMTimeTraceProfilerInitialize() }
+        }
+        TimeTraceProfiler { enabled }
+    }
+}
+
+impl Drop for TimeTraceProfiler {
+    fn drop(&mut self) {
+        if self.enabled {
+            unsafe { llvm::LLVMTimeTraceProfilerFinishThread() }
+        }
+    }
+}
+
 impl ExtraBackendMethods for LlvmCodegenBackend {
    fn new_metadata(&self, tcx: TyCtxt<'_>, mod_name: &str) -> ModuleLlvm {
        ModuleLlvm::new_metadata(tcx, mod_name)
@ -119,6 +140,34 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
        llvm_util::tune_cpu(sess)
    }
+
+    fn spawn_thread<F, T>(time_trace: bool, f: F) -> std::thread::JoinHandle<T>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        std::thread::spawn(move || {
+            let _profiler = TimeTraceProfiler::new(time_trace);
+            f()
+        })
+    }
+
+    fn spawn_named_thread<F, T>(
+        time_trace: bool,
+        name: String,
+        f: F,
+    ) -> std::io::Result<std::thread::JoinHandle<T>>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        std::thread::Builder::new().name(name).spawn(move || {
+            let _profiler = TimeTraceProfiler::new(time_trace);
+            f()
+        })
+    }
 }

 impl WriteBackendMethods for LlvmCodegenBackend {
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@ -1737,6 +1737,8 @@ extern "C" {

    pub fn LLVMTimeTraceProfilerInitialize();

+    pub fn LLVMTimeTraceProfilerFinishThread();
+
    pub fn LLVMTimeTraceProfilerFinish(FileName: *const c_char);

    pub fn LLVMAddAnalysisPasses(T: &'a TargetMachine, PM: &PassManager<'a>);
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@ -113,11 +113,6 @@ unsafe fn configure_llvm(sess: &Session) {
    }

    if sess.opts.debugging_opts.llvm_time_trace {
-        // time-trace is not thread safe and running it in parallel will cause seg faults.
-        if !sess.opts.debugging_opts.no_parallel_llvm {
-            bug!("`-Z llvm-time-trace` requires `-Z no-parallel-llvm")
-        }
-
        llvm::LLVMTimeTraceProfilerInitialize();
    }

--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@ -310,6 +310,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
    pub no_landing_pads: bool,
    pub save_temps: bool,
    pub fewer_names: bool,
+    pub time_trace: bool,
    pub exported_symbols: Option<Arc<ExportedSymbols>>,
    pub opts: Arc<config::Options>,
    pub crate_types: Vec<CrateType>,
@ -1039,6 +1040,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
        no_landing_pads: sess.panic_strategy() == PanicStrategy::Abort,
        fewer_names: sess.fewer_names(),
        save_temps: sess.opts.cg.save_temps,
+        time_trace: sess.opts.debugging_opts.llvm_time_trace,
        opts: Arc::new(sess.opts.clone()),
        prof: sess.prof.clone(),
        exported_symbols,
@ -1198,7 +1200,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
    // Each LLVM module is automatically sent back to the coordinator for LTO if
    // necessary. There's already optimizations in place to avoid sending work
    // back to the coordinator if LTO isn't requested.
-    return thread::spawn(move || {
+    return B::spawn_thread(cgcx.time_trace, move || {
        let mut worker_id_counter = 0;
        let mut free_worker_ids = Vec::new();
        let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
@ -1615,59 +1617,57 @@ fn start_executing_work<B: ExtraBackendMethods>(
 pub struct WorkerFatalError;

 fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) {
-    let builder = thread::Builder::new().name(work.short_description());
-    builder
-        .spawn(move || {
-            // Set up a destructor which will fire off a message that we're done as
-            // we exit.
-            struct Bomb<B: ExtraBackendMethods> {
-                coordinator_send: Sender<Box<dyn Any + Send>>,
-                result: Option<Result<WorkItemResult<B>, FatalError>>,
-                worker_id: usize,
-            }
-            impl<B: ExtraBackendMethods> Drop for Bomb<B> {
-                fn drop(&mut self) {
-                    let worker_id = self.worker_id;
-                    let msg = match self.result.take() {
-                        Some(Ok(WorkItemResult::Compiled(m))) => {
-                            Message::Done::<B> { result: Ok(m), worker_id }
-                        }
-                        Some(Ok(WorkItemResult::NeedsLink(m))) => {
-                            Message::NeedsLink::<B> { module: m, worker_id }
-                        }
-                        Some(Ok(WorkItemResult::NeedsFatLTO(m))) => {
-                            Message::NeedsFatLTO::<B> { result: m, worker_id }
-                        }
-                        Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => {
-                            Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
-                        }
-                        Some(Err(FatalError)) => {
-                            Message::Done::<B> { result: Err(Some(WorkerFatalError)), worker_id }
-                        }
-                        None => Message::Done::<B> { result: Err(None), worker_id },
-                    };
-                    drop(self.coordinator_send.send(Box::new(msg)));
-                }
+    B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
+        // Set up a destructor which will fire off a message that we're done as
+        // we exit.
+        struct Bomb<B: ExtraBackendMethods> {
+            coordinator_send: Sender<Box<dyn Any + Send>>,
+            result: Option<Result<WorkItemResult<B>, FatalError>>,
+            worker_id: usize,
+        }
+        impl<B: ExtraBackendMethods> Drop for Bomb<B> {
+            fn drop(&mut self) {
+                let worker_id = self.worker_id;
+                let msg = match self.result.take() {
+                    Some(Ok(WorkItemResult::Compiled(m))) => {
+                        Message::Done::<B> { result: Ok(m), worker_id }
+                    }
+                    Some(Ok(WorkItemResult::NeedsLink(m))) => {
+                        Message::NeedsLink::<B> { module: m, worker_id }
+                    }
+                    Some(Ok(WorkItemResult::NeedsFatLTO(m))) => {
+                        Message::NeedsFatLTO::<B> { result: m, worker_id }
+                    }
+                    Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => {
+                        Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
+                    }
+                    Some(Err(FatalError)) => {
+                        Message::Done::<B> { result: Err(Some(WorkerFatalError)), worker_id }
+                    }
+                    None => Message::Done::<B> { result: Err(None), worker_id },
+                };
+                drop(self.coordinator_send.send(Box::new(msg)));
            }
+        }

-            let mut bomb = Bomb::<B> {
-                coordinator_send: cgcx.coordinator_send.clone(),
-                result: None,
-                worker_id: cgcx.worker,
-            };
+        let mut bomb = Bomb::<B> {
+            coordinator_send: cgcx.coordinator_send.clone(),
+            result: None,
+            worker_id: cgcx.worker,
+        };

-            // Execute the work itself, and if it finishes successfully then flag
-            // ourselves as a success as well.
-            //
-            // Note that we ignore any `FatalError` coming out of `execute_work_item`,
-            // as a diagnostic was already sent off to the main thread - just
-            // surface that there was an error in this worker.
-            bomb.result = {
-                let _prof_timer = work.start_profiling(&cgcx);
-                Some(execute_work_item(&cgcx, work))
-            };
-        })
-        .expect("failed to spawn thread");
+        // Execute the work itself, and if it finishes successfully then flag
+        // ourselves as a success as well.
+        //
+        // Note that we ignore any `FatalError` coming out of `execute_work_item`,
+        // as a diagnostic was already sent off to the main thread - just
+        // surface that there was an error in this worker.
+        bomb.result = {
+            let _prof_timer = work.start_profiling(&cgcx);
+            Some(execute_work_item(&cgcx, work))
+        };
+    })
+    .expect("failed to spawn thread");
 }

 enum SharedEmitterMessage {
--- a/compiler/rustc_codegen_ssa/src/traits/backend.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs
@ -142,4 +142,26 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
    ) -> TargetMachineFactoryFn<Self>;
    fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
+
+    fn spawn_thread<F, T>(_time_trace: bool, f: F) -> std::thread::JoinHandle<T>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        std::thread::spawn(f)
+    }
+
+    fn spawn_named_thread<F, T>(
+        _time_trace: bool,
+        name: String,
+        f: F,
+    ) -> std::io::Result<std::thread::JoinHandle<T>>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        std::thread::Builder::new().name(name).spawn(f)
+    }
 }
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@ -75,6 +75,10 @@ extern "C" void LLVMTimeTraceProfilerInitialize() {
      /* ProcName */ "rustc");
 }

+extern "C" void LLVMTimeTraceProfilerFinishThread() {
+  timeTraceProfilerFinishThread();
+}
+
 extern "C" void LLVMTimeTraceProfilerFinish(const char* FileName) {
  StringRef FN(FileName);
  std::error_code EC;