Rollup merge of #64840 - michaelwoerister:self-profiling-raii-refactor, r=wesleywiser
SelfProfiler API refactoring and part one of event review

This PR refactors the `SelfProfiler` a little so that most profiling methods are RAII-based. The codegen backend already had something similar; this refactoring pulls that functionality up into `SelfProfiler` itself for general use. The second commit of this PR reviews and updates the events we already record: names have been made more consistent, and CGU names have been removed from event names (they will be added back once function parameter recording is implemented). There is still work to do on adding new events, especially around trait resolution and the incremental system.

r? @wesleywiser
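For illustration, here is a minimal, self-contained sketch of the RAII pattern this PR introduces. It is a toy model only, not the rustc types from the diff below (the real `SelfProfilerRef`/`TimingGuard` record events into the measureme profiler and go through an event filter): a profiler method returns a guard that records the start of an activity immediately and the matching end when the guard is dropped, replacing paired `start_activity()`/`end_activity()` calls.

```rust
// Toy model of the RAII timing-guard pattern (not the rustc implementation).
use std::time::Instant;

struct SelfProfiler;

impl SelfProfiler {
    // Returns a guard; profiling of `event_id` runs until the guard is dropped.
    fn generic_activity(&self, event_id: &'static str) -> TimingGuard<'_> {
        println!("start: {}", event_id);
        TimingGuard { _profiler: self, event_id, start: Instant::now() }
    }
}

struct TimingGuard<'a> {
    _profiler: &'a SelfProfiler,
    event_id: &'static str,
    start: Instant,
}

impl Drop for TimingGuard<'_> {
    fn drop(&mut self) {
        // The end event is recorded automatically when the guard goes out of scope.
        println!("end: {} ({:?})", self.event_id, self.start.elapsed());
    }
}

fn main() {
    let prof = SelfProfiler;
    let _timer = prof.generic_activity("codegen_module");
    // ... do the work being profiled; `_timer` is dropped at the end of scope ...
}
```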
commit 8f5f92a07a
@@ -32,7 +32,7 @@ use syntax::source_map;
use syntax::parse::{self, ParseSess};
use syntax::symbol::Symbol;
use syntax_pos::{MultiSpan, Span};
use crate::util::profiling::SelfProfiler;
use crate::util::profiling::{SelfProfiler, SelfProfilerRef};

use rustc_target::spec::{PanicStrategy, RelroLevel, Target, TargetTriple};
use rustc_data_structures::flock;
@@ -129,7 +129,7 @@ pub struct Session {
pub profile_channel: Lock<Option<mpsc::Sender<ProfileQueriesMsg>>>,

/// Used by `-Z self-profile`.
pub self_profiling: Option<Arc<SelfProfiler>>,
pub prof: SelfProfilerRef,

/// Some measurements that are being gathered during compilation.
pub perf_stats: PerfStats,
@@ -835,24 +835,6 @@ impl Session {
}
}

#[inline(never)]
#[cold]
fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
match &self.self_profiling {
None => bug!("profiler_active() called but there was no profiler active"),
Some(profiler) => {
f(&profiler);
}
}
}

#[inline(always)]
pub fn profiler<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
if unlikely!(self.self_profiling.is_some()) {
self.profiler_active(f)
}
}

pub fn print_perf_stats(&self) {
println!(
"Total time spent computing symbol hashes: {}",
@@ -1251,7 +1233,7 @@ fn build_session_(
imported_macro_spans: OneThread::new(RefCell::new(FxHashMap::default())),
incr_comp_session: OneThread::new(RefCell::new(IncrCompSession::NotInitialized)),
cgu_reuse_tracker,
self_profiling: self_profiler,
prof: SelfProfilerRef::new(self_profiler),
profile_channel: Lock::new(None),
perf_stats: PerfStats {
symbol_hash_time: Lock::new(Duration::from_secs(0)),
@@ -45,6 +45,7 @@ use crate::ty::CanonicalPolyFnSig;
use crate::util::common::ErrorReported;
use crate::util::nodemap::{DefIdMap, DefIdSet, ItemLocalMap, ItemLocalSet};
use crate::util::nodemap::{FxHashMap, FxHashSet};
use crate::util::profiling::SelfProfilerRef;

use errors::DiagnosticBuilder;
use arena::SyncDroplessArena;
@@ -1030,6 +1031,8 @@ pub struct GlobalCtxt<'tcx> {

pub dep_graph: DepGraph,

pub prof: SelfProfilerRef,

/// Common objects.
pub common: Common<'tcx>,

@@ -1260,6 +1263,7 @@ impl<'tcx> TyCtxt<'tcx> {
arena: WorkerLocal::new(|_| Arena::default()),
interners,
dep_graph,
prof: s.prof.clone(),
common,
types: common_types,
lifetimes: common_lifetimes,
@@ -112,7 +112,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
let mut lock = cache.get_shard_by_value(key).lock();
if let Some(value) = lock.results.get(key) {
profq_msg!(tcx, ProfileQueriesMsg::CacheHit);
tcx.sess.profiler(|p| p.record_query_hit(Q::NAME));
tcx.prof.query_cache_hit(Q::NAME);
let result = (value.value.clone(), value.index);
#[cfg(debug_assertions)]
{
@@ -128,7 +128,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
// in another thread has completed. Record how long we wait in the
// self-profiler.
#[cfg(parallel_compiler)]
tcx.sess.profiler(|p| p.query_blocked_start(Q::NAME));
tcx.prof.query_blocked_start(Q::NAME);

job.clone()
},
@@ -170,7 +170,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
#[cfg(parallel_compiler)]
{
let result = job.r#await(tcx, span);
tcx.sess.profiler(|p| p.query_blocked_end(Q::NAME));
tcx.prof.query_blocked_end(Q::NAME);

if let Err(cycle) = result {
return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
@@ -382,8 +382,9 @@ impl<'tcx> TyCtxt<'tcx> {
}

if Q::ANON {

profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
self.sess.profiler(|p| p.start_query(Q::NAME));
let prof_timer = self.prof.query_provider(Q::NAME);

let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -393,7 +394,7 @@ impl<'tcx> TyCtxt<'tcx> {
})
});

self.sess.profiler(|p| p.end_query(Q::NAME));
drop(prof_timer);
profq_msg!(self, ProfileQueriesMsg::ProviderEnd);

self.dep_graph.read_index(dep_node_index);
@@ -451,9 +452,8 @@ impl<'tcx> TyCtxt<'tcx> {
// First we try to load the result from the on-disk cache.
let result = if Q::cache_on_disk(self, key.clone(), None) &&
self.sess.opts.debugging_opts.incremental_queries {
self.sess.profiler(|p| p.incremental_load_result_start(Q::NAME));
let _prof_timer = self.prof.incr_cache_loading(Q::NAME);
let result = Q::try_load_from_disk(self, prev_dep_node_index);
self.sess.profiler(|p| p.incremental_load_result_end(Q::NAME));

// We always expect to find a cached result for things that
// can be forced from `DepNode`.
@@ -469,21 +469,17 @@ impl<'tcx> TyCtxt<'tcx> {

let result = if let Some(result) = result {
profq_msg!(self, ProfileQueriesMsg::CacheHit);
self.sess.profiler(|p| p.record_query_hit(Q::NAME));

result
} else {
// We could not load a result from the on-disk cache, so
// recompute.

self.sess.profiler(|p| p.start_query(Q::NAME));
let _prof_timer = self.prof.query_provider(Q::NAME);

// The dep-graph for this computation is already in-place.
let result = self.dep_graph.with_ignore(|| {
Q::compute(self, key)
});

self.sess.profiler(|p| p.end_query(Q::NAME));
result
};

@@ -551,7 +547,7 @@ impl<'tcx> TyCtxt<'tcx> {
key, dep_node);

profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
self.sess.profiler(|p| p.start_query(Q::NAME));
let prof_timer = self.prof.query_provider(Q::NAME);

let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -571,7 +567,7 @@ impl<'tcx> TyCtxt<'tcx> {
})
});

self.sess.profiler(|p| p.end_query(Q::NAME));
drop(prof_timer);
profq_msg!(self, ProfileQueriesMsg::ProviderEnd);

if unlikely!(self.sess.opts.debugging_opts.query_dep_graph) {
@@ -619,7 +615,7 @@ impl<'tcx> TyCtxt<'tcx> {
let _ = self.get_query::<Q>(DUMMY_SP, key);
} else {
profq_msg!(self, ProfileQueriesMsg::CacheHit);
self.sess.profiler(|p| p.record_query_hit(Q::NAME));
self.prof.query_cache_hit(Q::NAME);
}
}
@@ -1,9 +1,9 @@
use std::borrow::Cow;
use std::error::Error;
use std::fs;
use std::mem::{self, Discriminant};
use std::path::Path;
use std::process;
use std::sync::Arc;
use std::thread::ThreadId;
use std::u32;
@@ -62,6 +62,206 @@ fn thread_id_to_u64(tid: ThreadId) -> u64 {
unsafe { mem::transmute::<ThreadId, u64>(tid) }
}


/// A reference to the SelfProfiler. It can be cloned and sent across thread
/// boundaries at will.
#[derive(Clone)]
pub struct SelfProfilerRef {
// This field is `None` if self-profiling is disabled for the current
// compilation session.
profiler: Option<Arc<SelfProfiler>>,

// We store the filter mask directly in the reference because that doesn't
// cost anything and allows for filtering with checking if the profiler is
// actually enabled.
event_filter_mask: EventFilter,
}

impl SelfProfilerRef {

pub fn new(profiler: Option<Arc<SelfProfiler>>) -> SelfProfilerRef {
// If there is no SelfProfiler then the filter mask is set to NONE,
// ensuring that nothing ever tries to actually access it.
let event_filter_mask = profiler
.as_ref()
.map(|p| p.event_filter_mask)
.unwrap_or(EventFilter::NONE);

SelfProfilerRef {
profiler,
event_filter_mask,
}
}

// This shim makes sure that calls only get executed if the filter mask
// lets them pass. It also contains some trickery to make sure that
// code is optimized for non-profiling compilation sessions, i.e. anything
// past the filter check is never inlined so it doesn't clutter the fast
// path.
#[inline(always)]
fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
{
#[inline(never)]
fn cold_call<F>(profiler_ref: &SelfProfilerRef, f: F) -> TimingGuard<'_>
where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
{
let profiler = profiler_ref.profiler.as_ref().unwrap();
f(&**profiler)
}

if unlikely!(self.event_filter_mask.contains(event_filter)) {
cold_call(self, f)
} else {
TimingGuard::none()
}
}

/// Start profiling a generic activity. Profiling continues until the
/// TimingGuard returned from this call is dropped.
#[inline(always)]
pub fn generic_activity(&self, event_id: &str) -> TimingGuard<'_> {
self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
let event_id = profiler.profiler.alloc_string(event_id);
TimingGuard::start(
profiler,
profiler.generic_activity_event_kind,
event_id
)
})
}

/// Start profiling a generic activity. Profiling continues until
/// `generic_activity_end` is called. The RAII-based `generic_activity`
/// usually is the better alternative.
#[inline(always)]
pub fn generic_activity_start(&self, event_id: &str) {
self.non_guard_generic_event(
|profiler| profiler.generic_activity_event_kind,
|profiler| profiler.profiler.alloc_string(event_id),
EventFilter::GENERIC_ACTIVITIES,
TimestampKind::Start,
);
}

/// End profiling a generic activity that was started with
/// `generic_activity_start`. The RAII-based `generic_activity` usually is
/// the better alternative.
#[inline(always)]
pub fn generic_activity_end(&self, event_id: &str) {
self.non_guard_generic_event(
|profiler| profiler.generic_activity_event_kind,
|profiler| profiler.profiler.alloc_string(event_id),
EventFilter::GENERIC_ACTIVITIES,
TimestampKind::End,
);
}

/// Start profiling a query provider. Profiling continues until the
/// TimingGuard returned from this call is dropped.
#[inline(always)]
pub fn query_provider(&self, query_name: QueryName) -> TimingGuard<'_> {
self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
let event_id = SelfProfiler::get_query_name_string_id(query_name);
TimingGuard::start(profiler, profiler.query_event_kind, event_id)
})
}

/// Record a query in-memory cache hit.
#[inline(always)]
pub fn query_cache_hit(&self, query_name: QueryName) {
self.non_guard_query_event(
|profiler| profiler.query_cache_hit_event_kind,
query_name,
EventFilter::QUERY_CACHE_HITS,
TimestampKind::Instant,
);
}

/// Start profiling a query being blocked on a concurrent execution.
/// Profiling continues until `query_blocked_end` is called.
#[inline(always)]
pub fn query_blocked_start(&self, query_name: QueryName) {
self.non_guard_query_event(
|profiler| profiler.query_blocked_event_kind,
query_name,
EventFilter::QUERY_BLOCKED,
TimestampKind::Start,
);
}

/// End profiling a query being blocked on a concurrent execution.
#[inline(always)]
pub fn query_blocked_end(&self, query_name: QueryName) {
self.non_guard_query_event(
|profiler| profiler.query_blocked_event_kind,
query_name,
EventFilter::QUERY_BLOCKED,
TimestampKind::End,
);
}

/// Start profiling how long it takes to load a query result from the
/// incremental compilation on-disk cache. Profiling continues until the
/// TimingGuard returned from this call is dropped.
#[inline(always)]
pub fn incr_cache_loading(&self, query_name: QueryName) -> TimingGuard<'_> {
self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| {
let event_id = SelfProfiler::get_query_name_string_id(query_name);
TimingGuard::start(
profiler,
profiler.incremental_load_result_event_kind,
event_id
)
})
}

#[inline(always)]
fn non_guard_query_event(
&self,
event_kind: fn(&SelfProfiler) -> StringId,
query_name: QueryName,
event_filter: EventFilter,
timestamp_kind: TimestampKind
) {
drop(self.exec(event_filter, |profiler| {
let event_id = SelfProfiler::get_query_name_string_id(query_name);
let thread_id = thread_id_to_u64(std::thread::current().id());

profiler.profiler.record_event(
event_kind(profiler),
event_id,
thread_id,
timestamp_kind,
);

TimingGuard::none()
}));
}

#[inline(always)]
fn non_guard_generic_event<F: FnOnce(&SelfProfiler) -> StringId>(
&self,
event_kind: fn(&SelfProfiler) -> StringId,
event_id: F,
event_filter: EventFilter,
timestamp_kind: TimestampKind
) {
drop(self.exec(event_filter, |profiler| {
let thread_id = thread_id_to_u64(std::thread::current().id());

profiler.profiler.record_event(
event_kind(profiler),
event_id(profiler),
thread_id,
timestamp_kind,
);

TimingGuard::none()
}));
}
}

pub struct SelfProfiler {
profiler: Profiler,
event_filter_mask: EventFilter,
@@ -143,103 +343,51 @@ impl SelfProfiler {
let id = SelfProfiler::get_query_name_string_id(query_name);
self.profiler.alloc_string_with_reserved_id(id, query_name.as_str());
}
}

#[must_use]
pub struct TimingGuard<'a>(Option<TimingGuardInternal<'a>>);

struct TimingGuardInternal<'a> {
raw_profiler: &'a Profiler,
event_id: StringId,
event_kind: StringId,
thread_id: u64,
}

impl<'a> TimingGuard<'a> {
#[inline]
pub fn start_activity(
&self,
label: impl Into<Cow<'static, str>>,
) {
if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::Start);
}
}

#[inline]
pub fn end_activity(
&self,
label: impl Into<Cow<'static, str>>,
) {
if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::End);
}
}

#[inline]
pub fn record_query_hit(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS) {
self.record_query(query_name, self.query_cache_hit_event_kind, TimestampKind::Instant);
}
}

#[inline]
pub fn start_query(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
self.record_query(query_name, self.query_event_kind, TimestampKind::Start);
}
}

#[inline]
pub fn end_query(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
self.record_query(query_name, self.query_event_kind, TimestampKind::End);
}
}

#[inline]
pub fn incremental_load_result_start(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
self.record_query(
query_name,
self.incremental_load_result_event_kind,
TimestampKind::Start
);
}
}

#[inline]
pub fn incremental_load_result_end(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
self.record_query(
query_name,
self.incremental_load_result_event_kind,
TimestampKind::End
);
}
}

#[inline]
pub fn query_blocked_start(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::Start);
}
}

#[inline]
pub fn query_blocked_end(&self, query_name: QueryName) {
if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::End);
}
}

#[inline]
fn record(&self, event_id: &str, event_kind: StringId, timestamp_kind: TimestampKind) {
let thread_id = thread_id_to_u64(std::thread::current().id());

let event_id = self.profiler.alloc_string(event_id);
self.profiler.record_event(event_kind, event_id, thread_id, timestamp_kind);
}

#[inline]
fn record_query(
&self,
query_name: QueryName,
pub fn start(
profiler: &'a SelfProfiler,
event_kind: StringId,
timestamp_kind: TimestampKind,
) {
let dep_node_name = SelfProfiler::get_query_name_string_id(query_name);

event_id: StringId,
) -> TimingGuard<'a> {
let thread_id = thread_id_to_u64(std::thread::current().id());
let raw_profiler = &profiler.profiler;
raw_profiler.record_event(event_kind, event_id, thread_id, TimestampKind::Start);

self.profiler.record_event(event_kind, dep_node_name, thread_id, timestamp_kind);
TimingGuard(Some(TimingGuardInternal {
raw_profiler,
event_kind,
event_id,
thread_id,
}))
}

#[inline]
pub fn none() -> TimingGuard<'a> {
TimingGuard(None)
}
}

impl<'a> Drop for TimingGuardInternal<'a> {
#[inline]
fn drop(&mut self) {
self.raw_profiler.record_event(
self.event_kind,
self.event_id,
self.thread_id,
TimestampKind::End
);
}
}

@@ -62,11 +62,13 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
};
let exported_symbols = cgcx.exported_symbols
.as_ref().expect("needs exported symbols for LTO");
let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
.iter()
.filter_map(symbol_filter)
.collect::<Vec<CString>>();
let _timer = cgcx.profile_activity("generate_symbol_white_list_for_thinlto");
let mut symbol_white_list = {
let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
exported_symbols[&LOCAL_CRATE]
.iter()
.filter_map(symbol_filter)
.collect::<Vec<CString>>()
};
info!("{} symbols to preserve in this crate", symbol_white_list.len());

// If we're performing LTO for the entire crate graph, then for each of our
@@ -95,14 +97,17 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
}

for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
let _timer = cgcx.profile_activity(format!("load: {}", path.display()));
let exported_symbols = cgcx.exported_symbols
.as_ref().expect("needs exported symbols for LTO");
symbol_white_list.extend(
exported_symbols[&cnum]
.iter()
.filter_map(symbol_filter));
{
let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
symbol_white_list.extend(
exported_symbols[&cnum]
.iter()
.filter_map(symbol_filter));
}

let _timer = cgcx.prof.generic_activity("LLVM_lto_load_upstream_bitcode");
let archive = ArchiveRO::open(&path).expect("wanted an rlib");
let bytecodes = archive.iter().filter_map(|child| {
child.ok().and_then(|c| c.name().map(|name| (name, c)))
@@ -189,6 +194,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
symbol_white_list: &[*const libc::c_char])
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
{
let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
info!("going for a fat lto");

// Sort out all our lists of incoming modules into two lists.
@@ -287,6 +293,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
// save and persist everything with the original module.
let mut linker = Linker::new(llmod);
for (bc_decoded, name) in serialized_modules {
let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_link_module");
info!("linking {:?}", name);
time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
let data = bc_decoded.data();
@@ -388,6 +395,7 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
symbol_white_list: &[*const libc::c_char])
-> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
unsafe {
info!("going for that thin, thin LTO");

@@ -601,16 +609,6 @@ impl ModuleBuffer {
llvm::LLVMRustModuleBufferCreate(m)
})
}

pub fn parse<'a>(
&self,
name: &str,
cx: &'a llvm::Context,
handler: &Handler,
) -> Result<&'a llvm::Module, FatalError> {
let name = CString::new(name).unwrap();
parse_module(cx, &name, self.data(), handler)
}
}

impl ModuleBufferMethods for ModuleBuffer {
@@ -723,7 +721,7 @@ pub unsafe fn optimize_thin_module(
// Like with "fat" LTO, get some better optimizations if landing pads
// are disabled by removing all landing pads.
if cgcx.no_landing_pads {
let _timer = cgcx.profile_activity("LLVM_remove_landing_pads");
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_remove_landing_pads");
llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
}
@@ -736,26 +734,41 @@ pub unsafe fn optimize_thin_module(
//
// You can find some more comments about these functions in the LLVM
// bindings we've got (currently `PassWrapper.cpp`)
if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))
{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_rename");
if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))

{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_resolve_weak");
if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))

{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_internalize");
if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))

{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_import");
if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
let msg = "failed to prepare thin LTO module";
return Err(write::llvm_err(&diag_handler, msg))
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
}
save_temp_bitcode(cgcx, &module, "thin-lto-after-import");

// Ok now this is a bit unfortunate. This is also something you won't
// find upstream in LLVM's ThinLTO passes! This is a hack for now to
@@ -786,18 +799,24 @@ pub unsafe fn optimize_thin_module(
// not too much) but for now at least gets LLVM to emit valid DWARF (or
// so it appears). Hopefully we can remove this once upstream bugs are
// fixed in LLVM.
llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_patch_debuginfo");
llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
}

// Alright now that we've done everything related to the ThinLTO
// analysis it's time to run some optimizations! Here we use the same
// `run_pass_manager` as the "fat" LTO above except that we tell it to
// populate a thin-specific pass manager, which presumably LLVM treats a
// little differently.
info!("running thin lto passes over {}", module.name);
let config = cgcx.config(module.kind);
run_pass_manager(cgcx, &module, config, true);
save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
{
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_optimize");
info!("running thin lto passes over {}", module.name);
let config = cgcx.config(module.kind);
run_pass_manager(cgcx, &module, config, true);
save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
}
}
Ok(module)
}
@@ -306,6 +306,8 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
config: &ModuleConfig)
-> Result<(), FatalError>
{
let _timer = cgcx.prof.generic_activity("LLVM_module_optimize");

let llmod = module.module_llvm.llmod();
let llcx = &*module.module_llvm.llcx;
let tm = &*module.module_llvm.tm;
@@ -423,7 +425,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,

// Finally, run the actual optimization passes
{
let _timer = cgcx.profile_activity("LLVM_function_passes");
let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_function_passes");
time_ext(config.time_passes,
None,
&format!("llvm function passes [{}]", module_name.unwrap()),
@@ -432,7 +434,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
});
}
{
let _timer = cgcx.profile_activity("LLVM_module_passes");
let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_module_passes");
time_ext(config.time_passes,
None,
&format!("llvm module passes [{}]", module_name.unwrap()),
@@ -454,7 +456,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
config: &ModuleConfig)
-> Result<CompiledModule, FatalError>
{
let _timer = cgcx.profile_activity("codegen");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen");
{
let llmod = module.module_llvm.llmod();
let llcx = &*module.module_llvm.llcx;
@@ -505,12 +507,12 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,

if write_bc || config.emit_bc_compressed || config.embed_bitcode {
let _timer = cgcx.profile_activity("LLVM_make_bitcode");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_make_bitcode");
let thin = ThinBuffer::new(llmod);
let data = thin.data();

if write_bc {
let _timer = cgcx.profile_activity("LLVM_emit_bitcode");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_bitcode");
if let Err(e) = fs::write(&bc_out, data) {
let msg = format!("failed to write bytecode to {}: {}", bc_out.display(), e);
diag_handler.err(&msg);
@@ -518,12 +520,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
}

if config.embed_bitcode {
let _timer = cgcx.profile_activity("LLVM_embed_bitcode");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_embed_bitcode");
embed_bitcode(cgcx, llcx, llmod, Some(data));
}

if config.emit_bc_compressed {
let _timer = cgcx.profile_activity("LLVM_compress_bitcode");
let _timer =
cgcx.prof.generic_activity("LLVM_module_codegen_emit_compressed_bitcode");
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
let data = bytecode::encode(&module.name, data);
if let Err(e) = fs::write(&dst, data) {
@@ -538,7 +541,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
time_ext(config.time_passes, None, &format!("codegen passes [{}]", module_name.unwrap()),
|| -> Result<(), FatalError> {
if config.emit_ir {
let _timer = cgcx.profile_activity("LLVM_emit_ir");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_ir");
let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
let out_c = path_to_c_string(&out);

@@ -585,7 +588,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
}

if config.emit_asm || asm_to_obj {
let _timer = cgcx.profile_activity("LLVM_emit_asm");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_asm");
let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);

// We can't use the same module for asm and binary output, because that triggers
@@ -603,13 +606,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
}

if write_obj {
let _timer = cgcx.profile_activity("LLVM_emit_obj");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_obj");
with_codegen(tm, llmod, config.no_builtins, |cpm| {
write_output_file(diag_handler, tm, cpm, llmod, &obj_out,
llvm::FileType::ObjectFile)
})?;
} else if asm_to_obj {
let _timer = cgcx.profile_activity("LLVM_asm_to_obj");
let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_asm_to_obj");
let assembly = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
run_assembler(cgcx, diag_handler, &assembly, &obj_out);

@@ -108,6 +108,7 @@ pub fn compile_codegen_unit(
cgu_name: InternedString,
tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
) {
let prof_timer = tcx.prof.generic_activity("codegen_module");
let start_time = Instant::now();

let dep_node = tcx.codegen_unit(cgu_name).codegen_dep_node(tcx);
@@ -119,6 +120,7 @@ pub fn compile_codegen_unit(
dep_graph::hash_result,
);
let time_to_codegen = start_time.elapsed();
drop(prof_timer);

// We assume that the cost to run LLVM on a CGU is proportional to
// the time we needed for codegenning it.
@@ -324,8 +324,9 @@ impl CodegenBackend for LlvmCodegenBackend {

// Run the linker on any artifacts that resulted from the LLVM run.
// This should produce either a finished executable or library.
sess.profiler(|p| p.start_activity("link_crate"));
time(sess, "linking", || {
let _prof_timer = sess.prof.generic_activity("link_crate");

use rustc_codegen_ssa::back::link::link_binary;
use crate::back::archive::LlvmArchiveBuilder;

@@ -338,7 +339,6 @@ impl CodegenBackend for LlvmCodegenBackend {
target_cpu,
);
});
sess.profiler(|p| p.end_activity("link_crate"));

// Now that we won't touch anything in the incremental compilation directory
// any more, we can finalize it (which involves renaming it)
@@ -19,7 +19,7 @@ use rustc::util::nodemap::FxHashMap;
use rustc::hir::def_id::{CrateNum, LOCAL_CRATE};
use rustc::ty::TyCtxt;
use rustc::util::common::{time_depth, set_time_depth, print_time_passes_entry};
use rustc::util::profiling::SelfProfiler;
use rustc::util::profiling::SelfProfilerRef;
use rustc_fs_util::link_or_copy;
use rustc_data_structures::svh::Svh;
use rustc_errors::{Handler, Level, FatalError, DiagnosticId};
@@ -31,7 +31,6 @@ use syntax_pos::symbol::{Symbol, sym};
use jobserver::{Client, Acquired};

use std::any::Any;
use std::borrow::Cow;
use std::fs;
use std::io;
use std::mem;
@@ -196,42 +195,13 @@ impl<B: WriteBackendMethods> Clone for TargetMachineFactory<B> {
}
}

pub struct ProfileGenericActivityTimer {
profiler: Option<Arc<SelfProfiler>>,
label: Cow<'static, str>,
}

impl ProfileGenericActivityTimer {
pub fn start(
profiler: Option<Arc<SelfProfiler>>,
label: Cow<'static, str>,
) -> ProfileGenericActivityTimer {
if let Some(profiler) = &profiler {
profiler.start_activity(label.clone());
}

ProfileGenericActivityTimer {
profiler,
label,
}
}
}

impl Drop for ProfileGenericActivityTimer {
fn drop(&mut self) {
if let Some(profiler) = &self.profiler {
profiler.end_activity(self.label.clone());
}
}
}

/// Additional resources used by optimize_and_codegen (not module specific)
#[derive(Clone)]
pub struct CodegenContext<B: WriteBackendMethods> {
// Resources needed when running LTO
pub backend: B,
pub time_passes: bool,
pub profiler: Option<Arc<SelfProfiler>>,
pub prof: SelfProfilerRef,
pub lto: Lto,
pub no_landing_pads: bool,
pub save_temps: bool,
@@ -283,31 +253,6 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
ModuleKind::Allocator => &self.allocator_module_config,
}
}

#[inline(never)]
#[cold]
fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
match &self.profiler {
None => bug!("profiler_active() called but there was no profiler active"),
Some(profiler) => {
f(&*profiler);
}
}
}

#[inline(always)]
pub fn profile<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
if unlikely!(self.profiler.is_some()) {
self.profiler_active(f)
}
}

pub fn profile_activity(
&self,
label: impl Into<Cow<'static, str>>,
) -> ProfileGenericActivityTimer {
ProfileGenericActivityTimer::start(self.profiler.clone(), label.into())
}
}

fn generate_lto_work<B: ExtraBackendMethods>(
@@ -316,7 +261,7 @@ fn generate_lto_work<B: ExtraBackendMethods>(
needs_thin_lto: Vec<(String, B::ThinBuffer)>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
) -> Vec<(WorkItem<B>, u64)> {
cgcx.profile(|p| p.start_activity("codegen_run_lto"));
let _prof_timer = cgcx.prof.generic_activity("codegen_run_lto");

let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
@@ -343,8 +288,6 @@ fn generate_lto_work<B: ExtraBackendMethods>(
}), 0)
})).collect();

cgcx.profile(|p| p.end_activity("codegen_run_lto"));

result
}

@@ -380,6 +323,9 @@ pub fn start_async_codegen<B: ExtraBackendMethods>(
) -> OngoingCodegen<B> {
let (coordinator_send, coordinator_receive) = channel();
let sess = tcx.sess;

sess.prof.generic_activity_start("codegen_and_optimize_crate");

let crate_name = tcx.crate_name(LOCAL_CRATE);
let crate_hash = tcx.crate_hash(LOCAL_CRATE);
let no_builtins = attr::contains_name(&tcx.hir().krate().attrs, sym::no_builtins);
@@ -1088,7 +1034,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
save_temps: sess.opts.cg.save_temps,
opts: Arc::new(sess.opts.clone()),
time_passes: sess.time_extended(),
profiler: sess.self_profiling.clone(),
prof: sess.prof.clone(),
exported_symbols,
plugin_passes: sess.plugin_llvm_passes.borrow().clone(),
remark: sess.opts.cg.remark.clone(),
@@ -1645,12 +1591,8 @@ fn spawn_work<B: ExtraBackendMethods>(
// as a diagnostic was already sent off to the main thread - just
// surface that there was an error in this worker.
bomb.result = {
let label = work.name();
cgcx.profile(|p| p.start_activity(label.clone()));
let result = execute_work_item(&cgcx, work).ok();
cgcx.profile(|p| p.end_activity(label));

result
let _prof_timer = cgcx.prof.generic_activity(&work.name());
execute_work_item(&cgcx, work).ok()
};
});
}
@@ -1835,6 +1777,8 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
self.backend.print_pass_timings()
}

sess.prof.generic_activity_end("codegen_and_optimize_crate");

(CodegenResults {
crate_name: self.crate_name,
crate_hash: self.crate_hash,
@@ -559,7 +559,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(

if need_metadata_module {
// Codegen the encoded metadata.
tcx.sess.profiler(|p| p.start_activity("codegen crate metadata"));
let _prof_timer = tcx.prof.generic_activity("codegen_crate_metadata");

let metadata_cgu_name = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
&["crate"],
@@ -570,7 +570,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
backend.write_compressed_metadata(tcx, &ongoing_codegen.metadata,
&mut metadata_llvm_module);
});
tcx.sess.profiler(|p| p.end_activity("codegen crate metadata"));

let metadata_module = ModuleCodegen {
name: metadata_cgu_name,
@@ -599,11 +598,9 @@ pub fn codegen_crate<B: ExtraBackendMethods>(

match cgu_reuse {
CguReuse::No => {
tcx.sess.profiler(|p| p.start_activity(format!("codegen {}", cgu.name())));
let start_time = Instant::now();
backend.compile_codegen_unit(tcx, *cgu.name(), &ongoing_codegen.coordinator_send);
total_codegen_time += start_time.elapsed();
tcx.sess.profiler(|p| p.end_activity(format!("codegen {}", cgu.name())));
false
}
CguReuse::PreLto => {
@@ -21,7 +21,6 @@

#[macro_use] extern crate log;
#[macro_use] extern crate rustc;
#[macro_use] extern crate rustc_data_structures;
#[macro_use] extern crate syntax;

use std::path::PathBuf;
@@ -28,6 +28,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {

join(move || {
if tcx.sess.opts.debugging_opts.incremental_queries {
let _timer = tcx.prof.generic_activity("incr_comp_persist_result_cache");

time(sess, "persist query result cache", || {
save_in(sess,
query_cache_path,
@@ -36,6 +38,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
}
}, || {
time(sess, "persist dep-graph", || {
let _timer = tcx.prof.generic_activity("incr_comp_persist_dep_graph");

save_in(sess,
dep_graph_path,
|e| {
@@ -135,6 +139,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {

// Encode the graph data.
let serialized_graph = time(tcx.sess, "getting serialized graph", || {
let _timer = tcx.prof.generic_activity("incr_comp_serialize_dep_graph");
tcx.dep_graph.serialize()
});

@@ -214,6 +219,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
}

time(tcx.sess, "encoding serialized graph", || {
let _timer = tcx.prof.generic_activity("incr_comp_encode_serialized_dep_graph");
serialized_graph.encode(encoder).unwrap();
});
}
@@ -59,15 +59,17 @@ use std::rc::Rc;
pub fn parse<'a>(sess: &'a Session, input: &Input) -> PResult<'a, ast::Crate> {
sess.diagnostic()
.set_continue_after_error(sess.opts.debugging_opts.continue_parse_after_error);
sess.profiler(|p| p.start_activity("parsing"));
let krate = time(sess, "parsing", || match *input {
Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
Input::Str {
ref input,
ref name,
} => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
let krate = time(sess, "parsing", || {
let _prof_timer = sess.prof.generic_activity("parse_crate");

match *input {
Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
Input::Str {
ref input,
ref name,
} => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
}
})?;
sess.profiler(|p| p.end_activity("parsing"));

sess.diagnostic().set_continue_after_error(true);

@@ -355,8 +357,8 @@ fn configure_and_expand_inner<'a>(
);

// Expand all macros
sess.profiler(|p| p.start_activity("macro expansion"));
krate = time(sess, "expansion", || {
let _prof_timer = sess.prof.generic_activity("macro_expand_crate");
// Windows dlls do not have rpaths, so they don't know how to find their
// dependencies. It's up to us to tell the system where to find all the
// dependent dlls. Note that this uses cfg!(windows) as opposed to
@@ -430,7 +432,6 @@ fn configure_and_expand_inner<'a>(
}
krate
});
sess.profiler(|p| p.end_activity("macro expansion"));

time(sess, "maybe building test harness", || {
syntax_ext::test_harness::inject(
@@ -1071,11 +1072,10 @@ pub fn start_codegen<'tcx>(
encode_and_write_metadata(tcx, outputs)
});

tcx.sess.profiler(|p| p.start_activity("codegen crate"));
let codegen = time(tcx.sess, "codegen", move || {
let _prof_timer = tcx.prof.generic_activity("codegen_crate");
codegen_backend.codegen_crate(tcx, metadata, need_metadata_module)
});
tcx.sess.profiler(|p| p.end_activity("codegen crate"));

if log_enabled!(::log::Level::Info) {
println!("Post-codegen");
@@ -295,7 +295,7 @@ pub fn provide(providers: &mut Providers<'_>) {
}

pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
tcx.sess.profiler(|p| p.start_activity("type-check crate"));
let _prof_timer = tcx.prof.generic_activity("type_check_crate");

// this ensures that later parts of type checking can assume that items
// have valid types and not error
@@ -347,8 +347,6 @@ pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
check_unused::check_crate(tcx);
check_for_entry_fn(tcx);

tcx.sess.profiler(|p| p.end_activity("type-check crate"));

if tcx.sess.err_count() == 0 {
Ok(())
} else {