// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::collections::HashMap;
use std::fs::File;
use std::io::prelude::*;
use std::marker::PhantomData;
use std::mem;
use std::sync::{Arc, Mutex};
use std::time::Instant;

const OUTPUT_WIDTH_IN_PX: u64 = 1000;
const TIME_LINE_HEIGHT_IN_PX: u64 = 20;
const TIME_LINE_HEIGHT_STRIDE_IN_PX: usize = 30;

#[derive(Clone)]
struct Timing {
    start: Instant,
    end: Instant,
    work_package_kind: WorkPackageKind,
    name: String,
    events: Vec<(String, Instant)>,
}

#[derive(Clone, Copy, Hash, Eq, PartialEq, Debug)]
pub struct TimelineId(pub usize);

#[derive(Clone)]
struct PerThread {
    timings: Vec<Timing>,
    open_work_package: Option<(Instant, WorkPackageKind, String)>,
}

#[derive(Clone)]
pub struct TimeGraph {
    data: Arc<Mutex<HashMap<TimelineId, PerThread>>>,
}

#[derive(Clone, Copy)]
pub struct WorkPackageKind(pub &'static [&'static str]);
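
// `WorkPackageKind` is just a palette of CSS colors that `dump` below cycles
// through when rendering the bars for that kind of work. A minimal sketch of
// declaring one (the name and the colors are illustrative, not constants
// defined by this module):
//
//     const OPTIMIZE_KIND: WorkPackageKind =
//         WorkPackageKind(&["#DE9597", "#FED1D3", "#FDC5C7"]);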

pub struct Timeline {
    token: Option<RaiiToken>,
}

struct RaiiToken {
    graph: TimeGraph,
    timeline: TimelineId,
    events: Vec<(String, Instant)>,
    // The token must not be Send:
    _marker: PhantomData<*const ()>
}

impl Drop for RaiiToken {
    fn drop(&mut self) {
        self.graph.end(self.timeline, mem::replace(&mut self.events, Vec::new()));
    }
}
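
// Dropping the token is what closes a timing span: the `Timeline` handed out
// by `TimeGraph::start` below files its `Timing` wherever it goes out of
// scope. A minimal sketch of the intended call pattern, using the
// illustrative `OPTIMIZE_KIND` from above (the timeline id and names are
// arbitrary):
//
//     {
//         let mut timeline = graph.start(TimelineId(0), OPTIMIZE_KIND, "codegen");
//         timeline.record("llvm-function-passes");
//         // ... do the work being measured ...
//     } // `RaiiToken::drop` runs here and records start/end plus the events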

impl TimeGraph {
    pub fn new() -> TimeGraph {
        TimeGraph {
            data: Arc::new(Mutex::new(HashMap::new()))
        }
    }

    pub fn start(&self,
                 timeline: TimelineId,
                 work_package_kind: WorkPackageKind,
                 name: &str) -> Timeline {
        {
            let mut table = self.data.lock().unwrap();
            let data = table.entry(timeline).or_insert(PerThread {
                timings: Vec::new(),
                open_work_package: None,
            });

            assert!(data.open_work_package.is_none());
            data.open_work_package = Some((Instant::now(), work_package_kind, name.to_string()));
        }

        Timeline {
            token: Some(RaiiToken {
                graph: self.clone(),
                timeline,
                events: Vec::new(),
                _marker: PhantomData,
            }),
        }
    }

    fn end(&self, timeline: TimelineId, events: Vec<(String, Instant)>) {
        let end = Instant::now();

        let mut table = self.data.lock().unwrap();
        let data = table.get_mut(&timeline).unwrap();

        if let Some((start, work_package_kind, name)) = data.open_work_package.take() {
            data.timings.push(Timing {
                start,
                end,
                work_package_kind,
                name,
                events,
            });
        } else {
            bug!("end timing without start?")
        }
    }

    pub fn dump(&self, output_filename: &str) {
        let table = self.data.lock().unwrap();

        for data in table.values() {
            assert!(data.open_work_package.is_none());
        }

        let mut threads: Vec<PerThread> =
            table.values().map(|data| data.clone()).collect();

        threads.sort_by_key(|timeline| timeline.timings[0].start);

        let earliest_instant = threads[0].timings[0].start;
        let latest_instant = threads.iter()
                                    .map(|timeline| timeline.timings
                                                            .last()
                                                            .unwrap()
                                                            .end)
                                    .max()
                                    .unwrap();
        let max_distance = distance(earliest_instant, latest_instant);

        let mut file = File::create(format!("{}.html", output_filename)).unwrap();

        writeln!(file, "
            <html>
            <head>
                <style>
                    #threads a {{
                        position: absolute;
                        overflow: hidden;
                    }}
                    #threads {{
                        height: {total_height}px;
                        width: {width}px;
                    }}

                    .timeline {{
                        display: none;
                        width: {width}px;
                        position: relative;
                    }}

                    .timeline:target {{
                        display: block;
                    }}

                    .event {{
                        position: absolute;
                    }}
                </style>
            </head>
            <body>
                <div id='threads'>
        ",
        total_height = threads.len() * TIME_LINE_HEIGHT_STRIDE_IN_PX,
        width = OUTPUT_WIDTH_IN_PX,
        ).unwrap();
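
        // First pass: draw one clickable bar per recorded timing, one row per
        // timeline, scaled so that the whole run spans OUTPUT_WIDTH_IN_PX.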

        let mut color = 0;
        for (line_index, thread) in threads.iter().enumerate() {
            let line_top = line_index * TIME_LINE_HEIGHT_STRIDE_IN_PX;

            for span in &thread.timings {
                let start = distance(earliest_instant, span.start);
                let end = distance(earliest_instant, span.end);

                let start = normalize(start, max_distance, OUTPUT_WIDTH_IN_PX);
                let end = normalize(end, max_distance, OUTPUT_WIDTH_IN_PX);

                let colors = span.work_package_kind.0;

                writeln!(file, "<a href='#timing{}'
                                   style='top:{}px; \
                                          left:{}px; \
                                          width:{}px; \
                                          height:{}px; \
                                          background:{};'>{}</a>",
                    color,
                    line_top,
                    start,
                    end - start,
                    TIME_LINE_HEIGHT_IN_PX,
                    colors[color % colors.len()],
                    span.name,
                    ).unwrap();

                color += 1;
            }
        }

        writeln!(file, "
            </div>
        ").unwrap();

        let mut idx = 0;
        for thread in threads.iter() {
            for timing in &thread.timings {
                let colors = timing.work_package_kind.0;
                let height = TIME_LINE_HEIGHT_STRIDE_IN_PX * timing.events.len();
                writeln!(file, "<div class='timeline'
                                     id='timing{}'
                                     style='background:{};height:{}px;'>",
                         idx,
                         colors[idx % colors.len()],
                         height).unwrap();
                idx += 1;
                let max = distance(timing.start, timing.end);
                for (i, &(ref event, time)) in timing.events.iter().enumerate() {
                    let i = i as u64;
                    let time = distance(timing.start, time);
                    let at = normalize(time, max, OUTPUT_WIDTH_IN_PX);
                    writeln!(file, "<span class='event'
                                          style='left:{}px;\
                                                 top:{}px;'>{}</span>",
                             at,
                             TIME_LINE_HEIGHT_IN_PX * i,
                             event).unwrap();
                }
                writeln!(file, "</div>").unwrap();
            }
        }

        writeln!(file, "
            </body>
            </html>
        ").unwrap();
    }
}
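
// A sketch of the reporting step (the output name here is arbitrary): once
// every `Timeline` has been dropped,
//
//     graph.dump("trans-timings");
//
// writes `trans-timings.html`, a self-contained page of the bars and event
// lists rendered by `dump` above.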

impl Timeline {
    pub fn noop() -> Timeline {
        Timeline { token: None }
    }

    /// Record an event which happened at this moment on this timeline.
    ///
    /// Events are displayed in the eventual HTML output where you can click on
    /// a particular timeline and it'll expand to all of the events that
    /// happened on that timeline. This can then be used to drill into a
    /// particular timeline and see what events are happening and taking the
    /// most time.
    pub fn record(&mut self, name: &str) {
        if let Some(ref mut token) = self.token {
            token.events.push((name.to_string(), Instant::now()));
        }
    }
}

fn distance(zero: Instant, x: Instant) -> u64 {
    let duration = x.duration_since(zero);
    duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64
}
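
// For example, an `x` lying 1.5 seconds after `zero` yields
// 1 * 1_000_000_000 + 500_000_000 = 1_500_000_000 nanoseconds of "distance".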

fn normalize(distance: u64, max: u64, max_pixels: u64) -> u64 {
    (max_pixels * distance) / max
}
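
// So for a 4-second run (`max` = 4_000_000_000 ns) at the default
// OUTPUT_WIDTH_IN_PX of 1000, a point 1 second in lands at
// (1000 * 1_000_000_000) / 4_000_000_000 = 250px. The multiplication happens
// before the division so that small distances don't truncate to zero.

// A minimal end-to-end smoke test of the module above. This is an
// illustrative sketch, not part of the original file: the timeline id,
// palette, and output name are arbitrary, and `dump` writes
// `smoke-test.html` into the current directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::sleep;
    use std::time::Duration;

    const TEST_KIND: WorkPackageKind = WorkPackageKind(&["#DE9597", "#FED1D3"]);

    #[test]
    fn smoke() {
        let graph = TimeGraph::new();
        {
            let mut timeline = graph.start(TimelineId(0), TEST_KIND, "some work");
            sleep(Duration::from_millis(10));
            timeline.record("halfway");
            sleep(Duration::from_millis(10));
        } // dropping the `Timeline` is what records the finished `Timing`
        graph.dump("smoke-test");
    }
}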