auto merge of #8894 : alexcrichton/rust/faster, r=thestinger
The only changes to the default passes are that O1 now doesn't run the inline pass, just always-inline with lifetime intrinsics, and that O2 now has a threshold of 225 instead of 275. Otherwise the default passes being run are the same. I've also added a few more options for configuring the pass pipeline. Namely, you can now specify arguments to LLVM directly via the `--llvm-args` command line option, which operates similarly to `--passes`. I also added the ability to turn off pre-population of the pass manager in case you want to run *only* your own passes. I would consider this as closing #8890. I don't think that we should change the default inlining threshold because LLVM/clang will probably have chosen those numbers more carefully than we would. Regardless, here are the performance numbers from this commit: ``` $ ./x86_64-apple-darwin/stage0/bin/rustc ./gistfile1.rs --test --opt-level=3 -o before warning: no debug symbols in executable (-arch x86_64) $ ./before --bench running 1 test test bench::aes_bench_x8 ... bench: 1602 ns/iter (+/- 66) = 7990 MB/s test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured $ ./x86_64-apple-darwin/stage1/bin/rustc ./gistfile1.rs --test --opt-level=3 -o after warning: no debug symbols in executable (-arch x86_64) $ ./after --bench running 1 test test bench::aes_bench_x8 ... bench: 2103 ns/iter (+/- 175) = 6086 MB/s test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured $ ./x86_64-apple-darwin/stage1/bin/rustc ./gistfile1.rs --test --opt-level=3 -o after --llvm-args '-inline-threshold=225' warning: no debug symbols in executable (-arch x86_64) $ ./after --bench running 1 test test bench::aes_bench_x8 ... bench: 1600 ns/iter (+/- 71) = 8000 MB/s test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured ```
This commit is contained in:
commit
206ad61fd8
@ -216,7 +216,7 @@ pub mod write {
|
||||
use lib;
|
||||
|
||||
use std::c_str::ToCStr;
|
||||
use std::libc::c_uint;
|
||||
use std::libc::{c_uint, c_int};
|
||||
use std::path::Path;
|
||||
use std::run;
|
||||
use std::str;
|
||||
@ -257,17 +257,7 @@ pub mod write {
|
||||
}
|
||||
}
|
||||
|
||||
// Copy what clang does by turning on loop vectorization at O2 and
|
||||
// slp vectorization at O3
|
||||
let vectorize_loop = !sess.no_vectorize_loops() &&
|
||||
(sess.opts.optimize == session::Default ||
|
||||
sess.opts.optimize == session::Aggressive);
|
||||
let vectorize_slp = !sess.no_vectorize_slp() &&
|
||||
sess.opts.optimize == session::Aggressive;
|
||||
llvm::LLVMRustSetLLVMOptions(sess.print_llvm_passes(),
|
||||
vectorize_loop,
|
||||
vectorize_slp,
|
||||
sess.time_llvm_passes());
|
||||
configure_llvm(sess);
|
||||
|
||||
let OptLevel = match sess.opts.optimize {
|
||||
session::No => lib::llvm::CodeGenLevelNone,
|
||||
@ -293,12 +283,9 @@ pub mod write {
|
||||
// Create the two optimizing pass managers. These mirror what clang
|
||||
// does, and are populated by LLVM's default PassManagerBuilder.
|
||||
// Each manager has a different set of passes, but they also share
|
||||
// some common passes. Each one is initialized with the analysis
|
||||
// passes the target requires, and then further passes are added.
|
||||
// some common passes.
|
||||
let fpm = llvm::LLVMCreateFunctionPassManagerForModule(llmod);
|
||||
let mpm = llvm::LLVMCreatePassManager();
|
||||
llvm::LLVMRustAddAnalysisPasses(tm, fpm, llmod);
|
||||
llvm::LLVMRustAddAnalysisPasses(tm, mpm, llmod);
|
||||
|
||||
// If we're verifying or linting, add them to the function pass
|
||||
// manager.
|
||||
@ -308,32 +295,11 @@ pub mod write {
|
||||
if !sess.no_verify() { assert!(addpass("verify")); }
|
||||
if sess.lint_llvm() { assert!(addpass("lint")); }
|
||||
|
||||
// Create the PassManagerBuilder for LLVM. We configure it with
|
||||
// reasonable defaults and prepare it to actually populate the pass
|
||||
// manager.
|
||||
let builder = llvm::LLVMPassManagerBuilderCreate();
|
||||
match sess.opts.optimize {
|
||||
session::No => {
|
||||
// Don't add lifetime intrinsics at O0
|
||||
llvm::LLVMRustAddAlwaysInlinePass(builder, false);
|
||||
}
|
||||
// numeric values copied from clang
|
||||
session::Less => {
|
||||
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
|
||||
225);
|
||||
}
|
||||
session::Default | session::Aggressive => {
|
||||
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
|
||||
275);
|
||||
}
|
||||
if !sess.no_prepopulate_passes() {
|
||||
llvm::LLVMRustAddAnalysisPasses(tm, fpm, llmod);
|
||||
llvm::LLVMRustAddAnalysisPasses(tm, mpm, llmod);
|
||||
populate_llvm_passess(fpm, mpm, llmod, OptLevel);
|
||||
}
|
||||
llvm::LLVMPassManagerBuilderSetOptLevel(builder, OptLevel as c_uint);
|
||||
llvm::LLVMRustAddBuilderLibraryInfo(builder, llmod);
|
||||
|
||||
// Use the builder to populate the function/module pass managers.
|
||||
llvm::LLVMPassManagerBuilderPopulateFunctionPassManager(builder, fpm);
|
||||
llvm::LLVMPassManagerBuilderPopulateModulePassManager(builder, mpm);
|
||||
llvm::LLVMPassManagerBuilderDispose(builder);
|
||||
|
||||
for pass in sess.opts.custom_passes.iter() {
|
||||
do pass.with_c_str |s| {
|
||||
@ -424,6 +390,74 @@ pub mod write {
|
||||
sess.abort_if_errors();
|
||||
}
|
||||
}
|
||||
|
||||
// Builds an argv-style list of LLVM command-line options from the session
// (built-in flags first, then the session's `llvm_args`) and passes it to
// `LLVMRustSetLLVMOptions` in a single call.
unsafe fn configure_llvm(sess: Session) {
|
||||
// Copy what clang does by turning on loop vectorization at O2 and
|
||||
// slp vectorization at O3
|
||||
let vectorize_loop = !sess.no_vectorize_loops() &&
|
||||
(sess.opts.optimize == session::Default ||
|
||||
sess.opts.optimize == session::Aggressive);
|
||||
let vectorize_slp = !sess.no_vectorize_slp() &&
|
||||
sess.opts.optimize == session::Aggressive;
|
||||
|
||||
// `llvm_args` collects the raw pointers handed to LLVM, while
// `llvm_c_strs` collects the C strings those pointers were taken from.
let mut llvm_c_strs = ~[];
|
||||
let mut llvm_args = ~[];
|
||||
// Converts `arg` to a C string, records its raw pointer and then stores the
// C string itself. NOTE(review): presumably the C strings are kept so the
// pointers stay valid until the LLVMRustSetLLVMOptions call below -- confirm.
let add = |arg: &str| {
|
||||
let s = arg.to_c_str();
|
||||
llvm_args.push(s.with_ref(|p| p));
|
||||
llvm_c_strs.push(s);
|
||||
};
|
||||
add("rustc"); // fake program name
|
||||
// These two ARM EH ABI flags are passed unconditionally.
add("-arm-enable-ehabi");
|
||||
add("-arm-enable-ehabi-descriptors");
|
||||
// The remaining built-in flags depend on the session settings read above.
if vectorize_loop { add("-vectorize-loops"); }
|
||||
if vectorize_slp { add("-vectorize-slp"); }
|
||||
if sess.time_llvm_passes() { add("-time-passes"); }
|
||||
if sess.print_llvm_passes() { add("-debug-pass=Structure"); }
|
||||
|
||||
// Append the arguments stored in the session options after the built-in
// ones, in the order they were given.
for arg in sess.opts.llvm_args.iter() {
|
||||
add(*arg);
|
||||
}
|
||||
|
||||
// Pass the pointer array and its length to LLVM as (argc, argv).
do llvm_args.as_imm_buf |p, len| {
|
||||
llvm::LLVMRustSetLLVMOptions(len as c_int, p);
|
||||
}
|
||||
}
|
||||
|
||||
// Populates the function pass manager `fpm` and the module pass manager
// `mpm` through an LLVM PassManagerBuilder configured for the optimization
// level `opt`. `llmod` is passed to `LLVMRustAddBuilderLibraryInfo`. The
// builder is created and disposed of inside this function.
unsafe fn populate_llvm_passess(fpm: lib::llvm::PassManagerRef,
|
||||
mpm: lib::llvm::PassManagerRef,
|
||||
llmod: ModuleRef,
|
||||
opt: lib::llvm::CodeGenOptLevel) {
|
||||
// Create the PassManagerBuilder for LLVM. We configure it with
|
||||
// reasonable defaults and prepare it to actually populate the pass
|
||||
// manager.
|
||||
let builder = llvm::LLVMPassManagerBuilderCreate();
|
||||
// Choose the inlining strategy for the requested optimization level.
match opt {
|
||||
lib::llvm::CodeGenLevelNone => {
|
||||
// Don't add lifetime intrinsics at O0
|
||||
llvm::LLVMRustAddAlwaysInlinePass(builder, false);
|
||||
}
|
||||
lib::llvm::CodeGenLevelLess => {
|
||||
// Only the always-inline pass is used at this level; the `true` flag
// presumably enables lifetime intrinsics (the opposite of the
// `false` used for CodeGenLevelNone) -- confirm against rustllvm.
llvm::LLVMRustAddAlwaysInlinePass(builder, true);
|
||||
}
|
||||
// numeric values copied from clang
|
||||
lib::llvm::CodeGenLevelDefault => {
|
||||
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
|
||||
225);
|
||||
}
|
||||
lib::llvm::CodeGenLevelAggressive => {
|
||||
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
|
||||
275);
|
||||
}
|
||||
}
|
||||
// Give the builder the optimization level and the library info for `llmod`
// before it populates the pass managers.
llvm::LLVMPassManagerBuilderSetOptLevel(builder, opt as c_uint);
|
||||
llvm::LLVMRustAddBuilderLibraryInfo(builder, llmod);
|
||||
|
||||
// Use the builder to populate the function/module pass managers.
|
||||
llvm::LLVMPassManagerBuilderPopulateFunctionPassManager(builder, fpm);
|
||||
llvm::LLVMPassManagerBuilderPopulateModulePassManager(builder, mpm);
|
||||
// The builder is no longer needed once both managers are populated.
llvm::LLVMPassManagerBuilderDispose(builder);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -737,6 +737,14 @@ pub fn build_session_options(binary: @str,
|
||||
}).collect()
|
||||
}
|
||||
};
|
||||
let llvm_args = match getopts::opt_maybe_str(matches, "llvm-args") {
|
||||
None => ~[],
|
||||
Some(s) => {
|
||||
s.split_iter(|c: char| c == ' ' || c == ',').map(|s| {
|
||||
s.trim().to_owned()
|
||||
}).collect()
|
||||
}
|
||||
};
|
||||
|
||||
let sopts = @session::options {
|
||||
crate_type: crate_type,
|
||||
@ -744,6 +752,7 @@ pub fn build_session_options(binary: @str,
|
||||
gc: gc,
|
||||
optimize: opt_level,
|
||||
custom_passes: custom_passes,
|
||||
llvm_args: llvm_args,
|
||||
debuginfo: debuginfo,
|
||||
extra_debuginfo: extra_debuginfo,
|
||||
lint_opts: lint_opts,
|
||||
@ -851,6 +860,8 @@ pub fn optgroups() -> ~[getopts::groups::OptGroup] {
|
||||
Appends to the default list of passes to run for the \
|
||||
specified current optimization level. A value of \
|
||||
\"list\" will list all of the available passes", "NAMES"),
|
||||
optopt("", "llvm-args", "A list of arguments to pass to llvm, comma \
|
||||
separated", "ARGS"),
|
||||
optopt( "", "out-dir",
|
||||
"Write output to compiler-chosen filename
|
||||
in <dir>", "DIR"),
|
||||
|
@ -79,6 +79,7 @@ pub static once_fns: uint = 1 << 26;
|
||||
pub static print_llvm_passes: uint = 1 << 27;
|
||||
pub static no_vectorize_loops: uint = 1 << 28;
|
||||
pub static no_vectorize_slp: uint = 1 << 29;
|
||||
pub static no_prepopulate_passes: uint = 1 << 30;
|
||||
|
||||
pub fn debugging_opts_map() -> ~[(~str, ~str, uint)] {
|
||||
~[(~"verbose", ~"in general, enable more debug printouts", verbose),
|
||||
@ -126,6 +127,10 @@ pub fn debugging_opts_map() -> ~[(~str, ~str, uint)] {
|
||||
(~"print-llvm-passes",
|
||||
~"Prints the llvm optimization passes being run",
|
||||
print_llvm_passes),
|
||||
(~"no-prepopulate-passes",
|
||||
~"Don't pre-populate the pass managers with a list of passes, only use \
|
||||
the passes from --passes",
|
||||
no_prepopulate_passes),
|
||||
(~"no-vectorize-loops",
|
||||
~"Don't run the loop vectorization optimization passes",
|
||||
no_vectorize_loops),
|
||||
@ -152,6 +157,7 @@ pub struct options {
|
||||
gc: bool,
|
||||
optimize: OptLevel,
|
||||
custom_passes: ~[~str],
|
||||
llvm_args: ~[~str],
|
||||
debuginfo: bool,
|
||||
extra_debuginfo: bool,
|
||||
lint_opts: ~[(lint::lint, lint::level)],
|
||||
@ -320,6 +326,9 @@ impl Session_ {
|
||||
pub fn print_llvm_passes(@self) -> bool {
|
||||
self.debugging_opt(print_llvm_passes)
|
||||
}
|
||||
pub fn no_prepopulate_passes(@self) -> bool {
|
||||
self.debugging_opt(no_prepopulate_passes)
|
||||
}
|
||||
pub fn no_vectorize_loops(@self) -> bool {
|
||||
self.debugging_opt(no_vectorize_loops)
|
||||
}
|
||||
@ -351,6 +360,7 @@ pub fn basic_options() -> @options {
|
||||
gc: false,
|
||||
optimize: No,
|
||||
custom_passes: ~[],
|
||||
llvm_args: ~[],
|
||||
debuginfo: false,
|
||||
extra_debuginfo: false,
|
||||
lint_opts: ~[],
|
||||
|
@ -191,6 +191,7 @@ pub enum AsmDialect {
|
||||
AD_Intel = 1
|
||||
}
|
||||
|
||||
#[deriving(Eq)]
|
||||
pub enum CodeGenOptLevel {
|
||||
CodeGenLevelNone = 0,
|
||||
CodeGenLevelLess = 1,
|
||||
@ -2123,10 +2124,7 @@ pub mod llvm {
|
||||
pub fn LLVMRustPrintModule(PM: PassManagerRef,
|
||||
M: ModuleRef,
|
||||
Output: *c_char);
|
||||
pub fn LLVMRustSetLLVMOptions(PrintPasses: bool,
|
||||
VectorizeLoops: bool,
|
||||
VectorizeSLP: bool,
|
||||
TimePasses: bool);
|
||||
pub fn LLVMRustSetLLVMOptions(Argc: c_int, Argv: **c_char);
|
||||
pub fn LLVMRustPrintPasses();
|
||||
pub fn LLVMRustSetNormalizedTarget(M: ModuleRef, triple: *c_char);
|
||||
pub fn LLVMRustAddAlwaysInlinePass(P: PassManagerBuilderRef,
|
||||
|
@ -143,36 +143,15 @@ LLVMRustRunFunctionPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
LLVMRustSetLLVMOptions(bool PrintPasses,
|
||||
bool VectorizeLoops,
|
||||
bool VectorizeSLP,
|
||||
bool TimePasses) {
|
||||
LLVMRustSetLLVMOptions(int Argc, char **Argv) {
|
||||
// Initializing the command-line options more than once is not allowed. So,
|
||||
// check if they've already been initialized. (This could happen if we're
|
||||
// being called from rustpkg, for example). If the arguments change, then
|
||||
// that's just kinda unfortunate.
|
||||
static bool initialized = false;
|
||||
if (initialized) return;
|
||||
|
||||
int argc = 3;
|
||||
const char *argv[20] = {"rustc",
|
||||
"-arm-enable-ehabi",
|
||||
"-arm-enable-ehabi-descriptors"};
|
||||
if (PrintPasses) {
|
||||
argv[argc++] = "-debug-pass";
|
||||
argv[argc++] = "Structure";
|
||||
}
|
||||
if (VectorizeLoops) {
|
||||
argv[argc++] = "-vectorize-loops";
|
||||
}
|
||||
if (VectorizeSLP) {
|
||||
argv[argc++] = "-vectorize-slp";
|
||||
}
|
||||
if (TimePasses) {
|
||||
argv[argc++] = "-time-passes";
|
||||
}
|
||||
cl::ParseCommandLineOptions(argc, argv);
|
||||
initialized = true;
|
||||
cl::ParseCommandLineOptions(Argc, Argv);
|
||||
}
|
||||
|
||||
extern "C" bool
|
||||
|
Loading…
x
Reference in New Issue
Block a user