auto merge of #8894 : alexcrichton/rust/faster, r=thestinger

The only change to the default passes is that O1 now doesn't run the inline
pass, just always-inline with lifetime intrinsics. O2 also now has a threshold
of 225 instead of 275. Otherwise the default passes being run are the same.

I've also added a few more options for configuring the pass pipeline. Namely you
can now specify arguments to LLVM directly via the `--llvm-args` command line
option which operates similarly to `--passes`. I also added the ability to turn
off pre-population of the pass manager in case you want to run *only* your own
passes.

I would consider this as closing #8890. I don't think that we should change the default inlining threshold because LLVM/clang will probably have chosen those numbers more carefully than we would. Regardless, here are the performance numbers from this commit:

```
$ ./x86_64-apple-darwin/stage0/bin/rustc ./gistfile1.rs --test --opt-level=3 -o before
warning: no debug symbols in executable (-arch x86_64)
$ ./before --bench

running 1 test
test bench::aes_bench_x8 ... bench: 1602 ns/iter (+/- 66) = 7990 MB/s

test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured

$ ./x86_64-apple-darwin/stage1/bin/rustc ./gistfile1.rs --test --opt-level=3 -o after
warning: no debug symbols in executable (-arch x86_64)
$ ./after --bench

running 1 test
test bench::aes_bench_x8 ... bench: 2103 ns/iter (+/- 175) = 6086 MB/s

test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured

$ ./x86_64-apple-darwin/stage1/bin/rustc ./gistfile1.rs --test --opt-level=3 -o after --llvm-args '-inline-threshold=225'
warning: no debug symbols in executable (-arch x86_64)
$ ./after --bench

running 1 test
test bench::aes_bench_x8 ... bench: 1600 ns/iter (+/- 71) = 8000 MB/s

test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured

```
This commit is contained in:
bors 2013-08-30 19:20:36 -07:00
commit 206ad61fd8
5 changed files with 100 additions and 68 deletions

View File

@ -216,7 +216,7 @@ pub mod write {
use lib;
use std::c_str::ToCStr;
use std::libc::c_uint;
use std::libc::{c_uint, c_int};
use std::path::Path;
use std::run;
use std::str;
@ -257,17 +257,7 @@ pub mod write {
}
}
// Copy what clang does by turning on loop vectorization at O2 and
// slp vectorization at O3
let vectorize_loop = !sess.no_vectorize_loops() &&
(sess.opts.optimize == session::Default ||
sess.opts.optimize == session::Aggressive);
let vectorize_slp = !sess.no_vectorize_slp() &&
sess.opts.optimize == session::Aggressive;
llvm::LLVMRustSetLLVMOptions(sess.print_llvm_passes(),
vectorize_loop,
vectorize_slp,
sess.time_llvm_passes());
configure_llvm(sess);
let OptLevel = match sess.opts.optimize {
session::No => lib::llvm::CodeGenLevelNone,
@ -293,12 +283,9 @@ pub mod write {
// Create the two optimizing pass managers. These mirror what clang
// does, and are populated by LLVM's default PassManagerBuilder.
// Each manager has a different set of passes, but they also share
// some common passes. Each one is initialized with the analysis
// passes the target requires, and then further passes are added.
// some common passes.
let fpm = llvm::LLVMCreateFunctionPassManagerForModule(llmod);
let mpm = llvm::LLVMCreatePassManager();
llvm::LLVMRustAddAnalysisPasses(tm, fpm, llmod);
llvm::LLVMRustAddAnalysisPasses(tm, mpm, llmod);
// If we're verifying or linting, add them to the function pass
// manager.
@ -308,32 +295,11 @@ pub mod write {
if !sess.no_verify() { assert!(addpass("verify")); }
if sess.lint_llvm() { assert!(addpass("lint")); }
// Create the PassManagerBuilder for LLVM. We configure it with
// reasonable defaults and prepare it to actually populate the pass
// manager.
let builder = llvm::LLVMPassManagerBuilderCreate();
match sess.opts.optimize {
session::No => {
// Don't add lifetime intrinsics at O0
llvm::LLVMRustAddAlwaysInlinePass(builder, false);
}
// numeric values copied from clang
session::Less => {
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
225);
}
session::Default | session::Aggressive => {
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
275);
}
if !sess.no_prepopulate_passes() {
llvm::LLVMRustAddAnalysisPasses(tm, fpm, llmod);
llvm::LLVMRustAddAnalysisPasses(tm, mpm, llmod);
populate_llvm_passess(fpm, mpm, llmod, OptLevel);
}
llvm::LLVMPassManagerBuilderSetOptLevel(builder, OptLevel as c_uint);
llvm::LLVMRustAddBuilderLibraryInfo(builder, llmod);
// Use the builder to populate the function/module pass managers.
llvm::LLVMPassManagerBuilderPopulateFunctionPassManager(builder, fpm);
llvm::LLVMPassManagerBuilderPopulateModulePassManager(builder, mpm);
llvm::LLVMPassManagerBuilderDispose(builder);
for pass in sess.opts.custom_passes.iter() {
do pass.with_c_str |s| {
@ -424,6 +390,74 @@ pub mod write {
sess.abort_if_errors();
}
}
// Builds an argv-style argument list and hands it to LLVM's command-line
// parser through `LLVMRustSetLLVMOptions`.
//
// The list always starts with a fake program name and the two ARM EHABI
// flags, then gains vectorization/timing/debug flags depending on the
// session, and finally every entry of `sess.opts.llvm_args`.
unsafe fn configure_llvm(sess: Session) {
// Copy what clang does by turning on loop vectorization at O2 and
// slp vectorization at O3
let vectorize_loop = !sess.no_vectorize_loops() &&
(sess.opts.optimize == session::Default ||
sess.opts.optimize == session::Aggressive);
let vectorize_slp = !sess.no_vectorize_slp() &&
sess.opts.optimize == session::Aggressive;
// `llvm_c_strs` owns the C strings while `llvm_args` holds the raw pointers
// into them; presumably the owner vector exists so the pointers stay valid
// until the FFI call below -- TODO confirm.
let mut llvm_c_strs = ~[];
let mut llvm_args = ~[];
// Converts `arg` to a C string, records its pointer, then stores the string.
let add = |arg: &str| {
let s = arg.to_c_str();
llvm_args.push(s.with_ref(|p| p));
llvm_c_strs.push(s);
};
add("rustc"); // fake program name
add("-arm-enable-ehabi");
add("-arm-enable-ehabi-descriptors");
if vectorize_loop { add("-vectorize-loops"); }
if vectorize_slp { add("-vectorize-slp"); }
if sess.time_llvm_passes() { add("-time-passes"); }
if sess.print_llvm_passes() { add("-debug-pass=Structure"); }
// Arguments taken from the session options are appended last.
for arg in sess.opts.llvm_args.iter() {
add(*arg);
}
// Pass the pointer array and its length as (argc, argv).
do llvm_args.as_imm_buf |p, len| {
llvm::LLVMRustSetLLVMOptions(len as c_int, p);
}
}
// Populates the function pass manager `fpm` and the module pass manager
// `mpm` using an LLVM PassManagerBuilder configured for optimization level
// `opt`. `llmod` is used to attach library info to the builder.
//
// The inlining strategy depends on `opt`:
//   * None       -> always-inline pass, called with `false`
//   * Less       -> always-inline pass, called with `true`
//   * Default    -> inliner with threshold 225
//   * Aggressive -> inliner with threshold 275
unsafe fn populate_llvm_passess(fpm: lib::llvm::PassManagerRef,
mpm: lib::llvm::PassManagerRef,
llmod: ModuleRef,
opt: lib::llvm::CodeGenOptLevel) {
// Create the PassManagerBuilder for LLVM. We configure it with
// reasonable defaults and prepare it to actually populate the pass
// manager.
let builder = llvm::LLVMPassManagerBuilderCreate();
match opt {
lib::llvm::CodeGenLevelNone => {
// Don't add lifetime intrinsics at O0
llvm::LLVMRustAddAlwaysInlinePass(builder, false);
}
lib::llvm::CodeGenLevelLess => {
// presumably `true` enables lifetime intrinsics here -- TODO confirm
llvm::LLVMRustAddAlwaysInlinePass(builder, true);
}
// numeric values copied from clang
lib::llvm::CodeGenLevelDefault => {
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
225);
}
lib::llvm::CodeGenLevelAggressive => {
llvm::LLVMPassManagerBuilderUseInlinerWithThreshold(builder,
275);
}
}
llvm::LLVMPassManagerBuilderSetOptLevel(builder, opt as c_uint);
llvm::LLVMRustAddBuilderLibraryInfo(builder, llmod);
// Use the builder to populate the function/module pass managers.
llvm::LLVMPassManagerBuilderPopulateFunctionPassManager(builder, fpm);
llvm::LLVMPassManagerBuilderPopulateModulePassManager(builder, mpm);
// The builder is no longer used after populating, so release it.
llvm::LLVMPassManagerBuilderDispose(builder);
}
}

View File

@ -737,6 +737,14 @@ pub fn build_session_options(binary: @str,
}).collect()
}
};
// Parse the optional `llvm-args` value: split it on spaces and commas and
// trim each piece; an absent option yields an empty list.
// NOTE(review): pieces are not filtered, so adjacent separators (e.g.
// "a, b") presumably produce empty strings that end up in the list --
// confirm, and consider dropping empty entries.
let llvm_args = match getopts::opt_maybe_str(matches, "llvm-args") {
None => ~[],
Some(s) => {
s.split_iter(|c: char| c == ' ' || c == ',').map(|s| {
s.trim().to_owned()
}).collect()
}
};
let sopts = @session::options {
crate_type: crate_type,
@ -744,6 +752,7 @@ pub fn build_session_options(binary: @str,
gc: gc,
optimize: opt_level,
custom_passes: custom_passes,
llvm_args: llvm_args,
debuginfo: debuginfo,
extra_debuginfo: extra_debuginfo,
lint_opts: lint_opts,
@ -851,6 +860,8 @@ pub fn optgroups() -> ~[getopts::groups::OptGroup] {
Appends to the default list of passes to run for the \
specified current optimization level. A value of \
\"list\" will list all of the available passes", "NAMES"),
optopt("", "llvm-args", "A list of arguments to pass to llvm, comma \
separated", "ARGS"),
optopt( "", "out-dir",
"Write output to compiler-chosen filename
in <dir>", "DIR"),

View File

@ -79,6 +79,7 @@ pub static once_fns: uint = 1 << 26;
pub static print_llvm_passes: uint = 1 << 27;
pub static no_vectorize_loops: uint = 1 << 28;
pub static no_vectorize_slp: uint = 1 << 29;
pub static no_prepopulate_passes: uint = 1 << 30;
pub fn debugging_opts_map() -> ~[(~str, ~str, uint)] {
~[(~"verbose", ~"in general, enable more debug printouts", verbose),
@ -126,6 +127,10 @@ pub fn debugging_opts_map() -> ~[(~str, ~str, uint)] {
(~"print-llvm-passes",
~"Prints the llvm optimization passes being run",
print_llvm_passes),
(~"no-prepopulate-passes",
~"Don't pre-populate the pass managers with a list of passes, only use \
the passes from --passes",
no_prepopulate_passes),
(~"no-vectorize-loops",
~"Don't run the loop vectorization optimization passes",
no_vectorize_loops),
@ -152,6 +157,7 @@ pub struct options {
gc: bool,
optimize: OptLevel,
custom_passes: ~[~str],
llvm_args: ~[~str],
debuginfo: bool,
extra_debuginfo: bool,
lint_opts: ~[(lint::lint, lint::level)],
@ -320,6 +326,9 @@ impl Session_ {
// Reports the result of `debugging_opt` for the `print_llvm_passes` flag.
pub fn print_llvm_passes(@self) -> bool {
self.debugging_opt(print_llvm_passes)
}
// Reports the result of `debugging_opt` for the `no_prepopulate_passes` flag.
pub fn no_prepopulate_passes(@self) -> bool {
self.debugging_opt(no_prepopulate_passes)
}
// Reports the result of `debugging_opt` for the `no_vectorize_loops` flag.
pub fn no_vectorize_loops(@self) -> bool {
self.debugging_opt(no_vectorize_loops)
}
@ -351,6 +360,7 @@ pub fn basic_options() -> @options {
gc: false,
optimize: No,
custom_passes: ~[],
llvm_args: ~[],
debuginfo: false,
extra_debuginfo: false,
lint_opts: ~[],

View File

@ -191,6 +191,7 @@ pub enum AsmDialect {
AD_Intel = 1
}
#[deriving(Eq)]
pub enum CodeGenOptLevel {
CodeGenLevelNone = 0,
CodeGenLevelLess = 1,
@ -2123,10 +2124,7 @@ pub mod llvm {
pub fn LLVMRustPrintModule(PM: PassManagerRef,
M: ModuleRef,
Output: *c_char);
pub fn LLVMRustSetLLVMOptions(PrintPasses: bool,
VectorizeLoops: bool,
VectorizeSLP: bool,
TimePasses: bool);
pub fn LLVMRustSetLLVMOptions(Argc: c_int, Argv: **c_char);
pub fn LLVMRustPrintPasses();
pub fn LLVMRustSetNormalizedTarget(M: ModuleRef, triple: *c_char);
pub fn LLVMRustAddAlwaysInlinePass(P: PassManagerBuilderRef,

View File

@ -143,36 +143,15 @@ LLVMRustRunFunctionPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
}
extern "C" void
LLVMRustSetLLVMOptions(bool PrintPasses,
bool VectorizeLoops,
bool VectorizeSLP,
bool TimePasses) {
LLVMRustSetLLVMOptions(int Argc, char **Argv) {
// Initializing the command-line options more than once is not allowed. So,
// check if they've already been initialized. (This could happen if we're
// being called from rustpkg, for example). If the arguments change, then
// that's just kinda unfortunate.
static bool initialized = false;
if (initialized) return;
int argc = 3;
const char *argv[20] = {"rustc",
"-arm-enable-ehabi",
"-arm-enable-ehabi-descriptors"};
if (PrintPasses) {
argv[argc++] = "-debug-pass";
argv[argc++] = "Structure";
}
if (VectorizeLoops) {
argv[argc++] = "-vectorize-loops";
}
if (VectorizeSLP) {
argv[argc++] = "-vectorize-slp";
}
if (TimePasses) {
argv[argc++] = "-time-passes";
}
cl::ParseCommandLineOptions(argc, argv);
initialized = true;
cl::ParseCommandLineOptions(Argc, Argv);
}
extern "C" bool