Enable AutoFDO.
This largely involves implementing the options debug-info-for-profiling and profile-sample-use and forwarding them on to LLVM.

AutoFDO can be used on x86-64 Linux like this:

    rustc -O -Zdebug-info-for-profiling main.rs -o main
    perf record -b ./main
    create_llvm_prof --binary=main --out=code.prof
    rustc -O -Zprofile-sample-use=code.prof main.rs -o main2

Now `main2` will have feedback-directed optimization applied to it.

The create_llvm_prof tool can be obtained from this GitHub repository: https://github.com/google/autofdo

Fixes #64892.
This commit is contained in:
parent
d7539a6af0
commit
a17193dbb9
@ -263,6 +263,10 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
|
||||
attributes::emit_uwtable(llfn, true);
|
||||
}
|
||||
|
||||
if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
|
||||
llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
|
||||
}
|
||||
|
||||
// FIXME: none of these three functions interact with source level attributes.
|
||||
set_frame_pointer_type(cx, llfn);
|
||||
set_instrument_function(cx, llfn);
|
||||
|
@ -370,6 +370,13 @@ fn get_pgo_use_path(config: &ModuleConfig) -> Option<CString> {
|
||||
.map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
|
||||
}
|
||||
|
||||
/// Converts the sample-profile path stored in `config.pgo_sample_use`, if
/// any, into a `CString`.
///
/// Returns `None` when `pgo_sample_use` is `None`. The path is converted with
/// `to_string_lossy`, so a path that is not valid UTF-8 is not preserved
/// byte-for-byte. The `unwrap` panics if the resulting bytes contain an
/// interior NUL, which `CString::new` rejects.
fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option<CString> {
|
||||
config
|
||||
.pgo_sample_use
|
||||
.as_ref()
|
||||
.map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
|
||||
}
|
||||
|
||||
pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool {
|
||||
// The new pass manager is enabled by default for LLVM >= 13.
|
||||
// This matches Clang, which also enables it since Clang 13.
|
||||
@ -389,6 +396,7 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
|
||||
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
|
||||
let pgo_gen_path = get_pgo_gen_path(config);
|
||||
let pgo_use_path = get_pgo_use_path(config);
|
||||
let pgo_sample_use_path = get_pgo_sample_use_path(config);
|
||||
let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO;
|
||||
// Sanitizer instrumentation is only inserted during the pre-link optimization stage.
|
||||
let sanitizer_options = if !is_lto {
|
||||
@ -439,6 +447,8 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
|
||||
pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
|
||||
config.instrument_coverage,
|
||||
config.instrument_gcov,
|
||||
pgo_sample_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
|
||||
config.debug_info_for_profiling,
|
||||
llvm_selfprofiler,
|
||||
selfprofile_before_pass_callback,
|
||||
selfprofile_after_pass_callback,
|
||||
@ -544,6 +554,9 @@ pub(crate) unsafe fn optimize(
|
||||
if config.instrument_coverage {
|
||||
llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap());
|
||||
}
|
||||
if config.debug_info_for_profiling {
|
||||
llvm::LLVMRustAddPass(mpm, find_pass("add-discriminators").unwrap());
|
||||
}
|
||||
|
||||
add_sanitizer_passes(config, &mut extra_passes);
|
||||
|
||||
@ -1001,6 +1014,7 @@ pub unsafe fn with_llvm_pmb(
|
||||
let inline_threshold = config.inline_threshold;
|
||||
let pgo_gen_path = get_pgo_gen_path(config);
|
||||
let pgo_use_path = get_pgo_use_path(config);
|
||||
let pgo_sample_use_path = get_pgo_sample_use_path(config);
|
||||
|
||||
llvm::LLVMRustConfigurePassManagerBuilder(
|
||||
builder,
|
||||
@ -1011,6 +1025,7 @@ pub unsafe fn with_llvm_pmb(
|
||||
prepare_for_thin_lto,
|
||||
pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
|
||||
pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
|
||||
pgo_sample_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
|
||||
);
|
||||
|
||||
llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32);
|
||||
|
@ -2176,6 +2176,7 @@ pub fn LLVMRustConfigurePassManagerBuilder(
|
||||
PrepareForThinLTO: bool,
|
||||
PGOGenPath: *const c_char,
|
||||
PGOUsePath: *const c_char,
|
||||
PGOSampleUsePath: *const c_char,
|
||||
);
|
||||
pub fn LLVMRustAddLibraryInfo(
|
||||
PM: &PassManager<'a>,
|
||||
@ -2210,6 +2211,8 @@ pub fn LLVMRustOptimizeWithNewPassManager(
|
||||
PGOUsePath: *const c_char,
|
||||
InstrumentCoverage: bool,
|
||||
InstrumentGCOV: bool,
|
||||
PGOSampleUsePath: *const c_char,
|
||||
DebugInfoForProfiling: bool,
|
||||
llvm_selfprofiler: *mut c_void,
|
||||
begin_callback: SelfProfileBeforePassCallback,
|
||||
end_callback: SelfProfileAfterPassCallback,
|
||||
|
@ -286,6 +286,9 @@ fn push_linker_plugin_lto_args(&mut self, plugin_path: Option<&OsStr>) {
|
||||
config::OptLevel::Aggressive => "O3",
|
||||
};
|
||||
|
||||
if let Some(path) = &self.sess.opts.debugging_opts.profile_sample_use {
|
||||
self.linker_arg(&format!("-plugin-opt=sample-profile={}", path.display()));
|
||||
};
|
||||
self.linker_arg(&format!("-plugin-opt={}", opt_level));
|
||||
self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu));
|
||||
}
|
||||
|
@ -83,6 +83,8 @@ pub struct ModuleConfig {
|
||||
|
||||
pub pgo_gen: SwitchWithOptPath,
|
||||
pub pgo_use: Option<PathBuf>,
|
||||
pub pgo_sample_use: Option<PathBuf>,
|
||||
pub debug_info_for_profiling: bool,
|
||||
pub instrument_coverage: bool,
|
||||
pub instrument_gcov: bool,
|
||||
|
||||
@ -176,6 +178,8 @@ macro_rules! if_regular {
|
||||
SwitchWithOptPath::Disabled
|
||||
),
|
||||
pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None),
|
||||
pgo_sample_use: if_regular!(sess.opts.debugging_opts.profile_sample_use.clone(), None),
|
||||
debug_info_for_profiling: sess.opts.debugging_opts.debug_info_for_profiling,
|
||||
instrument_coverage: if_regular!(sess.instrument_coverage(), false),
|
||||
instrument_gcov: if_regular!(
|
||||
// compiler_builtins overrides the codegen-units settings,
|
||||
|
@ -715,6 +715,7 @@ macro_rules! tracked {
|
||||
tracked!(chalk, true);
|
||||
tracked!(codegen_backend, Some("abc".to_string()));
|
||||
tracked!(crate_attr, vec!["abc".to_string()]);
|
||||
tracked!(debug_info_for_profiling, true);
|
||||
tracked!(debug_macros, true);
|
||||
tracked!(dep_info_omit_d_target, true);
|
||||
tracked!(dual_proc_macros, true);
|
||||
@ -752,6 +753,7 @@ macro_rules! tracked {
|
||||
tracked!(profile, true);
|
||||
tracked!(profile_emit, Some(PathBuf::from("abc")));
|
||||
tracked!(profiler_runtime, "abc".to_string());
|
||||
tracked!(profile_sample_use, Some(PathBuf::from("abc")));
|
||||
tracked!(relax_elf_relocations, Some(true));
|
||||
tracked!(relro_level, Some(RelroLevel::Full));
|
||||
tracked!(remap_cwd_prefix, Some(PathBuf::from("abc")));
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
#include "llvm/Transforms/IPO/FunctionImport.h"
|
||||
#include "llvm/Transforms/Utils/AddDiscriminators.h"
|
||||
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
||||
#include "llvm/LTO/LTO.h"
|
||||
#include "llvm-c/Transforms/PassManagerBuilder.h"
|
||||
@ -39,6 +40,7 @@
|
||||
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
|
||||
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
|
||||
#include "llvm/Transforms/Utils.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -523,7 +525,7 @@ extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) {
|
||||
extern "C" void LLVMRustConfigurePassManagerBuilder(
|
||||
LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel,
|
||||
bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO,
|
||||
const char* PGOGenPath, const char* PGOUsePath) {
|
||||
const char* PGOGenPath, const char* PGOUsePath, const char* PGOSampleUsePath) {
|
||||
unwrap(PMBR)->MergeFunctions = MergeFunctions;
|
||||
unwrap(PMBR)->SLPVectorize = SLPVectorize;
|
||||
unwrap(PMBR)->OptLevel = fromRust(OptLevel);
|
||||
@ -531,13 +533,14 @@ extern "C" void LLVMRustConfigurePassManagerBuilder(
|
||||
unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO;
|
||||
|
||||
if (PGOGenPath) {
|
||||
assert(!PGOUsePath);
|
||||
assert(!PGOUsePath && !PGOSampleUsePath);
|
||||
unwrap(PMBR)->EnablePGOInstrGen = true;
|
||||
unwrap(PMBR)->PGOInstrGen = PGOGenPath;
|
||||
}
|
||||
if (PGOUsePath) {
|
||||
assert(!PGOGenPath);
|
||||
} else if (PGOUsePath) {
|
||||
assert(!PGOSampleUsePath);
|
||||
unwrap(PMBR)->PGOInstrUse = PGOUsePath;
|
||||
} else if (PGOSampleUsePath) {
|
||||
unwrap(PMBR)->PGOSampleUse = PGOSampleUsePath;
|
||||
}
|
||||
}
|
||||
|
||||
@ -759,6 +762,7 @@ LLVMRustOptimizeWithNewPassManager(
|
||||
LLVMRustSanitizerOptions *SanitizerOptions,
|
||||
const char *PGOGenPath, const char *PGOUsePath,
|
||||
bool InstrumentCoverage, bool InstrumentGCOV,
|
||||
const char *PGOSampleUsePath, bool DebugInfoForProfiling,
|
||||
void* LlvmSelfProfiler,
|
||||
LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
|
||||
LLVMRustSelfProfileAfterPassCallback AfterPassCallback,
|
||||
@ -797,11 +801,19 @@ LLVMRustOptimizeWithNewPassManager(
|
||||
|
||||
Optional<PGOOptions> PGOOpt;
|
||||
if (PGOGenPath) {
|
||||
assert(!PGOUsePath);
|
||||
PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr);
|
||||
assert(!PGOUsePath && !PGOSampleUsePath);
|
||||
PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr,
|
||||
PGOOptions::NoCSAction, DebugInfoForProfiling);
|
||||
} else if (PGOUsePath) {
|
||||
assert(!PGOGenPath);
|
||||
PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse);
|
||||
assert(!PGOSampleUsePath);
|
||||
PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse,
|
||||
PGOOptions::NoCSAction, DebugInfoForProfiling);
|
||||
} else if (PGOSampleUsePath) {
|
||||
PGOOpt = PGOOptions(PGOSampleUsePath, "", "", PGOOptions::SampleUse,
|
||||
PGOOptions::NoCSAction, DebugInfoForProfiling);
|
||||
} else if (DebugInfoForProfiling) {
|
||||
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
|
||||
PGOOptions::NoCSAction, DebugInfoForProfiling);
|
||||
}
|
||||
|
||||
#if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0)
|
||||
|
@ -2009,6 +2009,15 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
|
||||
);
|
||||
}
|
||||
|
||||
if debugging_opts.profile_sample_use.is_some()
|
||||
&& (cg.profile_generate.enabled() || cg.profile_use.is_some())
|
||||
{
|
||||
early_error(
|
||||
error_format,
|
||||
"option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
|
||||
);
|
||||
}
|
||||
|
||||
if debugging_opts.instrument_coverage.is_some()
|
||||
&& debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off)
|
||||
{
|
||||
|
@ -1040,6 +1040,8 @@ mod parse {
|
||||
"combine CGUs into a single one"),
|
||||
crate_attr: Vec<String> = (Vec::new(), parse_string_push, [TRACKED],
|
||||
"inject the given attribute in the crate"),
|
||||
debug_info_for_profiling: bool = (false, parse_bool, [TRACKED],
|
||||
"emit discriminators and other data necessary for AutoFDO"),
|
||||
debug_macros: bool = (false, parse_bool, [TRACKED],
|
||||
"emit line numbers debug info inside macros (default: no)"),
|
||||
deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED],
|
||||
@ -1242,6 +1244,8 @@ mod parse {
|
||||
(default based on relative source path)"),
|
||||
profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED],
|
||||
"name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"),
|
||||
profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
|
||||
"use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"),
|
||||
query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
|
||||
"enable queries of the dependency graph for regression testing (default: no)"),
|
||||
query_stats: bool = (false, parse_bool, [UNTRACKED],
|
||||
|
@ -1353,6 +1353,16 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
|
||||
}
|
||||
}
|
||||
|
||||
// Do the same for sample profile data: the file given to
// `-Z profile-sample-use` must exist; otherwise a session error is reported.
// Note that this is a `-Z` (debugging) option, so the message names it as such.
|
||||
if let Some(ref path) = sess.opts.debugging_opts.profile_sample_use {
|
||||
if !path.exists() {
|
||||
sess.err(&format!(
|
||||
"File `{}` passed to `-Z profile-sample-use` does not exist.",
|
||||
path.display()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Unwind tables cannot be disabled if the target requires them.
|
||||
if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables {
|
||||
if sess.target.requires_uwtable && !include_uwtables {
|
||||
|
@ -0,0 +1,35 @@
|
||||
# `debug-info-for-profiling`
|
||||
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
Automatic Feedback Directed Optimization (AFDO) is a method for using sampling
|
||||
based profiles to guide optimizations. This is contrasted with other methods of
|
||||
FDO or profile-guided optimization (PGO) which use instrumented profiling.
|
||||
|
||||
Unlike PGO (controlled by the `rustc` flags `-Cprofile-generate` and
|
||||
`-Cprofile-use`), a binary being profiled does not perform significantly worse,
|
||||
and thus it's possible to profile binaries used in real workflows and not
|
||||
necessary to construct artificial workflows.
|
||||
|
||||
## Use
|
||||
|
||||
In order to use AFDO, the target platform must be Linux running on an `x86_64`
|
||||
architecture with the performance profiler `perf` available. In addition, the
|
||||
external tool `create_llvm_prof` from [this repository] must be used.
|
||||
|
||||
Given a Rust file `main.rs`, we can produce an optimized binary as follows:
|
||||
|
||||
```shell
|
||||
rustc -O -Zdebug-info-for-profiling main.rs -o main
|
||||
perf record -b ./main
|
||||
create_llvm_prof --binary=main --out=code.prof
|
||||
rustc -O -Zprofile-sample-use=code.prof main.rs -o main2
|
||||
```
|
||||
|
||||
The `perf` command produces a profile `perf.data`, which is then used by the
|
||||
`create_llvm_prof` command to create `code.prof`. This final profile is then
|
||||
used by `rustc` to guide optimizations in producing the binary `main2`.
|
||||
|
||||
[this repository]: https://github.com/google/autofdo
|
@ -0,0 +1,10 @@
|
||||
# `profile-sample-use`
|
||||
|
||||
---
|
||||
|
||||
`-Zprofile-sample-use=code.prof` directs `rustc` to use the profile
|
||||
`code.prof` as a source for Automatic Feedback Directed Optimization (AFDO).
|
||||
See the documentation of [`-Zdebug-info-for-profiling`] for more information
|
||||
on using AFDO.
|
||||
|
||||
[`-Zdebug-info-for-profiling`]: debug_info_for_profiling.html
|
Loading…
Reference in New Issue
Block a user