Auto merge of #76830 - Artoria2e5:tune, r=nagisa
Pass tune-cpu to LLVM. I think this is how it should work... See https://internals.rust-lang.org/t/expose-tune-cpu-from-llvm/13088 for the background, or see the documentation diff.
This commit is contained in:
commit
f54072bb81
@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
||||
);
|
||||
}
|
||||
|
||||
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
||||
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
|
||||
let tune_cpu = SmallCStr::new(tune);
|
||||
llvm::AddFunctionAttrStringValue(
|
||||
llfn,
|
||||
llvm::AttributePlace::Function,
|
||||
const_cstr!("tune-cpu"),
|
||||
tune_cpu.as_c_str(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the `NonLazyBind` LLVM attribute on a given function,
|
||||
/// assuming the codegen options allow skipping the PLT.
|
||||
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
|
||||
@ -303,6 +315,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
|
||||
// Without this, ThinLTO won't inline Rust functions into Clang generated
|
||||
// functions (because Clang annotates functions this way too).
|
||||
apply_target_cpu_attr(cx, llfn);
|
||||
// tune-cpu is only conveyed through the attribute for our purpose.
|
||||
// The target doesn't care; the subtarget reads our attribute.
|
||||
apply_tune_cpu_attr(cx, llfn);
|
||||
|
||||
let features = llvm_target_features(cx.tcx.sess)
|
||||
.map(|s| s.to_string())
|
||||
|
@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
|
||||
}
|
||||
|
||||
fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
|
||||
attributes::apply_target_cpu_attr(self, llfn)
|
||||
attributes::apply_target_cpu_attr(self, llfn);
|
||||
attributes::apply_tune_cpu_attr(self, llfn);
|
||||
}
|
||||
|
||||
fn create_used_variable(&self) {
|
||||
|
@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
|
||||
/// Returns the target CPU name for `sess`, as resolved by `llvm_util::target_cpu`.
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
|
||||
llvm_util::target_cpu(sess)
|
||||
}
|
||||
/// Returns the tuning CPU name for `sess`, as resolved by `llvm_util::tune_cpu`,
/// or `None` when that helper yields none.
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
|
||||
llvm_util::tune_cpu(sess)
|
||||
}
|
||||
}
|
||||
|
||||
impl WriteBackendMethods for LlvmCodegenBackend {
|
||||
|
@ -202,11 +202,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn target_cpu(sess: &Session) -> &str {
|
||||
let name = match sess.opts.cg.target_cpu {
|
||||
Some(ref s) => &**s,
|
||||
None => &*sess.target.target.options.cpu,
|
||||
};
|
||||
fn handle_native(name: &str) -> &str {
|
||||
if name != "native" {
|
||||
return name;
|
||||
}
|
||||
@ -217,3 +213,19 @@ pub fn target_cpu(sess: &Session) -> &str {
|
||||
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn target_cpu(sess: &Session) -> &str {
|
||||
let name = match sess.opts.cg.target_cpu {
|
||||
Some(ref s) => &**s,
|
||||
None => &*sess.target.target.options.cpu,
|
||||
};
|
||||
|
||||
handle_native(name)
|
||||
}
|
||||
|
||||
pub fn tune_cpu(sess: &Session) -> Option<&str> {
|
||||
match sess.opts.debugging_opts.tune_cpu {
|
||||
Some(ref s) => Some(handle_native(&**s)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
@ -124,4 +124,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
|
||||
opt_level: config::OptLevel,
|
||||
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
|
||||
/// Returns the name of the CPU that code is generated for in `sess`.
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
|
||||
/// Returns the name of the CPU that code should be tuned (scheduled) for in
/// `sess`, or `None` if no tuning CPU applies.
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
|
||||
}
|
||||
|
@ -585,6 +585,7 @@ fn test_debugging_options_tracking_hash() {
|
||||
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
|
||||
tracked!(teach, true);
|
||||
tracked!(thinlto, Some(true));
|
||||
tracked!(tune_cpu, Some(String::from("abc")));
|
||||
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
|
||||
tracked!(treat_err_as_bug, Some(1));
|
||||
tracked!(unleash_the_miri_inside_of_you, true);
|
||||
|
@ -1078,6 +1078,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
|
||||
"show extended diagnostic help (default: no)"),
|
||||
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
|
||||
"set the current terminal width"),
|
||||
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
|
||||
"select processor to schedule for (`rustc --print target-cpus` for details)"),
|
||||
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||
"enable ThinLTO when possible"),
|
||||
// We default to 1 here since we want to behave like
|
||||
|
@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
|
||||
This instructs `rustc` to generate code specifically for a particular processor.
|
||||
|
||||
You can run `rustc --print target-cpus` to see the valid options to pass
|
||||
here. Additionally, `native` can be passed to use the processor of the host
|
||||
machine. Each target has a default base CPU.
|
||||
here. Each target has a default base CPU. Special values include:
|
||||
|
||||
* `native` can be passed to use the processor of the host machine.
|
||||
* `generic` refers to an LLVM target with minimal features but modern tuning.
|
||||
|
||||
## target-feature
|
||||
|
||||
@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
|
||||
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
|
||||
features.
|
||||
|
||||
## tune-cpu
|
||||
|
||||
This instructs `rustc` to schedule code specifically for a particular
|
||||
processor. This does not affect the compatibility (instruction sets or ABI),
|
||||
but should make your code slightly more efficient on the selected CPU.
|
||||
|
||||
The valid options are the same as those for [`target-cpu`](#target-cpu).
|
||||
The default is `None`, in which case LLVM tunes for the same processor as the `target-cpu`.
|
||||
|
||||
This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
|
||||
|
||||
Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
|
||||
effective only for x86 targets.
|
||||
|
||||
[option-emit]: ../command-line-arguments.md#option-emit
|
||||
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
|
||||
[profile-guided optimization]: ../profile-guided-optimization.md
|
||||
|
21
src/test/codegen/tune-cpu-on-functions.rs
Normal file
21
src/test/codegen/tune-cpu-on-functions.rs
Normal file
@ -0,0 +1,21 @@
|
||||
// This test makes sure that functions get annotated with the proper
|
||||
// "tune-cpu" attribute in LLVM.
|
||||
|
||||
// no-prefer-dynamic
|
||||
// ignore-tidy-linelength
|
||||
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
|
||||
|
||||
#![crate_type = "staticlib"]
|
||||
|
||||
// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
|
||||
#[no_mangle]
|
||||
pub extern fn exported() {
|
||||
not_exported();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
|
||||
// CHECK-NEXT: ; Function Attrs:
|
||||
// CHECK-NEXT: define {{.*}}() {{.*}} #0
|
||||
fn not_exported() {}
|
||||
|
||||
// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"
|
Loading…
x
Reference in New Issue
Block a user