Add support for detecting CPU features

This commit is contained in:
Antoni Boucher 2023-07-04 21:04:15 -04:00
parent 48a3613a73
commit 91e04000ea
5 changed files with 50 additions and 52 deletions

4
Cargo.lock generated
View File

@ -77,7 +77,7 @@ dependencies = [
[[package]]
name = "gccjit"
version = "1.0.0"
source = "git+https://github.com/antoyo/gccjit.rs#2f6b60543d0f72003a2d19430d446dae27b06753"
source = "git+https://github.com/antoyo/gccjit.rs#79c8bb49ff09b7f40a04055203a5f3894a266210"
dependencies = [
"gccjit_sys",
]
@ -85,7 +85,7 @@ dependencies = [
[[package]]
name = "gccjit_sys"
version = "0.0.1"
source = "git+https://github.com/antoyo/gccjit.rs#2f6b60543d0f72003a2d19430d446dae27b06753"
source = "git+https://github.com/antoyo/gccjit.rs#79c8bb49ff09b7f40a04055203a5f3894a266210"
dependencies = [
"libc",
]

View File

@ -14,9 +14,7 @@ A secondary goal is to check if using the gcc backend will provide any run-time
## Building
**This requires a patched libgccjit in order to work.
The patches in [this repository](https://github.com/antoyo/libgccjit-patches) need to be applied.
(Those patches should work when applied on master, but in case they don't, they are known to work when applied on 079c23cfe079f203d5df83fea8e92a60c7d7e878.)
You can also use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**
You need to use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**
To build it (most of these instructions come from [here](https://gcc.gnu.org/onlinedocs/jit/internals/index.html), so don't hesitate to take a look there if you encounter an issue):

View File

@ -21,7 +21,6 @@ tests/ui/sepcomp/sepcomp-fns-backwards.rs
tests/ui/sepcomp/sepcomp-fns.rs
tests/ui/sepcomp/sepcomp-statics.rs
tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
tests/ui/sse2.rs
tests/ui/target-feature/missing-plusminus.rs
tests/ui/asm/x86_64/may_unwind.rs
tests/ui/backtrace.rs

View File

@ -1,10 +1,12 @@
use std::collections::HashSet;
use std::env;
use std::sync::Arc;
use std::time::Instant;
use gccjit::{
Context,
FunctionType,
GlobalKind,
GlobalKind, TargetInfo,
};
use rustc_middle::dep_graph;
use rustc_middle::ty::TyCtxt;
@ -63,7 +65,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
}
}
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) {
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<TargetInfo>) -> (ModuleCodegen<GccContext>, u64) {
let prof_timer = tcx.prof.generic_activity("codegen_module");
let start_time = Instant::now();
@ -71,7 +73,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
let (module, _) = tcx.dep_graph.with_task(
dep_node,
tcx,
(cgu_name, supports_128bit_integers),
(cgu_name, target_info),
module_codegen,
Some(dep_graph::hash_result),
);
@ -82,7 +84,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
// the time we needed for codegenning it.
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> {
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, Arc<TargetInfo>)) -> ModuleCodegen<GccContext> {
let cgu = tcx.codegen_unit(cgu_name);
// Instantiate monomorphizations without filling out definitions yet...
//let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str());
@ -91,29 +93,36 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
context.add_command_line_option("-fexceptions");
context.add_driver_option("-fexceptions");
let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',')
.filter(|feature| feature.starts_with('-'))
.map(|string| &string[1..])
.collect();
let add_cpu_feature_flag = |feature: &str| {
// FIXME(antoyo): some tests cause a segfault in GCC when not enabling all these
// features.
if (true || target_info.cpu_supports(feature)) && !disabled_features.contains(feature) {
context.add_command_line_option(&format!("-m{}", feature));
}
};
// TODO(antoyo): only set on x86 platforms.
context.add_command_line_option("-masm=intel");
// TODO(antoyo): only add the following cli argument if the feature is supported.
context.add_command_line_option("-msse2");
context.add_command_line_option("-mavx2");
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
// Only add if the CPU supports it.
context.add_command_line_option("-msha");
let features = ["sse2", "avx", "avx2", "sha", "fma", "gfni", "f16c", "aes", "bmi2", "rtm",
"vaes", "vpclmulqdq", "xsavec",
];
for feature in &features {
add_cpu_feature_flag(feature);
}
// TODO(antoyo): only add the following cli arguments if the feature is supported.
context.add_command_line_option("-mpclmul");
context.add_command_line_option("-mfma");
context.add_command_line_option("-mfma4");
context.add_command_line_option("-m64");
context.add_command_line_option("-mbmi");
context.add_command_line_option("-mgfni");
//context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option.
context.add_command_line_option("-mf16c");
context.add_command_line_option("-maes");
context.add_command_line_option("-mxsavec");
context.add_command_line_option("-mbmi2");
context.add_command_line_option("-mrtm");
context.add_command_line_option("-mvaes");
context.add_command_line_option("-mvpclmulqdq");
context.add_command_line_option("-mavx");
for arg in &tcx.sess.opts.cg.llvm_args {
context.add_command_line_option(arg);
@ -156,7 +165,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
context.set_allow_unreachable_blocks(true);
{
let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers);
let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
let mono_items = cgu.items_in_deterministic_order(tcx);
for &(mono_item, (linkage, visibility)) in &mono_items {

View File

@ -35,7 +35,6 @@ extern crate rustc_middle;
extern crate rustc_session;
extern crate rustc_span;
extern crate rustc_target;
extern crate tempfile;
// This prevents duplicating functions and statics that are already part of the host rustc process.
#[allow(unused_extern_crates)]
@ -64,10 +63,10 @@ mod type_;
mod type_of;
use std::any::Any;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use crate::errors::LTONotSupported;
use gccjit::{Context, OptimizationLevel, CType};
use gccjit::{Context, OptimizationLevel, TargetInfo};
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::base::codegen_crate;
@ -86,7 +85,6 @@ use rustc_session::config::{Lto, OptLevel, OutputFilenames};
use rustc_session::Session;
use rustc_span::Symbol;
use rustc_span::fatal_error::FatalError;
use tempfile::TempDir;
fluent_messages! { "../messages.ftl" }
@ -102,7 +100,7 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
#[derive(Clone)]
pub struct GccCodegenBackend {
supports_128bit_integers: Arc<Mutex<bool>>,
target_info: Arc<TargetInfo>,
}
impl CodegenBackend for GccCodegenBackend {
@ -116,15 +114,6 @@ impl CodegenBackend for GccCodegenBackend {
if sess.lto() != Lto::No {
sess.emit_warning(LTONotSupported {});
}
let temp_dir = TempDir::new().expect("cannot create temporary directory");
let temp_file = temp_dir.into_path().join("result.asm");
let check_context = Context::default();
check_context.set_print_errors_to_stderr(false);
let _int128_ty = check_context.new_c_type(CType::UInt128t);
// NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
*self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None);
}
fn provide(&self, providers: &mut Providers) {
@ -160,7 +149,7 @@ impl CodegenBackend for GccCodegenBackend {
}
fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
target_features(sess, allow_unstable)
target_features(sess, allow_unstable, &self.target_info)
}
}
@ -174,7 +163,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
}
fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock"))
base::compile_codegen_unit(tcx, cgu_name, Arc::clone(&self.target_info))
}
fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
@ -273,8 +262,17 @@ impl WriteBackendMethods for GccCodegenBackend {
/// This is the entrypoint for a hot plugged rustc_codegen_gccjit
///
/// Builds the backend in two passes over libgccjit: a first default `Context`
/// is used only to read the native arch name, then a second `Context` is given
/// `-march=<arch>` as a driver option before its `TargetInfo` is captured and
/// stored (behind an `Arc`) in the returned `GccCodegenBackend`.
///
/// # Panics
///
/// Panics if the first `TargetInfo` yields no arch (`arch().unwrap()`) or if
/// converting the arch name with `to_str()` fails.
#[no_mangle]
pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
// Get the native arch name from a default context. No 128-bit integer check
// is performed here.
// NOTE(review): 128-bit support appears to be read later from the stored
// TargetInfo (`supports_128bit_int()` in `compile_codegen_unit`) — confirm.
let context = Context::default();
let arch = context.get_target_info().arch().unwrap();
// Get the second TargetInfo with the correct CPU features by setting the arch.
// A fresh context is created so the `-march` option is set before the target
// info is queried.
let context = Context::default();
context.add_driver_option(&format!("-march={}", arch.to_str().unwrap()));
let target_info = Arc::new(context.get_target_info());
Box::new(GccCodegenBackend {
supports_128bit_integers: Arc::new(Mutex::new(false)),
target_info,
})
}
@ -308,7 +306,7 @@ pub fn target_cpu(sess: &Session) -> &str {
}
}
pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &Arc<TargetInfo>) -> Vec<Symbol> {
supported_target_features(sess)
.iter()
.filter_map(
@ -317,14 +315,9 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
},
)
.filter(|_feature| {
// TODO(antoyo): implement a way to get enabled feature in libgccjit.
// Probably using the equivalent of __builtin_cpu_supports.
// TODO(antoyo): maybe use whatever outputs the following command:
// gcc -march=native -Q --help=target
#[cfg(feature="master")]
{
// NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI.
(_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a")
target_info.cpu_supports(_feature)
}
#[cfg(not(feature="master"))]
{
@ -336,7 +329,6 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
*/
//false
})
.map(|feature| Symbol::intern(feature))
.collect()