Add support for detecting CPU features
This commit is contained in:
parent
48a3613a73
commit
91e04000ea
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -77,7 +77,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "gccjit"
|
||||
version = "1.0.0"
|
||||
source = "git+https://github.com/antoyo/gccjit.rs#2f6b60543d0f72003a2d19430d446dae27b06753"
|
||||
source = "git+https://github.com/antoyo/gccjit.rs#79c8bb49ff09b7f40a04055203a5f3894a266210"
|
||||
dependencies = [
|
||||
"gccjit_sys",
|
||||
]
|
||||
@ -85,7 +85,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "gccjit_sys"
|
||||
version = "0.0.1"
|
||||
source = "git+https://github.com/antoyo/gccjit.rs#2f6b60543d0f72003a2d19430d446dae27b06753"
|
||||
source = "git+https://github.com/antoyo/gccjit.rs#79c8bb49ff09b7f40a04055203a5f3894a266210"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -14,9 +14,7 @@ A secondary goal is to check if using the gcc backend will provide any run-time
|
||||
## Building
|
||||
|
||||
**This requires a patched libgccjit in order to work.
|
||||
The patches in [this repository](https://github.com/antoyo/libgccjit-patches) need to be applied.
|
||||
(Those patches should work when applied on master, but in case it doesn't work, they are known to work when applied on 079c23cfe079f203d5df83fea8e92a60c7d7e878.)
|
||||
You can also use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**
|
||||
You need to use my [fork of gcc](https://github.com/antoyo/gcc) which already includes these patches.**
|
||||
|
||||
To build it (most of these instructions come from [here](https://gcc.gnu.org/onlinedocs/jit/internals/index.html), so don't hesitate to take a look there if you encounter an issue):
|
||||
|
||||
|
@ -21,7 +21,6 @@ tests/ui/sepcomp/sepcomp-fns-backwards.rs
|
||||
tests/ui/sepcomp/sepcomp-fns.rs
|
||||
tests/ui/sepcomp/sepcomp-statics.rs
|
||||
tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
|
||||
tests/ui/sse2.rs
|
||||
tests/ui/target-feature/missing-plusminus.rs
|
||||
tests/ui/asm/x86_64/may_unwind.rs
|
||||
tests/ui/backtrace.rs
|
||||
|
51
src/base.rs
51
src/base.rs
@ -1,10 +1,12 @@
|
||||
use std::collections::HashSet;
|
||||
use std::env;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use gccjit::{
|
||||
Context,
|
||||
FunctionType,
|
||||
GlobalKind,
|
||||
GlobalKind, TargetInfo,
|
||||
};
|
||||
use rustc_middle::dep_graph;
|
||||
use rustc_middle::ty::TyCtxt;
|
||||
@ -63,7 +65,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) {
|
||||
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<TargetInfo>) -> (ModuleCodegen<GccContext>, u64) {
|
||||
let prof_timer = tcx.prof.generic_activity("codegen_module");
|
||||
let start_time = Instant::now();
|
||||
|
||||
@ -71,7 +73,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
|
||||
let (module, _) = tcx.dep_graph.with_task(
|
||||
dep_node,
|
||||
tcx,
|
||||
(cgu_name, supports_128bit_integers),
|
||||
(cgu_name, target_info),
|
||||
module_codegen,
|
||||
Some(dep_graph::hash_result),
|
||||
);
|
||||
@ -82,7 +84,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
|
||||
// the time we needed for codegenning it.
|
||||
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
|
||||
|
||||
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> {
|
||||
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, Arc<TargetInfo>)) -> ModuleCodegen<GccContext> {
|
||||
let cgu = tcx.codegen_unit(cgu_name);
|
||||
// Instantiate monomorphizations without filling out definitions yet...
|
||||
//let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str());
|
||||
@ -91,29 +93,36 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
|
||||
context.add_command_line_option("-fexceptions");
|
||||
context.add_driver_option("-fexceptions");
|
||||
|
||||
let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',')
|
||||
.filter(|feature| feature.starts_with('-'))
|
||||
.map(|string| &string[1..])
|
||||
.collect();
|
||||
|
||||
let add_cpu_feature_flag = |feature: &str| {
|
||||
// FIXME(antoyo): some tests cause a segfault in GCC when not enabling all these
|
||||
// features.
|
||||
if (true || target_info.cpu_supports(feature)) && !disabled_features.contains(feature) {
|
||||
context.add_command_line_option(&format!("-m{}", feature));
|
||||
}
|
||||
};
|
||||
|
||||
// TODO(antoyo): only set on x86 platforms.
|
||||
context.add_command_line_option("-masm=intel");
|
||||
// TODO(antoyo): only add the following cli argument if the feature is supported.
|
||||
context.add_command_line_option("-msse2");
|
||||
context.add_command_line_option("-mavx2");
|
||||
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
|
||||
// Only add if the CPU supports it.
|
||||
context.add_command_line_option("-msha");
|
||||
|
||||
let features = ["sse2", "avx", "avx2", "sha", "fma", "gfni", "f16c", "aes", "bmi2", "rtm",
|
||||
"vaes", "vpclmulqdq", "xsavec",
|
||||
];
|
||||
|
||||
for feature in &features {
|
||||
add_cpu_feature_flag(feature);
|
||||
}
|
||||
|
||||
// TODO(antoyo): only add the following cli arguments if the feature is supported.
|
||||
context.add_command_line_option("-mpclmul");
|
||||
context.add_command_line_option("-mfma");
|
||||
context.add_command_line_option("-mfma4");
|
||||
context.add_command_line_option("-m64");
|
||||
context.add_command_line_option("-mbmi");
|
||||
context.add_command_line_option("-mgfni");
|
||||
//context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option.
|
||||
context.add_command_line_option("-mf16c");
|
||||
context.add_command_line_option("-maes");
|
||||
context.add_command_line_option("-mxsavec");
|
||||
context.add_command_line_option("-mbmi2");
|
||||
context.add_command_line_option("-mrtm");
|
||||
context.add_command_line_option("-mvaes");
|
||||
context.add_command_line_option("-mvpclmulqdq");
|
||||
context.add_command_line_option("-mavx");
|
||||
|
||||
for arg in &tcx.sess.opts.cg.llvm_args {
|
||||
context.add_command_line_option(arg);
|
||||
@ -156,7 +165,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
|
||||
context.set_allow_unreachable_blocks(true);
|
||||
|
||||
{
|
||||
let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers);
|
||||
let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
|
||||
|
||||
let mono_items = cgu.items_in_deterministic_order(tcx);
|
||||
for &(mono_item, (linkage, visibility)) in &mono_items {
|
||||
|
42
src/lib.rs
42
src/lib.rs
@ -35,7 +35,6 @@ extern crate rustc_middle;
|
||||
extern crate rustc_session;
|
||||
extern crate rustc_span;
|
||||
extern crate rustc_target;
|
||||
extern crate tempfile;
|
||||
|
||||
// This prevents duplicating functions and statics that are already part of the host rustc process.
|
||||
#[allow(unused_extern_crates)]
|
||||
@ -64,10 +63,10 @@ mod type_;
|
||||
mod type_of;
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::errors::LTONotSupported;
|
||||
use gccjit::{Context, OptimizationLevel, CType};
|
||||
use gccjit::{Context, OptimizationLevel, TargetInfo};
|
||||
use rustc_ast::expand::allocator::AllocatorKind;
|
||||
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
|
||||
use rustc_codegen_ssa::base::codegen_crate;
|
||||
@ -86,7 +85,6 @@ use rustc_session::config::{Lto, OptLevel, OutputFilenames};
|
||||
use rustc_session::Session;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_span::fatal_error::FatalError;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fluent_messages! { "../messages.ftl" }
|
||||
|
||||
@ -102,7 +100,7 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GccCodegenBackend {
|
||||
supports_128bit_integers: Arc<Mutex<bool>>,
|
||||
target_info: Arc<TargetInfo>,
|
||||
}
|
||||
|
||||
impl CodegenBackend for GccCodegenBackend {
|
||||
@ -116,15 +114,6 @@ impl CodegenBackend for GccCodegenBackend {
|
||||
if sess.lto() != Lto::No {
|
||||
sess.emit_warning(LTONotSupported {});
|
||||
}
|
||||
|
||||
let temp_dir = TempDir::new().expect("cannot create temporary directory");
|
||||
let temp_file = temp_dir.into_path().join("result.asm");
|
||||
let check_context = Context::default();
|
||||
check_context.set_print_errors_to_stderr(false);
|
||||
let _int128_ty = check_context.new_c_type(CType::UInt128t);
|
||||
// NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
|
||||
check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
|
||||
*self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None);
|
||||
}
|
||||
|
||||
fn provide(&self, providers: &mut Providers) {
|
||||
@ -160,7 +149,7 @@ impl CodegenBackend for GccCodegenBackend {
|
||||
}
|
||||
|
||||
fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
||||
target_features(sess, allow_unstable)
|
||||
target_features(sess, allow_unstable, &self.target_info)
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,7 +163,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
|
||||
}
|
||||
|
||||
fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
|
||||
base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock"))
|
||||
base::compile_codegen_unit(tcx, cgu_name, Arc::clone(&self.target_info))
|
||||
}
|
||||
|
||||
fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
|
||||
@ -273,8 +262,17 @@ impl WriteBackendMethods for GccCodegenBackend {
|
||||
/// This is the entrypoint for a hot plugged rustc_codegen_gccjit
|
||||
#[no_mangle]
|
||||
pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
|
||||
// Get the native arch and check whether the target supports 128-bit integers.
|
||||
let context = Context::default();
|
||||
let arch = context.get_target_info().arch().unwrap();
|
||||
|
||||
// Get the second TargetInfo with the correct CPU features by setting the arch.
|
||||
let context = Context::default();
|
||||
context.add_driver_option(&format!("-march={}", arch.to_str().unwrap()));
|
||||
let target_info = Arc::new(context.get_target_info());
|
||||
|
||||
Box::new(GccCodegenBackend {
|
||||
supports_128bit_integers: Arc::new(Mutex::new(false)),
|
||||
target_info,
|
||||
})
|
||||
}
|
||||
|
||||
@ -308,7 +306,7 @@ pub fn target_cpu(sess: &Session) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
||||
pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &Arc<TargetInfo>) -> Vec<Symbol> {
|
||||
supported_target_features(sess)
|
||||
.iter()
|
||||
.filter_map(
|
||||
@ -317,14 +315,9 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
||||
},
|
||||
)
|
||||
.filter(|_feature| {
|
||||
// TODO(antoyo): implement a way to get enabled feature in libgccjit.
|
||||
// Probably using the equivalent of __builtin_cpu_supports.
|
||||
// TODO(antoyo): maybe use whatever outputs the following command:
|
||||
// gcc -march=native -Q --help=target
|
||||
#[cfg(feature="master")]
|
||||
{
|
||||
// NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI.
|
||||
(_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a")
|
||||
target_info.cpu_supports(_feature)
|
||||
}
|
||||
#[cfg(not(feature="master"))]
|
||||
{
|
||||
@ -336,7 +329,6 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
||||
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
|
||||
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
|
||||
*/
|
||||
//false
|
||||
})
|
||||
.map(|feature| Symbol::intern(feature))
|
||||
.collect()
|
||||
|
Loading…
x
Reference in New Issue
Block a user