Merge pull request #190 from rust-lang/feature/more-simd

Feature/more simd
This commit is contained in:
antoyo 2022-07-07 00:10:17 -04:00 committed by GitHub
commit 0d687bd829
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 226 additions and 23 deletions

11
Cargo.lock generated
View File

@ -41,7 +41,7 @@ dependencies = [
[[package]]
name = "gccjit"
version = "1.0.0"
source = "git+https://github.com/antoyo/gccjit.rs#bdb86fb5092895ff5589726b33250010c64d93f6"
source = "git+https://github.com/antoyo/gccjit.rs#a8997afb665dc467c1bdbddf04877143683f0cce"
dependencies = [
"gccjit_sys",
]
@ -49,7 +49,7 @@ dependencies = [
[[package]]
name = "gccjit_sys"
version = "0.0.1"
source = "git+https://github.com/antoyo/gccjit.rs#bdb86fb5092895ff5589726b33250010c64d93f6"
source = "git+https://github.com/antoyo/gccjit.rs#a8997afb665dc467c1bdbddf04877143683f0cce"
dependencies = [
"libc 0.1.12",
]
@ -215,6 +215,7 @@ dependencies = [
"ar",
"gccjit",
"lang_tester",
"smallvec",
"target-lexicon",
"tempfile",
]
@ -228,6 +229,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "smallvec"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]]
name = "target-lexicon"
version = "0.10.0"

View File

@ -27,6 +27,7 @@ gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
# Local copy.
#gccjit = { path = "../gccjit.rs" }
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
target-lexicon = "0.10.0"
ar = "0.8.0"

View File

@ -0,0 +1,39 @@
From c3821e02fbd6cb5ad6e06d759fccdc9073712375 Mon Sep 17 00:00:00 2001
From: Antoni Boucher <bouanto@zoho.com>
Date: Tue, 7 Jun 2022 21:40:13 -0400
Subject: [PATCH] Add stdarch Cargo.toml for testing
---
library/stdarch/Cargo.toml | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
create mode 100644 library/stdarch/Cargo.toml
diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml
new file mode 100644
index 0000000..fbe0a95
--- /dev/null
+++ b/library/stdarch/Cargo.toml
@@ -0,0 +1,20 @@
+[workspace]
+members = [
+ "crates/core_arch",
+ "crates/std_detect",
+ "crates/stdarch-gen",
+ "examples/"
+]
+exclude = [
+ "crates/wasm-assert-instr-tests"
+]
+
+[profile.release]
+debug = true
+opt-level = 3
+incremental = true
+
+[profile.bench]
+debug = 1
+opt-level = 3
+incremental = true
--
2.26.2.7.g19db9cfb68.dirty

112
src/attributes.rs Normal file
View File

@ -0,0 +1,112 @@
#[cfg_attr(not(feature="master"), allow(unused_imports))]
use gccjit::FnAttribute;
use gccjit::Function;
use rustc_attr::InstructionSetAttr;
use rustc_codegen_ssa::target_features::tied_target_features;
use rustc_data_structures::fx::FxHashMap;
use rustc_middle::ty;
use rustc_session::Session;
use rustc_span::symbol::sym;
use smallvec::{smallvec, SmallVec};
use crate::context::CodegenCx;
/// Validates a set of target features against the session's tied-feature groups.
///
/// `features` maps a target feature name to whether it is enabled or disabled. Within each
/// tied group every member must map to the same value, or be absent altogether. The first
/// group that violates this is returned; `None` means all groups are consistent.
pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
    tied_target_features(sess).iter().copied().find(|group| {
        // The first member of the group is the reference; all the others must agree with it.
        let (reference, others) = group.split_first().unwrap();
        let expected = features.get(reference);
        others.iter().any(|name| expected != features.get(name))
    })
}
// TODO(antoyo): maybe move to a new module gcc_util.
// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
/// Translates the Rust target feature `s` into the GCC feature name(s) used for the
/// architecture of the session's target.
///
/// `x86_64` shares the `x86` table. Any (architecture, feature) pair that is not listed is
/// passed through unchanged as a single-element list.
fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
    let arch = match &*sess.target.arch {
        "x86_64" => "x86",
        other => other,
    };
    match arch {
        "x86" => match s {
            "sse4.2" => smallvec!["sse4.2", "crc32"],
            "pclmulqdq" => smallvec!["pclmul"],
            "rdrand" => smallvec!["rdrnd"],
            "bmi1" => smallvec!["bmi"],
            "cmpxchg16b" => smallvec!["cx16"],
            "avx512vaes" => smallvec!["vaes"],
            "avx512gfni" => smallvec!["gfni"],
            "avx512vpclmulqdq" => smallvec!["vpclmulqdq"],
            // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2' and 'avx512bitalg'.
            name @ ("avx512vbmi2" | "avx512bitalg") => smallvec![name, "avx512bw"],
            name => smallvec![name],
        },
        "aarch64" => match s {
            "rcpc2" => smallvec!["rcpc-immo"],
            "dpb" => smallvec!["ccpp"],
            "dpb2" => smallvec!["ccdp"],
            "frintts" => smallvec!["fptoint"],
            "fcma" => smallvec!["complxnum"],
            "pmuv3" => smallvec!["perfmon"],
            "paca" | "pacg" => smallvec!["pauth"],
            // Rust ties fp and neon together. In LLVM neon implicitly enables fp,
            // but we manually enable neon when a feature only implicitly enables fp
            "fhm" => smallvec!["fp16fml", "neon"],
            "fp16" => smallvec!["fullfp16", "neon"],
            name @ ("f32mm" | "f64mm" | "jsconv" | "sve" | "sve2" | "sve2-aes" | "sve2-sm4"
            | "sve2-sha3" | "sve2-bitperm") => smallvec![name, "neon"],
            name => smallvec![name],
        },
        _ => smallvec![s],
    }
}
/// Composite function which sets GCC attributes for function depending on its AST (`#[attribute]`)
/// attributes.
pub fn from_fn_attrs<'gcc, 'tcx>(
cx: &CodegenCx<'gcc, 'tcx>,
#[cfg_attr(not(feature="master"), allow(unused_variables))]
func: Function<'gcc>,
instance: ty::Instance<'tcx>,
) {
let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
let function_features =
codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::<Vec<&str>>();
if let Some(features) = check_tied_features(cx.tcx.sess, &function_features.iter().map(|features| (*features, true)).collect()) {
let span = cx.tcx
.get_attr(instance.def_id(), sym::target_feature)
.map_or_else(|| cx.tcx.def_span(instance.def_id()), |a| a.span);
let msg = format!("the target features {} must all be either enabled or disabled together", features.join(", "));
let mut err = cx.tcx.sess.struct_span_err(span, &msg);
err.help("add the missing features in a `target_feature` attribute");
err.emit();
return;
}
let mut function_features = function_features
.iter()
.flat_map(|feat| to_gcc_features(cx.tcx.sess, feat).into_iter())
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
InstructionSetAttr::ArmA32 => "-thumb-mode", // TODO(antoyo): support removing feature.
InstructionSetAttr::ArmT32 => "thumb-mode",
}))
.collect::<Vec<_>>();
// TODO(antoyo): check if we really need global backend features. (Maybe they could be applied
// globally?)
let mut global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str());
function_features.extend(&mut global_features);
let target_features = function_features.join(",");
if !target_features.is_empty() {
#[cfg(feature="master")]
func.add_attribute(FnAttribute::Target, &target_features);
}
}

View File

@ -83,15 +83,23 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
context.add_command_line_option("-mavx2");
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
// Only add if the CPU supports it.
/*context.add_command_line_option("-mavx512f");
context.add_command_line_option("-msha");
context.add_command_line_option("-mpclmul");
context.add_command_line_option("-mfma");
context.add_command_line_option("-mfma4");
context.add_command_line_option("-mavx512vpopcntdq");
context.add_command_line_option("-mavx512vl");
context.add_command_line_option("-m64");
context.add_command_line_option("-mbmi");*/
context.add_command_line_option("-mbmi");
context.add_command_line_option("-mgfni");
context.add_command_line_option("-mavxvnni");
context.add_command_line_option("-mf16c");
context.add_command_line_option("-maes");
context.add_command_line_option("-mxsavec");
context.add_command_line_option("-mbmi2");
context.add_command_line_option("-mrtm");
context.add_command_line_option("-mvaes");
context.add_command_line_option("-mvpclmulqdq");
context.add_command_line_option("-mavx");
for arg in &tcx.sess.opts.cg.llvm_args {
context.add_command_line_option(arg);
}

View File

@ -213,7 +213,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
let actual_ty = actual_val.get_type();
if expected_ty != actual_ty {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() {
if !actual_ty.is_vector() && !expected_ty.is_vector() && (actual_ty.is_integral() && expected_ty.is_integral()) || (actual_ty.get_pointee().is_some() && expected_ty.get_pointee().is_some()) {
self.context.new_cast(None, actual_val, expected_ty)
}
else if on_stack_param_indices.contains(&index) {
@ -1490,6 +1490,9 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
let zeros = self.context.new_rvalue_from_vector(None, cond_type, &zeros);
let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
// NOTE: masks is a vector of integers, but the values can be vectors of floats, so use bitcast to make
// the & operation work.
let masks = self.bitcast_if_needed(masks, then_val.get_type());
let then_vals = masks & then_val;
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
@ -1509,6 +1512,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
/// Builds `(a - b) & (b >= a)`, bitcasting the comparison result to the type of `a` when the
/// two types differ.
// NOTE(review): presumably the comparison yields an all-ones/all-zeros mask per element, so
// the `&` keeps or clears each element of the difference; confirm against libgccjit.
fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
    let delta = a - b;
    let comparison = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
    // NOTE: the comparison result is a vector of integers, but the values can be vectors of
    // floats, so bitcast it to the value type to make the & operation work.
    let value_type = a.get_type();
    let mask =
        if comparison.get_type() == value_type {
            comparison
        }
        else {
            context.new_bitcast(None, comparison, value_type)
        };
    delta & mask
}

View File

@ -4,6 +4,7 @@ use rustc_middle::ty::{self, Instance, TypeFoldable};
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
use crate::abi::FnAbiGccExt;
use crate::attributes;
use crate::context::CodegenCx;
/// Codegens a reference to a fn/method item, monomorphizing and
@ -67,8 +68,12 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
cx.linkage.set(FunctionType::Extern);
let func = cx.declare_fn(&sym, &fn_abi);
attributes::from_fn_attrs(cx, func, instance);
// TODO(antoyo): set linkage and attributes.
func
// FIXME(antoyo): this is a wrong cast. That requires changing the compiler API.
unsafe { std::mem::transmute(func) }
};
cx.function_instances.borrow_mut().insert(instance, func);

View File

@ -177,8 +177,18 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
let value = self.const_uint_big(self.type_ix(bitsize), data);
// TODO(bjorn3): assert size is correct
self.const_bitcast(value, ty)
let bytesize = layout.size(self).bytes();
if bitsize > 1 && ty.is_integral() && bytesize as u32 == ty.get_size() {
// NOTE: since the intrinsic _xabort is called with a bitcast, which
// is non-const, but expects a constant, do a normal cast instead of a bitcast.
// FIXME(antoyo): fix bitcast to work in constant contexts.
// TODO(antoyo): perhaps only use bitcast for pointers?
self.context.new_cast(None, value, ty)
}
else {
// TODO(bjorn3): assert size is correct
self.const_bitcast(value, ty)
}
}
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts();

View File

@ -79,12 +79,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
unsafe { std::mem::transmute(func) }
}
pub fn declare_fn(&self, name: &str, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> RValue<'gcc> {
pub fn declare_fn(&self, name: &str, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Function<'gcc> {
let (return_type, params, variadic, on_stack_param_indices) = fn_abi.gcc_type(self);
let func = declare_raw_fn(self, name, () /*fn_abi.llvm_cconv()*/, return_type, &params, variadic);
self.on_stack_function_params.borrow_mut().insert(func, on_stack_param_indices);
// FIXME(antoyo): this is a wrong cast. That requires changing the compiler API.
unsafe { std::mem::transmute(func) }
func
}
pub fn define_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {

View File

@ -287,9 +287,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool) -> RValue<'gcc> {
match func_name {
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
let zero = builder.context.new_rvalue_zero(builder.int_type);
#[cfg(feature="master")]
{
let zero = builder.context.new_rvalue_zero(builder.int_type);
return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
}
},

View File

@ -1,4 +1,6 @@
use gccjit::{BinaryOp, RValue, Type, ToRValue, ComparisonOp, UnaryOp};
#[cfg_attr(not(feature="master"), allow(unused_imports))]
use gccjit::{ToRValue, ComparisonOp, UnaryOp};
use gccjit::{BinaryOp, RValue, Type};
use rustc_codegen_ssa::base::compare_simd_types;
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
use rustc_codegen_ssa::mir::operand::OperandRef;
@ -216,7 +218,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
bx.llbb().add_assignment(None, variable, vector);
let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
// TODO: if simd_insert is constant, use BIT_REF.
// TODO(antoyo): if simd_insert is constant, use BIT_REF.
bx.llbb().add_assignment(None, lvalue, value);
return Ok(variable.to_rvalue());
}
@ -545,9 +547,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
},
(true, true) => {
// Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
// TODO: improve using conditional operators if possible.
// TODO(antoyo): improve using conditional operators if possible.
let arg_type = lhs.get_type();
// TODO: convert lhs and rhs to unsigned.
// TODO(antoyo): convert lhs and rhs to unsigned.
let sum = lhs + rhs;
let vector_type = arg_type.dyncast_vector().expect("vector type");
let unit = vector_type.get_num_units();
@ -581,7 +583,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
// negative of the right operand. Find a proper subtraction algorithm.
let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
// TODO: convert lhs and rhs to unsigned.
// TODO(antoyo): convert lhs and rhs to unsigned.
let sum = lhs + rhs;
let vector_type = arg_type.dyncast_vector().expect("vector type");
let unit = vector_type.get_num_units();

View File

@ -13,6 +13,7 @@
#![warn(unused_lifetimes)]
extern crate rustc_ast;
extern crate rustc_attr;
extern crate rustc_codegen_ssa;
extern crate rustc_data_structures;
extern crate rustc_errors;
@ -32,6 +33,7 @@ mod abi;
mod allocator;
mod archive;
mod asm;
mod attributes;
mod back;
mod base;
mod builder;
@ -302,9 +304,11 @@ pub fn target_features(sess: &Session) -> Vec<Symbol> {
.filter(|_feature| {
// TODO(antoyo): implement a way to get enabled feature in libgccjit.
// Probably using the equivalent of __builtin_cpu_supports.
// TODO(antoyo): maybe use whatever outputs the following command:
// gcc -march=native -Q --help=target
#[cfg(feature="master")]
{
_feature.contains("sse") || _feature.contains("avx")
(_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512")
}
#[cfg(not(feature="master"))]
{

View File

@ -5,6 +5,7 @@ use rustc_middle::ty::{self, Instance, TypeFoldable};
use rustc_middle::ty::layout::{FnAbiOf, LayoutOf};
use rustc_span::def_id::DefId;
use crate::attributes;
use crate::base;
use crate::context::CodegenCx;
use crate::type_of::LayoutGccExt;
@ -28,9 +29,11 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty());
self.linkage.set(base::linkage_to_gcc(linkage));
let _decl = self.declare_fn(symbol_name, &fn_abi);
let decl = self.declare_fn(symbol_name, &fn_abi);
//let attrs = self.tcx.codegen_fn_attrs(instance.def_id());
attributes::from_fn_attrs(self, decl, instance);
// TODO(antoyo): call set_link_section() to allow initializing argc/argv.
// TODO(antoyo): set unique comdat.
// TODO(antoyo): use inline attribute from there in linkage.set() above.

View File

@ -107,7 +107,7 @@ else
fi
if (( $build_only == 1 )); then
echo "Since it's `build-only`, exiting..."
echo "Since it's 'build-only', exiting..."
exit
fi
@ -324,7 +324,7 @@ function all() {
}
if [ ${#funcs[@]} -eq 0 ]; then
echo "No command passed, running `--all`..."
echo "No command passed, running '--all'..."
all
else
for t in ${funcs[@]}; do