From 8872163b32011dd546e69d349d9c5de22cc218b8 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Sat, 5 Mar 2016 15:52:08 +0100 Subject: [PATCH 1/3] Define x86 fused multiply-add intrinsics This defines the following intrinsics for 128 and 256 bit vectors of f32 and f64: * `fmadd` * `fmaddsub` * `fmsub` * `fmsubadd` * `fnmadd` * `fnmsub` The `_sd` and `_ss` variants are not included yet. Intel intrinsic reference: https://software.intel.com/en-us/node/523929 The intrinsics there are listed under AVX2, but in the Intel Intrinsic Guide they are part of the "FMA" technology, and LLVM puts them under FMA, not AVX2. --- src/etc/platform-intrinsics/x86/fma.json | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/etc/platform-intrinsics/x86/fma.json diff --git a/src/etc/platform-intrinsics/x86/fma.json b/src/etc/platform-intrinsics/x86/fma.json new file mode 100644 index 00000000000..c922d166c8f --- /dev/null +++ b/src/etc/platform-intrinsics/x86/fma.json @@ -0,0 +1,47 @@ +{ + "llvm_prefix": "llvm.x86.fma.", + "intrinsics": [ + { + "intrinsic": "{0.width_mm}_fmadd_{0.data_type}", + "width": [128, 256], + "llvm": "vfmadd.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + }, + { + "intrinsic": "{0.width_mm}_fmaddsub_{0.data_type}", + "width": [128, 256], + "llvm": "vfmaddsub.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + }, + { + "intrinsic": "{0.width_mm}_fmsub_{0.data_type}", + "width": [128, 256], + "llvm": "vfmsub.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + }, + { + "intrinsic": "{0.width_mm}_fmsubadd_{0.data_type}", + "width": [128, 256], + "llvm": "vfmsubadd.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + }, + { + "intrinsic": "{0.width_mm}_fnmadd_{0.data_type}", + "width": [128, 256], + "llvm": "vfnmadd.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + }, + { + "intrinsic": "{0.width_mm}_fnmsub_{0.data_type}", + "width": [128, 256], + "llvm": "vfnmsub.{0.data_type_short}{0.width_suffix}", + "ret": "f(32-64)", + "args": ["0", "0", "0"] + } + ] +} From 0ce0cf1c87012dca1f21513566eab8b3210b029b Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Sat, 5 Mar 2016 16:17:55 +0100 Subject: [PATCH 2/3] Update platform intrinsic generator script The file it generates had been modified, but instead the generator should have been modified, and the file regenerated. This merges the modifications into the template in the generator. --- src/etc/platform-intrinsics/generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index e3aa4e688d3..0e0d4841063 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -691,7 +691,7 @@ def parse_args(): parser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help = 'File to output to (default stdout).') parser.add_argument('-i', '--info', type=argparse.FileType('r'), - help = 'File containing platform specific information to merge into' + help = 'File containing platform specific information to merge into ' 'the input files\' header.') parser.add_argument('in_', metavar="FILE", type=argparse.FileType('r'), nargs='+', help = 'JSON files to load') @@ -735,12 +735,12 @@ class CompilerDefs(object): use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}}; use IntrinsicDef::Named; -use rustc::middle::ty; +use rustc::middle::ty::TyCtxt; // The default inlining settings trigger a pathological behaviour in // LLVM, which causes makes compilation very slow. See #28273. #[inline(never)] -pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option {{ +pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option {{ if !name.starts_with("{0}") {{ return None }} Some(match &name["{0}".len()..] {{'''.format(platform.intrinsic_prefix()) From a409076df4ac1e80d0e8b4ed55608cbd354129ef Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Sat, 5 Mar 2016 16:25:58 +0100 Subject: [PATCH 3/3] Regenerate x86 platform intrinsics The exact command used was: $ cd src/etc/platform-intrinsics/x86 $ python2 ../generator.py --format compiler-defs -i info.json \ sse.json sse2.json sse3.json ssse3.json sse41.json sse42.json \ avx.json avx2.json fma.json \ > ../../../librustc_platform_intrinsics/x86.rs --- src/librustc_platform_intrinsics/x86.rs | 120 ++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 4a9b9970caf..168ae79ab74 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -1108,6 +1108,126 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option { output: v(u(16), 16), definition: Named("llvm.x86.avx2.psubus.w") }, + "_fmadd_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfmadd.ps") + }, + "_fmadd_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfmadd.pd") + }, + "256_fmadd_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfmadd.ps.256") + }, + "256_fmadd_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfmadd.pd.256") + }, + "_fmaddsub_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfmaddsub.ps") + }, + "_fmaddsub_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfmaddsub.pd") + }, + "256_fmaddsub_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfmaddsub.ps.256") + }, + "256_fmaddsub_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfmaddsub.pd.256") + }, + "_fmsub_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfmsub.ps") + }, + "_fmsub_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfmsub.pd") + }, + "256_fmsub_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfmsub.ps.256") + }, + "256_fmsub_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfmsub.pd.256") + }, + "_fmsubadd_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfmsubadd.ps") + }, + "_fmsubadd_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfmsubadd.pd") + }, + "256_fmsubadd_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfmsubadd.ps.256") + }, + "256_fmsubadd_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfmsubadd.pd.256") + }, + "_fnmadd_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfnmadd.ps") + }, + "_fnmadd_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfnmadd.pd") + }, + "256_fnmadd_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfnmadd.ps.256") + }, + "256_fnmadd_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfnmadd.pd.256") + }, + "_fnmsub_ps" => Intrinsic { + inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.fma.vfnmsub.ps") + }, + "_fnmsub_pd" => Intrinsic { + inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.fma.vfnmsub.pd") + }, + "256_fnmsub_ps" => Intrinsic { + inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.fma.vfnmsub.ps.256") + }, + "256_fnmsub_pd" => Intrinsic { + inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.fma.vfnmsub.pd.256") + }, _ => return None, }) }