Auto merge of #32066 - ruud-v-a:fma, r=alexcrichton
This adds support for fused multiply-add and multiply-subtract vector intrinsics for 128- and 256-bit vectors of `f32` and `f64`. These correspond to the intrinsics [listed here](https://software.intel.com/en-us/node/523929), except for the `_ss` and `_sd` variants. The intrinsics added are:

* `fmadd`
* `fmaddsub`
* `fmsub`
* `fmsubadd`
* `fnmadd`
* `fnmsub`

The “fma” target feature must be enabled by passing `-C target-feature=+fma` to rustc when using these; otherwise LLVM will complain. I verified locally that `x86_mm256_fmadd_ps` and `x86_mm256_fmsub_ps` work.
This commit is contained in:
commit
6d262db448
@ -691,7 +691,7 @@ def parse_args():
|
||||
parser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
|
||||
help = 'File to output to (default stdout).')
|
||||
parser.add_argument('-i', '--info', type=argparse.FileType('r'),
|
||||
help = 'File containing platform specific information to merge into'
|
||||
help = 'File containing platform specific information to merge into '
|
||||
'the input files\' header.')
|
||||
parser.add_argument('in_', metavar="FILE", type=argparse.FileType('r'), nargs='+',
|
||||
help = 'JSON files to load')
|
||||
@ -735,12 +735,12 @@ class CompilerDefs(object):
|
||||
|
||||
use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}};
|
||||
use IntrinsicDef::Named;
|
||||
use rustc::middle::ty;
|
||||
use rustc::middle::ty::TyCtxt;
|
||||
|
||||
// The default inlining settings trigger a pathological behaviour in
|
||||
// LLVM, which makes compilation very slow. See #28273.
|
||||
#[inline(never)]
|
||||
pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {{
|
||||
pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {{
|
||||
if !name.starts_with("{0}") {{ return None }}
|
||||
Some(match &name["{0}".len()..] {{'''.format(platform.intrinsic_prefix())
|
||||
|
||||
|
47
src/etc/platform-intrinsics/x86/fma.json
Normal file
47
src/etc/platform-intrinsics/x86/fma.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"llvm_prefix": "llvm.x86.fma.",
|
||||
"intrinsics": [
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fmadd_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfmadd.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fmaddsub_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfmaddsub.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fmsub_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfmsub.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fmsubadd_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfmsubadd.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fnmadd_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfnmadd.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_fnmsub_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "vfnmsub.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0", "0"]
|
||||
}
|
||||
]
|
||||
}
|
@ -1108,6 +1108,126 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
||||
output: v(u(16), 16),
|
||||
definition: Named("llvm.x86.avx2.psubus.w")
|
||||
},
|
||||
"_fmadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfmadd.ps")
|
||||
},
|
||||
"_fmadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfmadd.pd")
|
||||
},
|
||||
"256_fmadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfmadd.ps.256")
|
||||
},
|
||||
"256_fmadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfmadd.pd.256")
|
||||
},
|
||||
"_fmaddsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfmaddsub.ps")
|
||||
},
|
||||
"_fmaddsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfmaddsub.pd")
|
||||
},
|
||||
"256_fmaddsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfmaddsub.ps.256")
|
||||
},
|
||||
"256_fmaddsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfmaddsub.pd.256")
|
||||
},
|
||||
"_fmsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfmsub.ps")
|
||||
},
|
||||
"_fmsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfmsub.pd")
|
||||
},
|
||||
"256_fmsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfmsub.ps.256")
|
||||
},
|
||||
"256_fmsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfmsub.pd.256")
|
||||
},
|
||||
"_fmsubadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfmsubadd.ps")
|
||||
},
|
||||
"_fmsubadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfmsubadd.pd")
|
||||
},
|
||||
"256_fmsubadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfmsubadd.ps.256")
|
||||
},
|
||||
"256_fmsubadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfmsubadd.pd.256")
|
||||
},
|
||||
"_fnmadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfnmadd.ps")
|
||||
},
|
||||
"_fnmadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfnmadd.pd")
|
||||
},
|
||||
"256_fnmadd_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfnmadd.ps.256")
|
||||
},
|
||||
"256_fnmadd_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfnmadd.pd.256")
|
||||
},
|
||||
"_fnmsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.fma.vfnmsub.ps")
|
||||
},
|
||||
"_fnmsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.fma.vfnmsub.pd")
|
||||
},
|
||||
"256_fnmsub_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.fma.vfnmsub.ps.256")
|
||||
},
|
||||
"256_fnmsub_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.fma.vfnmsub.pd.256")
|
||||
},
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user