2019-06-08 18:47:18 +09:00
|
|
|
//! This module implements minimal run-time feature detection for x86.
|
|
|
|
//!
|
|
|
|
//! The features are detected using the `detect_features` function below.
|
|
|
|
//! This function uses the CPUID instruction to read the feature flags from the
|
2021-08-22 18:15:49 +02:00
|
|
|
//! CPU and encodes them in a `usize` where each bit position represents
|
2022-05-20 10:39:10 +09:00
|
|
|
//! whether a feature is available (bit is set) or unavailable (bit is cleared).
|
2019-06-08 18:47:18 +09:00
|
|
|
//!
|
|
|
|
//! The enum `Feature` is used to map bit positions to feature names, and the
|
|
|
|
//! `is_x86_feature_detected!` macro is used to map string literals (e.g.,
|
|
|
|
//! "avx") to these bit positions (e.g., `Feature::avx`).
|
|
|
|
//!
|
|
|
|
//! The run-time feature detection is performed by the
|
|
|
|
//! `__crate::detect::check_for(Feature) -> bool` function. On its first call,
|
|
|
|
//! this function queries the CPU for the available features and stores them
|
|
|
|
//! in a global `AtomicUsize` variable. The query is performed by just checking
|
|
|
|
//! whether the feature bit in this global variable is set or cleared.
|
|
|
|
|
|
|
|
/// A macro to test at *runtime* whether a CPU feature is available on
|
|
|
|
/// x86/x86-64 platforms.
|
|
|
|
///
|
|
|
|
/// This macro is provided in the standard library and will detect at runtime
|
|
|
|
/// whether the specified CPU feature is detected. This does **not** resolve at
|
|
|
|
/// compile time unless the specified feature is already enabled for the entire
|
|
|
|
/// crate. Runtime detection currently relies mostly on the `cpuid` instruction.
|
|
|
|
///
|
|
|
|
/// This macro only takes one argument which is a string literal of the feature
|
|
|
|
/// being tested for. The feature names supported are the lowercase versions of
|
|
|
|
/// the ones defined by Intel in [their documentation][docs].
|
|
|
|
///
|
|
|
|
/// ## Supported arguments
|
|
|
|
///
|
|
|
|
/// This macro supports the same names that `#[target_feature]` supports. Unlike
|
|
|
|
/// `#[target_feature]`, however, this macro does not support names separated
|
|
|
|
/// with a comma. Instead testing for multiple features must be done through
|
|
|
|
/// separate macro invocations for now.
|
|
|
|
///
|
|
|
|
/// Supported arguments are:
|
|
|
|
///
|
|
|
|
/// * `"aes"`
|
|
|
|
/// * `"pclmulqdq"`
|
|
|
|
/// * `"rdrand"`
|
|
|
|
/// * `"rdseed"`
|
|
|
|
/// * `"tsc"`
|
|
|
|
/// * `"mmx"`
|
|
|
|
/// * `"sse"`
|
|
|
|
/// * `"sse2"`
|
|
|
|
/// * `"sse3"`
|
|
|
|
/// * `"ssse3"`
|
|
|
|
/// * `"sse4.1"`
|
|
|
|
/// * `"sse4.2"`
|
|
|
|
/// * `"sse4a"`
|
|
|
|
/// * `"sha"`
|
|
|
|
/// * `"avx"`
|
|
|
|
/// * `"avx2"`
|
|
|
|
/// * `"avx512f"`
|
|
|
|
/// * `"avx512cd"`
|
|
|
|
/// * `"avx512er"`
|
|
|
|
/// * `"avx512pf"`
|
|
|
|
/// * `"avx512bw"`
|
|
|
|
/// * `"avx512dq"`
|
|
|
|
/// * `"avx512vl"`
|
|
|
|
/// * `"avx512ifma"`
|
|
|
|
/// * `"avx512vbmi"`
|
|
|
|
/// * `"avx512vpopcntdq"`
|
|
|
|
/// * `"f16c"`
|
|
|
|
/// * `"fma"`
|
|
|
|
/// * `"bmi1"`
|
|
|
|
/// * `"bmi2"`
|
|
|
|
/// * `"abm"`
|
|
|
|
/// * `"lzcnt"`
|
|
|
|
/// * `"tbm"`
|
|
|
|
/// * `"popcnt"`
|
|
|
|
/// * `"fxsr"`
|
|
|
|
/// * `"xsave"`
|
|
|
|
/// * `"xsaveopt"`
|
|
|
|
/// * `"xsaves"`
|
|
|
|
/// * `"xsavec"`
|
|
|
|
/// * `"adx"`
|
|
|
|
/// * `"rtm"`
|
|
|
|
///
|
|
|
|
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
|
|
|
|
#[macro_export]
|
|
|
|
#[stable(feature = "simd_x86", since = "1.27.0")]
|
|
|
|
#[allow_internal_unstable(stdsimd_internal,stdsimd)]
|
|
|
|
macro_rules! is_x86_feature_detected {
|
|
|
|
("aes") => {
|
|
|
|
cfg!(target_feature = "aes") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::aes) };
|
|
|
|
("pclmulqdq") => {
|
|
|
|
cfg!(target_feature = "pclmulqdq") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::pclmulqdq) };
|
|
|
|
("rdrand") => {
|
|
|
|
cfg!(target_feature = "rdrand") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::rdrand) };
|
|
|
|
("rdseed") => {
|
|
|
|
cfg!(target_feature = "rdseed") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::rdseed) };
|
|
|
|
("tsc") => {
|
|
|
|
cfg!(target_feature = "tsc") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::tsc) };
|
|
|
|
("mmx") => {
|
|
|
|
cfg!(target_feature = "mmx") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::mmx) };
|
|
|
|
("sse") => {
|
|
|
|
cfg!(target_feature = "sse") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse) };
|
|
|
|
("sse2") => {
|
|
|
|
cfg!(target_feature = "sse2") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse2)
|
|
|
|
};
|
|
|
|
("sse3") => {
|
|
|
|
cfg!(target_feature = "sse3") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse3)
|
|
|
|
};
|
|
|
|
("ssse3") => {
|
|
|
|
cfg!(target_feature = "ssse3") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::ssse3)
|
|
|
|
};
|
|
|
|
("sse4.1") => {
|
|
|
|
cfg!(target_feature = "sse4.1") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse4_1)
|
|
|
|
};
|
|
|
|
("sse4.2") => {
|
|
|
|
cfg!(target_feature = "sse4.2") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse4_2)
|
|
|
|
};
|
|
|
|
("sse4a") => {
|
|
|
|
cfg!(target_feature = "sse4a") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sse4a)
|
|
|
|
};
|
|
|
|
("sha") => {
|
|
|
|
cfg!(target_feature = "sha") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::sha)
|
|
|
|
};
|
|
|
|
("avx") => {
|
|
|
|
cfg!(target_feature = "avx") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx)
|
|
|
|
};
|
|
|
|
("avx2") => {
|
|
|
|
cfg!(target_feature = "avx2") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx2)
|
|
|
|
};
|
|
|
|
("avx512f") => {
|
|
|
|
cfg!(target_feature = "avx512f") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512f)
|
|
|
|
};
|
|
|
|
("avx512cd") => {
|
|
|
|
cfg!(target_feature = "avx512cd") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512cd)
|
|
|
|
};
|
|
|
|
("avx512er") => {
|
|
|
|
cfg!(target_feature = "avx512er") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512er)
|
|
|
|
};
|
|
|
|
("avx512pf") => {
|
|
|
|
cfg!(target_feature = "avx512pf") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512pf)
|
|
|
|
};
|
|
|
|
("avx512bw") => {
|
|
|
|
cfg!(target_feature = "avx512bw") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512bw)
|
|
|
|
};
|
|
|
|
("avx512dq") => {
|
|
|
|
cfg!(target_feature = "avx512dq") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512dq)
|
|
|
|
};
|
|
|
|
("avx512vl") => {
|
|
|
|
cfg!(target_Feature = "avx512vl") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512vl)
|
|
|
|
};
|
|
|
|
("avx512ifma") => {
|
|
|
|
cfg!(target_feature = "avx512ifma") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512_ifma)
|
|
|
|
};
|
|
|
|
("avx512vbmi") => {
|
|
|
|
cfg!(target_feature = "avx512vbmi") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512_vbmi)
|
|
|
|
};
|
|
|
|
("avx512vpopcntdq") => {
|
|
|
|
cfg!(target_feature = "avx512vpopcntdq") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::avx512_vpopcntdq)
|
|
|
|
};
|
|
|
|
("f16c") => {
|
|
|
|
cfg!(target_feature = "f16c") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::f16c)
|
|
|
|
};
|
|
|
|
("fma") => {
|
|
|
|
cfg!(target_feature = "fma") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::fma)
|
|
|
|
};
|
|
|
|
("bmi1") => {
|
|
|
|
cfg!(target_feature = "bmi1") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::bmi)
|
|
|
|
};
|
|
|
|
("bmi2") => {
|
|
|
|
cfg!(target_feature = "bmi2") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::bmi2)
|
|
|
|
};
|
|
|
|
("abm") => {
|
|
|
|
cfg!(target_feature = "abm") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::abm)
|
|
|
|
};
|
|
|
|
("lzcnt") => {
|
|
|
|
cfg!(target_feature = "lzcnt") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::abm)
|
|
|
|
};
|
|
|
|
("tbm") => {
|
|
|
|
cfg!(target_feature = "tbm") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::tbm)
|
|
|
|
};
|
|
|
|
("popcnt") => {
|
|
|
|
cfg!(target_feature = "popcnt") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::popcnt)
|
|
|
|
};
|
|
|
|
("fxsr") => {
|
|
|
|
cfg!(target_feature = "fxsr") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::fxsr)
|
|
|
|
};
|
|
|
|
("xsave") => {
|
|
|
|
cfg!(target_feature = "xsave") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::xsave)
|
|
|
|
};
|
|
|
|
("xsaveopt") => {
|
|
|
|
cfg!(target_feature = "xsaveopt") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::xsaveopt)
|
|
|
|
};
|
|
|
|
("xsaves") => {
|
|
|
|
cfg!(target_feature = "xsaves") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::xsaves)
|
|
|
|
};
|
|
|
|
("xsavec") => {
|
|
|
|
cfg!(target_feature = "xsavec") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::xsavec)
|
|
|
|
};
|
|
|
|
("cmpxchg16b") => {
|
|
|
|
cfg!(target_feature = "cmpxchg16b") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::cmpxchg16b)
|
|
|
|
};
|
|
|
|
("adx") => {
|
|
|
|
cfg!(target_feature = "adx") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::adx)
|
|
|
|
};
|
|
|
|
("rtm") => {
|
|
|
|
cfg!(target_feature = "rtm") || $crate::detect::check_for(
|
|
|
|
$crate::detect::Feature::rtm)
|
|
|
|
};
|
|
|
|
($t:tt,) => {
|
|
|
|
is_x86_feature_detected!($t);
|
|
|
|
};
|
|
|
|
($t:tt) => {
|
|
|
|
compile_error!(concat!("unknown target feature: ", $t))
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
|
|
|
|
/// particular feature.
|
|
|
|
///
|
|
|
|
/// This is an unstable implementation detail subject to change.
|
|
|
|
#[allow(non_camel_case_types)]
|
|
|
|
#[repr(u8)]
|
|
|
|
#[doc(hidden)]
|
|
|
|
#[unstable(feature = "stdsimd_internal", issue = "0")]
|
|
|
|
pub enum Feature {
|
|
|
|
/// AES (Advanced Encryption Standard New Instructions AES-NI)
|
|
|
|
aes,
|
|
|
|
/// CLMUL (Carry-less Multiplication)
|
|
|
|
pclmulqdq,
|
|
|
|
/// RDRAND
|
|
|
|
rdrand,
|
|
|
|
/// RDSEED
|
|
|
|
rdseed,
|
|
|
|
/// TSC (Time Stamp Counter)
|
|
|
|
tsc,
|
|
|
|
/// MMX
|
|
|
|
mmx,
|
|
|
|
/// SSE (Streaming SIMD Extensions)
|
|
|
|
sse,
|
|
|
|
/// SSE2 (Streaming SIMD Extensions 2)
|
|
|
|
sse2,
|
|
|
|
/// SSE3 (Streaming SIMD Extensions 3)
|
|
|
|
sse3,
|
|
|
|
/// SSSE3 (Supplemental Streaming SIMD Extensions 3)
|
|
|
|
ssse3,
|
|
|
|
/// SSE4.1 (Streaming SIMD Extensions 4.1)
|
|
|
|
sse4_1,
|
|
|
|
/// SSE4.2 (Streaming SIMD Extensions 4.2)
|
|
|
|
sse4_2,
|
|
|
|
/// SSE4a (Streaming SIMD Extensions 4a)
|
|
|
|
sse4a,
|
|
|
|
/// SHA
|
|
|
|
sha,
|
|
|
|
/// AVX (Advanced Vector Extensions)
|
|
|
|
avx,
|
|
|
|
/// AVX2 (Advanced Vector Extensions 2)
|
|
|
|
avx2,
|
|
|
|
/// AVX-512 F (Foundation)
|
|
|
|
avx512f,
|
|
|
|
/// AVX-512 CD (Conflict Detection Instructions)
|
|
|
|
avx512cd,
|
|
|
|
/// AVX-512 ER (Exponential and Reciprocal Instructions)
|
|
|
|
avx512er,
|
|
|
|
/// AVX-512 PF (Prefetch Instructions)
|
|
|
|
avx512pf,
|
|
|
|
/// AVX-512 BW (Byte and Word Instructions)
|
|
|
|
avx512bw,
|
|
|
|
/// AVX-512 DQ (Doubleword and Quadword)
|
|
|
|
avx512dq,
|
|
|
|
/// AVX-512 VL (Vector Length Extensions)
|
|
|
|
avx512vl,
|
|
|
|
/// AVX-512 IFMA (Integer Fused Multiply Add)
|
|
|
|
avx512_ifma,
|
|
|
|
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
|
|
|
|
avx512_vbmi,
|
|
|
|
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
|
|
|
|
/// Quadword)
|
|
|
|
avx512_vpopcntdq,
|
|
|
|
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
|
|
|
|
f16c,
|
|
|
|
/// FMA (Fused Multiply Add)
|
|
|
|
fma,
|
|
|
|
/// BMI1 (Bit Manipulation Instructions 1)
|
|
|
|
bmi,
|
|
|
|
/// BMI1 (Bit Manipulation Instructions 2)
|
|
|
|
bmi2,
|
|
|
|
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
|
|
|
|
/// Count) on Intel
|
|
|
|
abm,
|
|
|
|
/// TBM (Trailing Bit Manipulation)
|
|
|
|
tbm,
|
|
|
|
/// POPCNT (Population Count)
|
|
|
|
popcnt,
|
2022-08-07 02:53:03 +03:00
|
|
|
/// FXSR (Floating-point context fast save and restore)
|
2019-06-08 18:47:18 +09:00
|
|
|
fxsr,
|
|
|
|
/// XSAVE (Save Processor Extended States)
|
|
|
|
xsave,
|
|
|
|
/// XSAVEOPT (Save Processor Extended States Optimized)
|
|
|
|
xsaveopt,
|
|
|
|
/// XSAVES (Save Processor Extended States Supervisor)
|
|
|
|
xsaves,
|
|
|
|
/// XSAVEC (Save Processor Extended States Compacted)
|
|
|
|
xsavec,
|
|
|
|
/// CMPXCH16B, a 16-byte compare-and-swap instruction
|
|
|
|
cmpxchg16b,
|
|
|
|
/// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
|
|
|
|
adx,
|
|
|
|
/// RTM, Intel (Restricted Transactional Memory)
|
|
|
|
rtm,
|
|
|
|
}
|