Rollup merge of #92425 - calebzulawski:simd-cast, r=workingjubilee
Improve SIMD casts

* Allow the `simd_cast` intrinsic to take `usize` and `isize` vectors.
* Add a `simd_as` intrinsic, which is the same as `simd_cast` except that float-to-int conversions saturate (matching the behavior of `as`).

cc `@workingjubilee`
commit 7889f96103
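The distinction the new intrinsic draws is the same one `as` draws for scalars. A minimal sketch, assuming the unstable `repr_simd`/`platform_intrinsics` features that the tests added below also use (the lane values are illustrative):

```rust
#![feature(repr_simd, platform_intrinsics)]

extern "platform-intrinsic" {
    fn simd_cast<T, U>(x: T) -> U;
    fn simd_as<T, U>(x: T) -> U;
}

#[derive(Copy, Clone)]
#[repr(simd)]
struct V<T>([T; 2]);

fn main() {
    unsafe {
        // In range, the two intrinsics agree.
        let ok = V::<f32>([1.5, -2.5]);
        let a: V<i32> = simd_cast(ok);
        let b: V<i32> = simd_as(ok);
        assert_eq!(a.0, b.0);

        // Out of range or NaN, only `simd_as` is defined: it saturates
        // exactly like the scalar `as` operator (clamp to MIN/MAX, NaN -> 0).
        let bad = V::<f32>([f32::MAX, f32::NAN]);
        let c: V<i32> = simd_as(bad);
        assert_eq!(c.0, [i32::MAX, 0]);
        // `simd_cast(bad)` would be undefined behavior here.
    }
}
```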
compiler/rustc_codegen_llvm/src/builder.rs

@@ -731,27 +731,11 @@ fn sext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
     }

     fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        if !self.fptoint_sat_broken_in_llvm() {
-            let src_ty = self.cx.val_ty(val);
-            let float_width = self.cx.float_width(src_ty);
-            let int_width = self.cx.int_width(dest_ty);
-            let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
-            return Some(self.call_intrinsic(&name, &[val]));
-        }
-
-        None
+        self.fptoint_sat(false, val, dest_ty)
     }

     fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        if !self.fptoint_sat_broken_in_llvm() {
-            let src_ty = self.cx.val_ty(val);
-            let float_width = self.cx.float_width(src_ty);
-            let int_width = self.cx.int_width(dest_ty);
-            let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
-            return Some(self.call_intrinsic(&name, &[val]));
-        }
-
-        None
+        self.fptoint_sat(true, val, dest_ty)
     }

     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -1455,4 +1439,43 @@ fn fptoint_sat_broken_in_llvm(&self) -> bool {
             _ => false,
         }
     }
+
+    fn fptoint_sat(
+        &mut self,
+        signed: bool,
+        val: &'ll Value,
+        dest_ty: &'ll Type,
+    ) -> Option<&'ll Value> {
+        if !self.fptoint_sat_broken_in_llvm() {
+            let src_ty = self.cx.val_ty(val);
+            let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector
+            {
+                assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
+                (
+                    self.cx.element_type(src_ty),
+                    self.cx.element_type(dest_ty),
+                    Some(self.cx.vector_length(src_ty)),
+                )
+            } else {
+                (src_ty, dest_ty, None)
+            };
+            let float_width = self.cx.float_width(float_ty);
+            let int_width = self.cx.int_width(int_ty);
+
+            let instr = if signed { "fptosi" } else { "fptoui" };
+            let name = if let Some(vector_length) = vector_length {
+                format!(
+                    "llvm.{}.sat.v{}i{}.v{}f{}",
+                    instr, vector_length, int_width, vector_length, float_width
+                )
+            } else {
+                format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width)
+            };
+            let f =
+                self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
+            Some(self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None))
+        } else {
+            None
+        }
+    }
 }
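The mangled names this produces follow LLVM's convention of encoding the concrete vector types. A standalone check of the scalar and vector spellings, with the widths as example inputs:

```rust
// Mirrors the `format!` calls above for one example: a signed, 4-lane
// f32 -> i16 saturating cast, plus its scalar counterpart.
fn main() {
    let (instr, lanes, int_width, float_width) = ("fptosi", 4, 16, 32);
    let vector = format!(
        "llvm.{}.sat.v{}i{}.v{}f{}",
        instr, lanes, int_width, lanes, float_width
    );
    let scalar = format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width);
    assert_eq!(vector, "llvm.fptosi.sat.v4i16.v4f32");
    assert_eq!(scalar, "llvm.fptosi.sat.i16.f32");
}
```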
compiler/rustc_codegen_llvm/src/intrinsic.rs

@@ -1688,7 +1688,7 @@ macro_rules! bitwise_red {
     bitwise_red!(simd_reduce_all: vector_reduce_and, true);
     bitwise_red!(simd_reduce_any: vector_reduce_or, true);

-    if name == sym::simd_cast {
+    if name == sym::simd_cast || name == sym::simd_as {
         require_simd!(ret_ty, "return");
         let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
         require!(

@@ -1714,14 +1714,26 @@ enum Style {
         let (in_style, in_width) = match in_elem.kind() {
             // vectors of pointer-sized integers should've been
             // disallowed before here, so this unwrap is safe.
-            ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
-            ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
+            ty::Int(i) => (
+                Style::Int(true),
+                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
+            ),
+            ty::Uint(u) => (
+                Style::Int(false),
+                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
+            ),
             ty::Float(f) => (Style::Float, f.bit_width()),
             _ => (Style::Unsupported, 0),
         };
         let (out_style, out_width) = match out_elem.kind() {
-            ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
-            ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
+            ty::Int(i) => (
+                Style::Int(true),
+                i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
+            ),
+            ty::Uint(u) => (
+                Style::Int(false),
+                u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
+            ),
             ty::Float(f) => (Style::Float, f.bit_width()),
             _ => (Style::Unsupported, 0),
         };

@@ -1748,10 +1760,10 @@ enum Style {
                 });
             }
             (Style::Float, Style::Int(out_is_signed)) => {
-                return Ok(if out_is_signed {
-                    bx.fptosi(args[0].immediate(), llret_ty)
-                } else {
-                    bx.fptoui(args[0].immediate(), llret_ty)
+                return Ok(match (out_is_signed, name == sym::simd_as) {
+                    (false, false) => bx.fptoui(args[0].immediate(), llret_ty),
+                    (true, false) => bx.fptosi(args[0].immediate(), llret_ty),
+                    (_, true) => bx.cast_float_to_int(out_is_signed, args[0].immediate(), llret_ty),
                 });
             }
             (Style::Float, Style::Float) => {
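The `normalize` calls are what admit `usize` and `isize` lanes: `IntTy::Isize` reports `bit_width() == None` because its width is target-dependent, which is why the old `unwrap` required pointer-sized integers to be rejected up front. A self-contained mirror of the idea (the enum here is a hypothetical stand-in, not the in-tree `rustc_middle` type):

```rust
// Hypothetical mirror of `IntTy::normalize`: substitute the target's
// pointer width before asking for a bit width.
#[allow(dead_code)]
#[derive(Copy, Clone)]
enum IntTy { Isize, I8, I16, I32, I64, I128 }

impl IntTy {
    fn normalize(self, target_pointer_width: u32) -> Self {
        match (self, target_pointer_width) {
            (IntTy::Isize, 16) => IntTy::I16,
            (IntTy::Isize, 32) => IntTy::I32,
            (IntTy::Isize, 64) => IntTy::I64,
            _ => self,
        }
    }

    fn bit_width(self) -> Option<u64> {
        match self {
            IntTy::Isize => None, // target-dependent until normalized
            IntTy::I8 => Some(8),
            IntTy::I16 => Some(16),
            IntTy::I32 => Some(32),
            IntTy::I64 => Some(64),
            IntTy::I128 => Some(128),
        }
    }
}

fn main() {
    // On a 64-bit target, an `isize` lane now has a concrete width,
    // so the `unwrap` in the diff above cannot panic.
    assert_eq!(IntTy::Isize.normalize(64).bit_width(), Some(64));
}
```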
compiler/rustc_codegen_ssa/src/mir/rvalue.rs

@@ -3,11 +3,10 @@
 use super::{FunctionCx, LocalRef};

 use crate::base;
-use crate::common::{self, IntPredicate, RealPredicate};
+use crate::common::{self, IntPredicate};
 use crate::traits::*;
 use crate::MemFlags;

-use rustc_apfloat::{ieee, Float, Round, Status};
 use rustc_middle::mir;
 use rustc_middle::ty::cast::{CastTy, IntTy};
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

@@ -368,10 +367,10 @@ pub fn codegen_rvalue_operand(
                     bx.inttoptr(usize_llval, ll_t_out)
                 }
                 (CastTy::Float, CastTy::Int(IntTy::I)) => {
-                    cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
+                    bx.cast_float_to_int(true, llval, ll_t_out)
                 }
                 (CastTy::Float, CastTy::Int(_)) => {
-                    cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
+                    bx.cast_float_to_int(false, llval, ll_t_out)
                 }
                 _ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
             };

@@ -768,146 +767,3 @@ pub fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>, span: Span) ->
         // (*) this is only true if the type is suitable
     }
 }
-
-fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
-    bx: &mut Bx,
-    signed: bool,
-    x: Bx::Value,
-    float_ty: Bx::Type,
-    int_ty: Bx::Type,
-) -> Bx::Value {
-    if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
-        return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
-    }
-
-    let try_sat_result = if signed { bx.fptosi_sat(x, int_ty) } else { bx.fptoui_sat(x, int_ty) };
-    if let Some(try_sat_result) = try_sat_result {
-        return try_sat_result;
-    }
-
-    let int_width = bx.cx().int_width(int_ty);
-    let float_width = bx.cx().float_width(float_ty);
-    // LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
-    // destination integer type after rounding towards zero. This `undef` value can cause UB in
-    // safe code (see issue #10184), so we implement a saturating conversion on top of it:
-    // Semantically, the mathematical value of the input is rounded towards zero to the next
-    // mathematical integer, and then the result is clamped into the range of the destination
-    // integer type. Positive and negative infinity are mapped to the maximum and minimum value of
-    // the destination integer type. NaN is mapped to 0.
-    //
-    // Define f_min and f_max as the largest and smallest (finite) floats that are exactly equal to
-    // a value representable in int_ty.
-    // They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
-    // Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
-    // int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
-    // representable. Note that this only works if float_ty's exponent range is sufficiently large.
-    // f16 or 256 bit integers would break this property. Right now the smallest float type is f32
-    // with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
-    // On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
-    // we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
-    // This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
-    let int_max = |signed: bool, int_width: u64| -> u128 {
-        let shift_amount = 128 - int_width;
-        if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
-    };
-    let int_min = |signed: bool, int_width: u64| -> i128 {
-        if signed { i128::MIN >> (128 - int_width) } else { 0 }
-    };
-
-    let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
-        let rounded_min = ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
-        assert_eq!(rounded_min.status, Status::OK);
-        let rounded_max = ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
-        assert!(rounded_max.value.is_finite());
-        (rounded_min.value.to_bits(), rounded_max.value.to_bits())
-    };
-    let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
-        let rounded_min = ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
-        assert_eq!(rounded_min.status, Status::OK);
-        let rounded_max = ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
-        assert!(rounded_max.value.is_finite());
-        (rounded_min.value.to_bits(), rounded_max.value.to_bits())
-    };
-
-    let mut float_bits_to_llval = |bits| {
-        let bits_llval = match float_width {
-            32 => bx.cx().const_u32(bits as u32),
-            64 => bx.cx().const_u64(bits as u64),
-            n => bug!("unsupported float width {}", n),
-        };
-        bx.bitcast(bits_llval, float_ty)
-    };
-    let (f_min, f_max) = match float_width {
-        32 => compute_clamp_bounds_single(signed, int_width),
-        64 => compute_clamp_bounds_double(signed, int_width),
-        n => bug!("unsupported float width {}", n),
-    };
-    let f_min = float_bits_to_llval(f_min);
-    let f_max = float_bits_to_llval(f_max);
-    // To implement saturation, we perform the following steps:
-    //
-    // 1. Cast x to an integer with fpto[su]i. This may result in undef.
-    // 2. Compare x to f_min and f_max, and use the comparison results to select:
-    //    a) int_ty::MIN if x < f_min or x is NaN
-    //    b) int_ty::MAX if x > f_max
-    //    c) the result of fpto[su]i otherwise
-    // 3. If x is NaN, return 0.0, otherwise return the result of step 2.
-    //
-    // This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
-    // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
-    // undef does not introduce any non-determinism either.
-    // More importantly, the above procedure correctly implements saturating conversion.
-    // Proof (sketch):
-    // If x is NaN, 0 is returned by definition.
-    // Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
-    // This yields three cases to consider:
-    // (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
-    //     saturating conversion for inputs in that range.
-    // (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
-    //     (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
-    //     than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
-    //     is correct.
-    // (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
-    //     int_ty::MIN and therefore the return value of int_ty::MIN is correct.
-    // QED.
-
-    let int_max = bx.cx().const_uint_big(int_ty, int_max(signed, int_width));
-    let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
-    let zero = bx.cx().const_uint(int_ty, 0);
-
-    // Step 1 ...
-    let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
-    let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
-    let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
-
-    // Step 2: We use two comparisons and two selects, with %s1 being the
-    // result:
-    //     %less_or_nan = fcmp ult %x, %f_min
-    //     %greater = fcmp olt %x, %f_max
-    //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
-    //     %s1 = select %greater, int_ty::MAX, %s0
-    // Note that %less_or_nan uses an *unordered* comparison. This
-    // comparison is true if the operands are not comparable (i.e., if x is
-    // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
-    // x is NaN.
-    //
-    // Performance note: Unordered comparison can be lowered to a "flipped"
-    // comparison and a negation, and the negation can be merged into the
-    // select. Therefore, it not necessarily any more expensive than an
-    // ordered ("normal") comparison. Whether these optimizations will be
-    // performed is ultimately up to the backend, but at least x86 does
-    // perform them.
-    let s0 = bx.select(less_or_nan, int_min, fptosui_result);
-    let s1 = bx.select(greater, int_max, s0);
-
-    // Step 3: NaN replacement.
-    // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
-    // Therefore we only need to execute this step for signed integer types.
-    if signed {
-        // LLVM has no isNaN predicate, so we use (x == x) instead
-        let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
-        bx.select(cmp, s1, zero)
-    } else {
-        s1
-    }
-}
compiler/rustc_codegen_ssa/src/traits/builder.rs

@@ -1,18 +1,21 @@
 use super::abi::AbiBuilderMethods;
 use super::asm::AsmBuilderMethods;
 use super::consts::ConstMethods;
 use super::coverageinfo::CoverageInfoBuilderMethods;
 use super::debuginfo::DebugInfoBuilderMethods;
 use super::intrinsic::IntrinsicCallMethods;
-use super::type_::ArgAbiMethods;
+use super::misc::MiscMethods;
+use super::type_::{ArgAbiMethods, BaseTypeMethods};
 use super::{HasCodegen, StaticBuilderMethods};

 use crate::common::{
-    AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope,
+    AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
 };
 use crate::mir::operand::OperandRef;
 use crate::mir::place::PlaceRef;
 use crate::MemFlags;
+
+use rustc_apfloat::{ieee, Float, Round, Status};
 use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
 use rustc_middle::ty::Ty;
 use rustc_span::Span;

@@ -202,6 +205,179 @@ fn inbounds_gep(
     fn intcast(&mut self, val: Self::Value, dest_ty: Self::Type, is_signed: bool) -> Self::Value;
     fn pointercast(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;

+    fn cast_float_to_int(
+        &mut self,
+        signed: bool,
+        x: Self::Value,
+        dest_ty: Self::Type,
+    ) -> Self::Value {
+        let in_ty = self.cx().val_ty(x);
+        let (float_ty, int_ty) = if self.cx().type_kind(dest_ty) == TypeKind::Vector
+            && self.cx().type_kind(in_ty) == TypeKind::Vector
+        {
+            (self.cx().element_type(in_ty), self.cx().element_type(dest_ty))
+        } else {
+            (in_ty, dest_ty)
+        };
+        assert!(matches!(self.cx().type_kind(float_ty), TypeKind::Float | TypeKind::Double));
+        assert_eq!(self.cx().type_kind(int_ty), TypeKind::Integer);
+
+        if let Some(false) = self.cx().sess().opts.debugging_opts.saturating_float_casts {
+            return if signed { self.fptosi(x, dest_ty) } else { self.fptoui(x, dest_ty) };
+        }
+
+        let try_sat_result =
+            if signed { self.fptosi_sat(x, dest_ty) } else { self.fptoui_sat(x, dest_ty) };
+        if let Some(try_sat_result) = try_sat_result {
+            return try_sat_result;
+        }
+
+        let int_width = self.cx().int_width(int_ty);
+        let float_width = self.cx().float_width(float_ty);
+        // LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
+        // destination integer type after rounding towards zero. This `undef` value can cause UB in
+        // safe code (see issue #10184), so we implement a saturating conversion on top of it:
+        // Semantically, the mathematical value of the input is rounded towards zero to the next
+        // mathematical integer, and then the result is clamped into the range of the destination
+        // integer type. Positive and negative infinity are mapped to the maximum and minimum value of
+        // the destination integer type. NaN is mapped to 0.
+        //
+        // Define f_min and f_max as the largest and smallest (finite) floats that are exactly equal to
+        // a value representable in int_ty.
+        // They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
+        // Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
+        // int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
+        // representable. Note that this only works if float_ty's exponent range is sufficiently large.
+        // f16 or 256 bit integers would break this property. Right now the smallest float type is f32
+        // with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
+        // On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
+        // we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
+        // This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
+        let int_max = |signed: bool, int_width: u64| -> u128 {
+            let shift_amount = 128 - int_width;
+            if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
+        };
+        let int_min = |signed: bool, int_width: u64| -> i128 {
+            if signed { i128::MIN >> (128 - int_width) } else { 0 }
+        };
+
+        let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
+            let rounded_min =
+                ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
+            assert_eq!(rounded_min.status, Status::OK);
+            let rounded_max =
+                ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
+            assert!(rounded_max.value.is_finite());
+            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
+        };
+        let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
+            let rounded_min =
+                ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
+            assert_eq!(rounded_min.status, Status::OK);
+            let rounded_max =
+                ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
+            assert!(rounded_max.value.is_finite());
+            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
+        };
+        // To implement saturation, we perform the following steps:
+        //
+        // 1. Cast x to an integer with fpto[su]i. This may result in undef.
+        // 2. Compare x to f_min and f_max, and use the comparison results to select:
+        //    a) int_ty::MIN if x < f_min or x is NaN
+        //    b) int_ty::MAX if x > f_max
+        //    c) the result of fpto[su]i otherwise
+        // 3. If x is NaN, return 0.0, otherwise return the result of step 2.
+        //
+        // This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
+        // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
+        // undef does not introduce any non-determinism either.
+        // More importantly, the above procedure correctly implements saturating conversion.
+        // Proof (sketch):
+        // If x is NaN, 0 is returned by definition.
+        // Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
+        // This yields three cases to consider:
+        // (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
+        //     saturating conversion for inputs in that range.
+        // (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
+        //     (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
+        //     than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
+        //     is correct.
+        // (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
+        //     int_ty::MIN and therefore the return value of int_ty::MIN is correct.
+        // QED.
+
+        let float_bits_to_llval = |bx: &mut Self, bits| {
+            let bits_llval = match float_width {
+                32 => bx.cx().const_u32(bits as u32),
+                64 => bx.cx().const_u64(bits as u64),
+                n => bug!("unsupported float width {}", n),
+            };
+            bx.bitcast(bits_llval, float_ty)
+        };
+        let (f_min, f_max) = match float_width {
+            32 => compute_clamp_bounds_single(signed, int_width),
+            64 => compute_clamp_bounds_double(signed, int_width),
+            n => bug!("unsupported float width {}", n),
+        };
+        let f_min = float_bits_to_llval(self, f_min);
+        let f_max = float_bits_to_llval(self, f_max);
+        let int_max = self.cx().const_uint_big(int_ty, int_max(signed, int_width));
+        let int_min = self.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
+        let zero = self.cx().const_uint(int_ty, 0);
+
+        // If we're working with vectors, constants must be "splatted": the constant is duplicated
+        // into each lane of the vector. The algorithm stays the same, we are just using the
+        // same constant across all lanes.
+        let maybe_splat = |bx: &mut Self, val| {
+            if bx.cx().type_kind(dest_ty) == TypeKind::Vector {
+                bx.vector_splat(bx.vector_length(dest_ty), val)
+            } else {
+                val
+            }
+        };
+        let f_min = maybe_splat(self, f_min);
+        let f_max = maybe_splat(self, f_max);
+        let int_max = maybe_splat(self, int_max);
+        let int_min = maybe_splat(self, int_min);
+        let zero = maybe_splat(self, zero);
+
+        // Step 1 ...
+        let fptosui_result = if signed { self.fptosi(x, dest_ty) } else { self.fptoui(x, dest_ty) };
+        let less_or_nan = self.fcmp(RealPredicate::RealULT, x, f_min);
+        let greater = self.fcmp(RealPredicate::RealOGT, x, f_max);
+
+        // Step 2: We use two comparisons and two selects, with %s1 being the
+        // result:
+        //     %less_or_nan = fcmp ult %x, %f_min
+        //     %greater = fcmp olt %x, %f_max
+        //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
+        //     %s1 = select %greater, int_ty::MAX, %s0
+        // Note that %less_or_nan uses an *unordered* comparison. This
+        // comparison is true if the operands are not comparable (i.e., if x is
+        // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
+        // x is NaN.
+        //
+        // Performance note: Unordered comparison can be lowered to a "flipped"
+        // comparison and a negation, and the negation can be merged into the
+        // select. Therefore, it not necessarily any more expensive than an
+        // ordered ("normal") comparison. Whether these optimizations will be
+        // performed is ultimately up to the backend, but at least x86 does
+        // perform them.
+        let s0 = self.select(less_or_nan, int_min, fptosui_result);
+        let s1 = self.select(greater, int_max, s0);
+
+        // Step 3: NaN replacement.
+        // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
+        // Therefore we only need to execute this step for signed integer types.
+        if signed {
+            // LLVM has no isNaN predicate, so we use (x == x) instead
+            let cmp = self.fcmp(RealPredicate::RealOEQ, x, x);
+            self.select(cmp, s1, zero)
+        } else {
+            s1
+        }
+    }
+
 fn icmp(&mut self, op: IntPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
 fn fcmp(&mut self, op: RealPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
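As a sanity check on the algorithm in the comments above, here is a scalar Rust model of the three steps (a sketch for exposition only: the real method emits LLVM IR, and `F_MAX` below is the hand-computed f32 clamp bound for `i32`, 2^31 - 128):

```rust
// Scalar model of the saturating f32 -> i32 conversion described above.
fn cast_f32_to_i32_sat(x: f32) -> i32 {
    // f_min is exact (-2^31 is a power of two); f_max is i32::MAX rounded
    // towards zero to the nearest f32.
    const F_MIN: f32 = i32::MIN as f32;
    const F_MAX: f32 = 2147483520.0;

    // Step 1: the truncating cast. (In this Rust model `as` already
    // saturates; its value is only used when x is in range anyway.)
    let fptosi_result = x as i32;

    // Step 2: two compares and two selects. `!(x >= F_MIN)` is the
    // *unordered* less-than: it is also true when x is NaN.
    let less_or_nan = !(x >= F_MIN);
    let greater = x > F_MAX;
    let s0 = if less_or_nan { i32::MIN } else { fptosi_result };
    let s1 = if greater { i32::MAX } else { s0 };

    // Step 3: NaN replacement, needed only for signed destinations
    // (for unsigned ones, MIN == 0 already).
    if x == x { s1 } else { 0 }
}

fn main() {
    assert_eq!(cast_f32_to_i32_sat(f32::NAN), 0);
    assert_eq!(cast_f32_to_i32_sat(f32::INFINITY), i32::MAX);
    assert_eq!(cast_f32_to_i32_sat(f32::NEG_INFINITY), i32::MIN);
    assert_eq!(cast_f32_to_i32_sat(-1.5), -1);
}
```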
compiler/rustc_span/src/symbol.rs

@@ -1223,6 +1223,7 @@
         simd,
         simd_add,
         simd_and,
+        simd_as,
         simd_bitmask,
         simd_cast,
         simd_ceil,
compiler/rustc_typeck/src/check/intrinsic.rs

@@ -453,7 +453,7 @@ pub fn check_platform_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>)
         sym::simd_scatter => (3, vec![param(0), param(1), param(2)], tcx.mk_unit()),
         sym::simd_insert => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
         sym::simd_extract => (2, vec![param(0), tcx.types.u32], param(1)),
-        sym::simd_cast => (2, vec![param(0)], param(1)),
+        sym::simd_cast | sym::simd_as => (2, vec![param(0)], param(1)),
         sym::simd_bitmask => (2, vec![param(0)], param(1)),
         sym::simd_select | sym::simd_select_bitmask => {
             (2, vec![param(0), param(1), param(1)], param(1))
src/test/ui/simd/intrinsic/generic-as.rs (new file, 48 lines)

@@ -0,0 +1,48 @@
+// run-pass
+
+#![feature(repr_simd, platform_intrinsics)]
+
+extern "platform-intrinsic" {
+    fn simd_as<T, U>(x: T) -> U;
+}
+
+#[derive(Copy, Clone)]
+#[repr(simd)]
+struct V<T>([T; 2]);
+
+fn main() {
+    unsafe {
+        let u = V::<u32>([u32::MIN, u32::MAX]);
+        let i: V<i16> = simd_as(u);
+        assert_eq!(i.0[0], u.0[0] as i16);
+        assert_eq!(i.0[1], u.0[1] as i16);
+    }
+
+    unsafe {
+        let f = V::<f32>([f32::MIN, f32::MAX]);
+        let i: V<i16> = simd_as(f);
+        assert_eq!(i.0[0], f.0[0] as i16);
+        assert_eq!(i.0[1], f.0[1] as i16);
+    }
+
+    unsafe {
+        let f = V::<f32>([f32::MIN, f32::MAX]);
+        let u: V<u8> = simd_as(f);
+        assert_eq!(u.0[0], f.0[0] as u8);
+        assert_eq!(u.0[1], f.0[1] as u8);
+    }
+
+    unsafe {
+        let f = V::<f64>([f64::MIN, f64::MAX]);
+        let i: V<isize> = simd_as(f);
+        assert_eq!(i.0[0], f.0[0] as isize);
+        assert_eq!(i.0[1], f.0[1] as isize);
+    }
+
+    unsafe {
+        let f = V::<f64>([f64::MIN, f64::MAX]);
+        let u: V<usize> = simd_as(f);
+        assert_eq!(u.0[0], f.0[0] as usize);
+        assert_eq!(u.0[1], f.0[1] as usize);
+    }
+}
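For reference, the scalar `as` results these assertions reduce to: float-to-int `as` has saturated since Rust 1.45, while int-to-int casts still truncate. A quick worked check, runnable on stable:

```rust
fn main() {
    assert_eq!(f32::MIN as i16, i16::MIN); // ~ -3.4e38 clamps to -32768
    assert_eq!(f32::MAX as i16, i16::MAX); // ~  3.4e38 clamps to 32767
    assert_eq!(f32::MIN as u8, u8::MIN);   // negative clamps to 0
    assert_eq!(f32::MAX as u8, u8::MAX);   // clamps to 255
    assert_eq!(u32::MAX as i16, -1);       // integer casts wrap, not saturate
}
```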
src/test/ui/simd/intrinsic/generic-cast-pointer-width.rs (new file, 21 lines)

@@ -0,0 +1,21 @@
+// run-pass
+#![feature(repr_simd, platform_intrinsics)]
+
+extern "platform-intrinsic" {
+    fn simd_cast<T, U>(x: T) -> U;
+}
+
+#[derive(Copy, Clone)]
+#[repr(simd)]
+struct V<T>([T; 4]);
+
+fn main() {
+    let u = V::<usize>([0, 1, 2, 3]);
+    let uu32: V<u32> = unsafe { simd_cast(u) };
+    let ui64: V<i64> = unsafe { simd_cast(u) };
+
+    for (u, (uu32, ui64)) in u.0.iter().zip(uu32.0.iter().zip(ui64.0.iter())) {
+        assert_eq!(*u as u32, *uu32);
+        assert_eq!(*u as i64, *ui64);
+    }
+}