From abc99c62595831127e8ac4dfd6ccacd7ccbf9942 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Fri, 27 Mar 2020 20:55:54 +0100
Subject: [PATCH 1/6] Allow storing SIMD vectors in SSA values

---
 src/common.rs          | 12 ++++++++++++
 src/lib.rs             |  9 ++++++---
 src/pretty_clif.rs     |  3 ++-
 src/value_and_place.rs |  2 ++
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/common.rs b/src/common.rs
index c807bde15c3..166486c49a1 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -62,6 +62,18 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
                 pointer_ty(tcx)
             }
         }
+        ty::Adt(adt_def, _) if adt_def.repr.simd() => {
+            let (element, count) = match &tcx.layout_of(ParamEnv::reveal_all().and(ty)).unwrap().abi {
+                Abi::Vector { element, count } => (element.clone(), *count),
+                _ => unreachable!(),
+            };
+
+            match scalar_to_clif_type(tcx, element).by(u16::try_from(count).unwrap()) {
+                // Cranelift currently only implements icmp for 128bit vectors.
+                Some(vector_ty) if vector_ty.bits() == 128 => vector_ty,
+                _ => return None,
+            }
+        }
         ty::Param(_) => bug!("ty param {:?}", ty),
         _ => return None,
     })
diff --git a/src/lib.rs b/src/lib.rs
index 1096dbd227d..0376672fba7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -299,6 +299,8 @@ fn build_isa(sess: &Session, enable_pic: bool) -> Box<dyn isa::TargetIsa + 'stat
     };
     flags_builder.set("tls_model", tls_model).unwrap();
 
+    flags_builder.set("enable_simd", "true").unwrap();
+
     // FIXME(CraneStation/cranelift#732) fix LICM in presence of jump tables
     /*
     use rustc_session::config::OptLevel;
@@ -316,9 +318,10 @@ fn build_isa(sess: &Session, enable_pic: bool) -> Box<dyn isa::TargetIsa + 'stat
     }*/
 
     let flags = settings::Flags::new(flags_builder);
-    cranelift_codegen::isa::lookup(target_triple)
-        .unwrap()
-        .finish(flags)
+
+    let mut isa_builder = cranelift_codegen::isa::lookup(target_triple).unwrap();
+    isa_builder.enable("haswell").unwrap();
+    isa_builder.finish(flags)
 }
 
 /// This is the entrypoint for a hot plugged rustc_codegen_cranelift
diff --git a/src/pretty_clif.rs b/src/pretty_clif.rs
index 0dcd2c269e8..64346c423c8 100644
--- a/src/pretty_clif.rs
+++ b/src/pretty_clif.rs
@@ -248,7 +248,8 @@ pub(crate) fn write_clif_file<'tcx>(
             let target_triple = crate::target_triple(tcx.sess);
             writeln!(file, "test compile").unwrap();
             writeln!(file, "set is_pic").unwrap();
-            writeln!(file, "target {}", target_triple).unwrap();
+            writeln!(file, "set enable_simd").unwrap();
+            writeln!(file, "target {} haswell", target_triple).unwrap();
             writeln!(file, "").unwrap();
             file.write(clif.as_bytes()).unwrap();
         }
diff --git a/src/value_and_place.rs b/src/value_and_place.rs
index c9516a98bfe..477a417639e 100644
--- a/src/value_and_place.rs
+++ b/src/value_and_place.rs
@@ -589,6 +589,8 @@ impl<'tcx> CPlace<'tcx> {
         fx: &mut FunctionCx<'_, 'tcx, impl Backend>,
         field: mir::Field,
     ) -> CPlace<'tcx> {
+        // FIXME handle simd values
+
         let layout = self.layout();
         if let CPlaceInner::VarPair(local, var1, var2) = self.inner {
             let layout = layout.field(&*fx, field.index());

From 08fc673190ef43b111a6fea2fadf7de25bf7f282 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 28 Mar 2020 15:21:58 +0100
Subject: [PATCH 2/6] Handle SIMD vectors in CPlace::place_field

---
 src/abi/comments.rs    |  1 +
 src/debuginfo/mod.rs   |  5 ++++
 src/value_and_place.rs | 64 ++++++++++++++++++++++++++++++++----------
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/src/abi/comments.rs b/src/abi/comments.rs
index 7c4d7b12f6e..357c28c832a 100644
--- a/src/abi/comments.rs
+++ b/src/abi/comments.rs
@@ -82,6 +82,7 @@ pub(super) fn add_local_place_comments<'tcx>(
             assert_eq!(local, place_local);
             ("ssa", Cow::Owned(format!(",var=({}, {})", var1.index(), var2.index())))
         }
+        CPlaceInner::VarLane(_local, _var, _lane) => unreachable!(),
         CPlaceInner::Addr(ptr, meta) => {
             let meta = if let Some(meta) = meta {
                 Cow::Owned(format!(",meta={}", meta))
diff --git a/src/debuginfo/mod.rs b/src/debuginfo/mod.rs
index 37dc9f4e3c5..2c92fd0e47c 100644
--- a/src/debuginfo/mod.rs
+++ b/src/debuginfo/mod.rs
@@ -411,6 +411,11 @@ fn place_location<'tcx>(
 
             AttributeValue::Exprloc(Expression::new())
         }
+        CPlaceInner::VarLane(_, _, _) => {
+            // FIXME implement this
+
+            AttributeValue::Exprloc(Expression::new())
+        }
         CPlaceInner::Addr(_, _) => {
             // FIXME implement this (used by arguments and returns)
 
diff --git a/src/value_and_place.rs b/src/value_and_place.rs
index 477a417639e..d0e8a4b0c9f 100644
--- a/src/value_and_place.rs
+++ b/src/value_and_place.rs
@@ -272,6 +272,7 @@ pub(crate) struct CPlace<'tcx> {
 pub(crate) enum CPlaceInner {
     Var(Local, Variable),
     VarPair(Local, Variable, Variable),
+    VarLane(Local, Variable, u8),
     Addr(Pointer, Option<Value>),
 }
 
@@ -374,6 +375,12 @@ impl<'tcx> CPlace<'tcx> {
                 fx.bcx.set_val_label(val2, cranelift_codegen::ir::ValueLabel::new(var2.index()));
                 CValue::by_val_pair(val1, val2, layout)
             }
+            CPlaceInner::VarLane(_local, var, lane) => {
+                let val = fx.bcx.use_var(var);
+                fx.bcx.set_val_label(val, cranelift_codegen::ir::ValueLabel::new(var.index()));
+                let val = fx.bcx.ins().extractlane(val, lane);
+                CValue::by_val(val, layout)
+            }
             CPlaceInner::Addr(ptr, extra) => {
                 if let Some(extra) = extra {
                     CValue::by_ref_unsized(ptr, extra, layout)
@@ -395,7 +402,8 @@ impl<'tcx> CPlace<'tcx> {
         match self.inner {
             CPlaceInner::Addr(ptr, extra) => (ptr, extra),
             CPlaceInner::Var(_, _)
-            | CPlaceInner::VarPair(_, _, _) => bug!("Expected CPlace::Addr, found {:?}", self),
+            | CPlaceInner::VarPair(_, _, _)
+            | CPlaceInner::VarLane(_, _, _) => bug!("Expected CPlace::Addr, found {:?}", self),
         }
     }
 
@@ -527,6 +535,22 @@ impl<'tcx> CPlace<'tcx> {
                 transmute_value(fx, var2, data2, dst_ty2);
                 return;
             }
+            CPlaceInner::VarLane(_local, var, lane) => {
+                let data = from.load_scalar(fx);
+
+                // First get the old vector
+                let vector = fx.bcx.use_var(var);
+                fx.bcx.set_val_label(vector, cranelift_codegen::ir::ValueLabel::new(var.index()));
+
+                // Next insert the written lane into the vector
+                let vector = fx.bcx.ins().insertlane(vector, data, lane);
+
+                // Finally write the new vector
+                fx.bcx.set_val_label(vector, cranelift_codegen::ir::ValueLabel::new(var.index()));
+                fx.bcx.def_var(var, vector);
+
+                return;
+            }
             CPlaceInner::Addr(ptr, None) => {
                 if dst_layout.size == Size::ZERO || dst_layout.abi == Abi::Uninhabited {
                     return;
@@ -589,23 +613,33 @@ impl<'tcx> CPlace<'tcx> {
         fx: &mut FunctionCx<'_, 'tcx, impl Backend>,
         field: mir::Field,
     ) -> CPlace<'tcx> {
-        // FIXME handle simd values
-
         let layout = self.layout();
-        if let CPlaceInner::VarPair(local, var1, var2) = self.inner {
-            let layout = layout.field(&*fx, field.index());
 
-            match field.as_u32() {
-                0 => return CPlace {
-                    inner: CPlaceInner::Var(local, var1),
-                    layout,
-                },
-                1 => return CPlace {
-                    inner: CPlaceInner::Var(local, var2),
-                    layout,
-                },
-                _ => unreachable!("field should be 0 or 1"),
+        match self.inner {
+            CPlaceInner::Var(local, var) => {
+                if let Abi::Vector { .. } = layout.abi {
+                    return CPlace {
+                        inner: CPlaceInner::VarLane(local, var, field.as_u32().try_into().unwrap()),
+                        layout: layout.field(fx, field.as_u32().try_into().unwrap()),
+                    };
+                }
             }
+            CPlaceInner::VarPair(local, var1, var2) => {
+                let layout = layout.field(&*fx, field.index());
+
+                match field.as_u32() {
+                    0 => return CPlace {
+                        inner: CPlaceInner::Var(local, var1),
+                        layout,
+                    },
+                    1 => return CPlace {
+                        inner: CPlaceInner::Var(local, var2),
+                        layout,
+                    },
+                    _ => unreachable!("field should be 0 or 1"),
+                }
+            }
+            _ => {}
         }
 
         let (base, extra) = self.to_ptr_maybe_unsized();

From 67028cee5177aa5e5f9300114c9a8449258adb3a Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 28 Mar 2020 15:33:50 +0100
Subject: [PATCH 3/6] Use PassMode::ByVal for Abi::Vector

---
 src/abi/pass_mode.rs  | 8 +++++++-
 src/intrinsics/mod.rs | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs
index 390637e8e44..02604a37572 100644
--- a/src/abi/pass_mode.rs
+++ b/src/abi/pass_mode.rs
@@ -100,7 +100,13 @@ pub(super) fn get_pass_mode<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>)
             }
 
             // FIXME implement Vector Abi in a cg_llvm compatible way
-            Abi::Vector { .. } => PassMode::ByRef { size: Some(layout.size) },
+            Abi::Vector { .. } => {
+                if let Some(vector_ty) = crate::intrinsics::clif_vector_type(tcx, layout) {
+                    PassMode::ByVal(vector_ty)
+                } else {
+                    PassMode::ByRef { size: Some(layout.size) }
+                }
+            }
 
             Abi::Aggregate { sized: true } => PassMode::ByRef { size: Some(layout.size) },
             Abi::Aggregate { sized: false } => PassMode::ByRef { size: None },
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index ae2fe30989a..ea7326edc04 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -175,7 +175,7 @@ fn lane_type_and_count<'tcx>(
     (lane_layout, lane_count)
 }
 
-fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> {
+pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> {
     let (element, count) = match &layout.abi {
         Abi::Vector { element, count } => (element.clone(), *count),
         _ => unreachable!(),

From e8f1c5c53ab10a72f17ca0816b5890377ba5fc9c Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 29 Mar 2020 12:02:00 +0200
Subject: [PATCH 4/6] Don't forbid i64x2 as simd type

---
 src/intrinsics/mod.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index ea7326edc04..546864fc477 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -182,10 +182,8 @@ pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx
     };
 
     match scalar_to_clif_type(tcx, element).by(u16::try_from(count).unwrap()) {
-        // Cranelift currently only implements icmp for 128bit vectors. While 64bit lanes are
-        // supported, this needs either the `use_sse41_simd` or `use_sse42_simd` target flag
-        // to be enabled.
-        Some(vector_ty) if vector_ty.bits() == 128 && vector_ty.lane_type() != types::I64 => Some(vector_ty),
+        // Cranelift currently only implements icmp for 128bit vectors.
+        Some(vector_ty) if vector_ty.bits() == 128 => Some(vector_ty),
         _ => None,
     }
 }

From c1efc33941c53720f31a2667176e82deddad6a51 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 25 Jul 2020 13:17:49 +0200
Subject: [PATCH 5/6] Fix panic

---
 src/common.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/common.rs b/src/common.rs
index 166486c49a1..42131581088 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -83,10 +83,12 @@ fn clif_pair_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(type
     Some(match ty.kind {
         ty::Tuple(substs) if substs.len() == 2 => {
             let mut types = substs.types();
-            (
-                clif_type_from_ty(tcx, types.next().unwrap())?,
-                clif_type_from_ty(tcx, types.next().unwrap())?,
-            )
+            let a = clif_type_from_ty(tcx, types.next().unwrap())?;
+            let b = clif_type_from_ty(tcx, types.next().unwrap())?;
+            if a.is_vector() || b.is_vector() {
+                return None;
+            }
+            (a, b)
         }
         ty::RawPtr(TypeAndMut { ty: pointee_ty, mutbl: _ }) | ty::Ref(_, pointee_ty, _) => {
             if has_ptr_meta(tcx, pointee_ty) {

From e02ffdf7956b3a36b2e45ada9e8dc2e3ab302c4f Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 25 Jul 2020 16:15:42 +0200
Subject: [PATCH 6/6] Use nehalem instead of haswell as target cpu

---
 src/lib.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 0376672fba7..38ff7e90b3b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -320,7 +320,9 @@ fn build_isa(sess: &Session, enable_pic: bool) -> Box<dyn isa::TargetIsa + 'stat
     let flags = settings::Flags::new(flags_builder);
 
     let mut isa_builder = cranelift_codegen::isa::lookup(target_triple).unwrap();
-    isa_builder.enable("haswell").unwrap();
+    // Don't use "haswell", as it implies `has_lzcnt`.macOS CI is still at Ivy Bridge EP, so `lzcnt`
+    // is interpreted as `bsr`.
+    isa_builder.enable("nehalem").unwrap();
     isa_builder.finish(flags)
 }