Merge pull request #951 from bjorn3/simd_improvements

Simd improvements
This commit is contained in:
bjorn3 2020-07-25 16:31:43 +02:00 committed by GitHub
commit ce04770124
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 93 additions and 27 deletions

View File

@ -82,6 +82,7 @@ pub(super) fn add_local_place_comments<'tcx>(
assert_eq!(local, place_local);
("ssa", Cow::Owned(format!(",var=({}, {})", var1.index(), var2.index())))
}
CPlaceInner::VarLane(_local, _var, _lane) => unreachable!(),
CPlaceInner::Addr(ptr, meta) => {
let meta = if let Some(meta) = meta {
Cow::Owned(format!(",meta={}", meta))

View File

@ -100,7 +100,13 @@ pub(super) fn get_pass_mode<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>)
}
// FIXME implement Vector Abi in a cg_llvm compatible way
Abi::Vector { .. } => PassMode::ByRef { size: Some(layout.size) },
Abi::Vector { .. } => {
if let Some(vector_ty) = crate::intrinsics::clif_vector_type(tcx, layout) {
PassMode::ByVal(vector_ty)
} else {
PassMode::ByRef { size: Some(layout.size) }
}
}
Abi::Aggregate { sized: true } => PassMode::ByRef { size: Some(layout.size) },
Abi::Aggregate { sized: false } => PassMode::ByRef { size: None },

View File

@ -62,6 +62,18 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
pointer_ty(tcx)
}
}
ty::Adt(adt_def, _) if adt_def.repr.simd() => {
let (element, count) = match &tcx.layout_of(ParamEnv::reveal_all().and(ty)).unwrap().abi {
Abi::Vector { element, count } => (element.clone(), *count),
_ => unreachable!(),
};
match scalar_to_clif_type(tcx, element).by(u16::try_from(count).unwrap()) {
// Cranelift currently only implements icmp for 128bit vectors.
Some(vector_ty) if vector_ty.bits() == 128 => vector_ty,
_ => return None,
}
}
ty::Param(_) => bug!("ty param {:?}", ty),
_ => return None,
})
@ -71,10 +83,12 @@ fn clif_pair_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(type
Some(match ty.kind {
ty::Tuple(substs) if substs.len() == 2 => {
let mut types = substs.types();
(
clif_type_from_ty(tcx, types.next().unwrap())?,
clif_type_from_ty(tcx, types.next().unwrap())?,
)
let a = clif_type_from_ty(tcx, types.next().unwrap())?;
let b = clif_type_from_ty(tcx, types.next().unwrap())?;
if a.is_vector() || b.is_vector() {
return None;
}
(a, b)
}
ty::RawPtr(TypeAndMut { ty: pointee_ty, mutbl: _ }) | ty::Ref(_, pointee_ty, _) => {
if has_ptr_meta(tcx, pointee_ty) {

View File

@ -411,6 +411,11 @@ fn place_location<'tcx>(
AttributeValue::Exprloc(Expression::new())
}
CPlaceInner::VarLane(_, _, _) => {
// FIXME implement this
AttributeValue::Exprloc(Expression::new())
}
CPlaceInner::Addr(_, _) => {
// FIXME implement this (used by arguments and returns)

View File

@ -175,17 +175,15 @@ fn lane_type_and_count<'tcx>(
(lane_layout, lane_count)
}
fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> {
pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> {
let (element, count) = match &layout.abi {
Abi::Vector { element, count } => (element.clone(), *count),
_ => unreachable!(),
};
match scalar_to_clif_type(tcx, element).by(u16::try_from(count).unwrap()) {
// Cranelift currently only implements icmp for 128bit vectors. While 64bit lanes are
// supported, this needs either the `use_sse41_simd` or `use_sse42_simd` target flag
// to be enabled.
Some(vector_ty) if vector_ty.bits() == 128 && vector_ty.lane_type() != types::I64 => Some(vector_ty),
// Cranelift currently only implements icmp for 128bit vectors.
Some(vector_ty) if vector_ty.bits() == 128 => Some(vector_ty),
_ => None,
}
}

View File

@ -299,6 +299,8 @@ fn build_isa(sess: &Session, enable_pic: bool) -> Box<dyn isa::TargetIsa + 'stat
};
flags_builder.set("tls_model", tls_model).unwrap();
flags_builder.set("enable_simd", "true").unwrap();
// FIXME(CraneStation/cranelift#732) fix LICM in presence of jump tables
/*
use rustc_session::config::OptLevel;
@ -316,9 +318,12 @@ fn build_isa(sess: &Session, enable_pic: bool) -> Box<dyn isa::TargetIsa + 'stat
}*/
let flags = settings::Flags::new(flags_builder);
cranelift_codegen::isa::lookup(target_triple)
.unwrap()
.finish(flags)
let mut isa_builder = cranelift_codegen::isa::lookup(target_triple).unwrap();
// Don't use "haswell", as it implies `has_lzcnt`. macOS CI is still at Ivy Bridge EP, so `lzcnt`
// is interpreted as `bsr`.
isa_builder.enable("nehalem").unwrap();
isa_builder.finish(flags)
}
/// This is the entrypoint for a hot plugged rustc_codegen_cranelift

View File

@ -248,7 +248,8 @@ pub(crate) fn write_clif_file<'tcx>(
let target_triple = crate::target_triple(tcx.sess);
writeln!(file, "test compile").unwrap();
writeln!(file, "set is_pic").unwrap();
writeln!(file, "target {}", target_triple).unwrap();
writeln!(file, "set enable_simd").unwrap();
writeln!(file, "target {} haswell", target_triple).unwrap();
writeln!(file, "").unwrap();
file.write(clif.as_bytes()).unwrap();
}

View File

@ -272,6 +272,7 @@ pub(crate) struct CPlace<'tcx> {
/// Backing storage of a `CPlace`.
pub(crate) enum CPlaceInner {
/// The place is held in a single Cranelift variable belonging to `Local`.
Var(Local, Variable),
/// The place is held in two Cranelift variables; field 0 projects to the first
/// variable and field 1 to the second.
VarPair(Local, Variable, Variable),
/// The place is a single lane of a vector held in a Cranelift variable. The `u8` is the
/// lane index: reads use `extractlane`, writes use `insertlane` followed by redefining
/// the variable with the updated vector.
VarLane(Local, Variable, u8),
/// The place is addressed through a pointer. The optional `Value` is the extra
/// metadata that accompanies the pointer; when present, the place is read as an
/// unsized by-ref value.
Addr(Pointer, Option<Value>),
}
@ -374,6 +375,12 @@ impl<'tcx> CPlace<'tcx> {
fx.bcx.set_val_label(val2, cranelift_codegen::ir::ValueLabel::new(var2.index()));
CValue::by_val_pair(val1, val2, layout)
}
CPlaceInner::VarLane(_local, var, lane) => {
let val = fx.bcx.use_var(var);
fx.bcx.set_val_label(val, cranelift_codegen::ir::ValueLabel::new(var.index()));
let val = fx.bcx.ins().extractlane(val, lane);
CValue::by_val(val, layout)
}
CPlaceInner::Addr(ptr, extra) => {
if let Some(extra) = extra {
CValue::by_ref_unsized(ptr, extra, layout)
@ -395,7 +402,8 @@ impl<'tcx> CPlace<'tcx> {
match self.inner {
CPlaceInner::Addr(ptr, extra) => (ptr, extra),
CPlaceInner::Var(_, _)
| CPlaceInner::VarPair(_, _, _) => bug!("Expected CPlace::Addr, found {:?}", self),
| CPlaceInner::VarPair(_, _, _)
| CPlaceInner::VarLane(_, _, _) => bug!("Expected CPlace::Addr, found {:?}", self),
}
}
@ -527,6 +535,22 @@ impl<'tcx> CPlace<'tcx> {
transmute_value(fx, var2, data2, dst_ty2);
return;
}
CPlaceInner::VarLane(_local, var, lane) => {
let data = from.load_scalar(fx);
// First get the old vector
let vector = fx.bcx.use_var(var);
fx.bcx.set_val_label(vector, cranelift_codegen::ir::ValueLabel::new(var.index()));
// Next insert the written lane into the vector
let vector = fx.bcx.ins().insertlane(vector, data, lane);
// Finally write the new vector
fx.bcx.set_val_label(vector, cranelift_codegen::ir::ValueLabel::new(var.index()));
fx.bcx.def_var(var, vector);
return;
}
CPlaceInner::Addr(ptr, None) => {
if dst_layout.size == Size::ZERO || dst_layout.abi == Abi::Uninhabited {
return;
@ -590,20 +614,32 @@ impl<'tcx> CPlace<'tcx> {
field: mir::Field,
) -> CPlace<'tcx> {
let layout = self.layout();
if let CPlaceInner::VarPair(local, var1, var2) = self.inner {
let layout = layout.field(&*fx, field.index());
match field.as_u32() {
0 => return CPlace {
inner: CPlaceInner::Var(local, var1),
layout,
},
1 => return CPlace {
inner: CPlaceInner::Var(local, var2),
layout,
},
_ => unreachable!("field should be 0 or 1"),
match self.inner {
CPlaceInner::Var(local, var) => {
if let Abi::Vector { .. } = layout.abi {
return CPlace {
inner: CPlaceInner::VarLane(local, var, field.as_u32().try_into().unwrap()),
layout: layout.field(fx, field.as_u32().try_into().unwrap()),
};
}
}
CPlaceInner::VarPair(local, var1, var2) => {
let layout = layout.field(&*fx, field.index());
match field.as_u32() {
0 => return CPlace {
inner: CPlaceInner::Var(local, var1),
layout,
},
1 => return CPlace {
inner: CPlaceInner::Var(local, var2),
layout,
},
_ => unreachable!("field should be 0 or 1"),
}
}
_ => {}
}
let (base, extra) = self.to_ptr_maybe_unsized();