From 80a7d5f61d981170c98f86d1716e2f325c86a1e2 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 24 Jul 2024 20:55:43 +0200 Subject: [PATCH 1/2] nontemporal_store: make sure that the intrinsic is truly just a hint --- src/intrinsics/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index b21c559e668..29deac60730 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>( // Cranelift treats stores as volatile by default // FIXME correctly handle unaligned_volatile_store - // FIXME actually do nontemporal stores if requested + // FIXME actually do nontemporal stores if requested (but do not just emit MOVNT on x86; + // see the LLVM backend for details) let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout()); dest.write_cvalue(fx, val); } From b8b3a9328f8449bd18a1af09ca93d6cf114720df Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 8 Aug 2024 17:18:20 +1000 Subject: [PATCH 2/2] Shrink `TyKind::FnPtr`. By splitting the `FnSig` within `TyKind::FnPtr` into `FnSigTys` and `FnHeader`, which can be packed more efficiently. This reduces the size of the hot `TyKind` type from 32 bytes to 24 bytes on 64-bit platforms. This reduces peak memory usage by a few percent on some benchmarks. It also reduces cache misses and page faults similarly, though this doesn't translate to clear cycles or wall-time improvements on CI. --- src/common.rs | 2 +- src/value_and_place.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common.rs b/src/common.rs index 09317139936..cbede8bc579 100644 --- a/src/common.rs +++ b/src/common.rs @@ -69,7 +69,7 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option types::F64, FloatTy::F128 => unimplemented!("f16_f128"), }, - ty::FnPtr(_) => pointer_ty(tcx), + ty::FnPtr(..) => pointer_ty(tcx), ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => { if has_ptr_meta(tcx, *pointee_ty) { return None; diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 1aa28daeafc..c2aa1f20648 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -872,7 +872,7 @@ pub(crate) fn assert_assignable<'tcx>( (ty::Ref(_, a, _), ty::RawPtr(b, _)) | (ty::RawPtr(a, _), ty::Ref(_, b, _)) => { assert_assignable(fx, *a, *b, limit - 1); } - (ty::FnPtr(_), ty::FnPtr(_)) => { + (ty::FnPtr(..), ty::FnPtr(..)) => { let from_sig = fx.tcx.normalize_erasing_late_bound_regions( ParamEnv::reveal_all(), from_ty.fn_sig(fx.tcx),