From f1b86baf8453733c72e196ce2c08b4d85e94d81a Mon Sep 17 00:00:00 2001 From: Markus Everling Date: Sat, 22 Apr 2023 23:22:39 +0000 Subject: [PATCH] Use pointer reads for better codegen in debug mode --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/vector.rs | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a372e2e40c4..e054d483ca5 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -2,7 +2,6 @@ #![feature( const_ptr_read, const_refs_to_cell, - const_transmute_copy, convert_float_to_int, decl_macro, intra_doc_pointers, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index eee105ff5fc..a38d701588c 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -163,21 +163,31 @@ impl<const LANES: usize> Swizzle<1, LANES> for Splat { /// Converts an array to a SIMD vector. pub const fn from_array(array: [T; LANES]) -> Self { // SAFETY: Transmuting between `Simd<T, LANES>` and `[T; LANES]` - // is always valid. + // is always valid. We need to use `read_unaligned` here, since + // the array may have a lower alignment than the vector. + // + // FIXME: We currently use a pointer read instead of `transmute_copy` because + // it results in better codegen with optimizations disabled, but we should + // probably just use `transmute` once that works on const generic types. // // NOTE: This deliberately doesn't just use `Self(array)`, see the comment // on the struct definition for details. - unsafe { core::mem::transmute_copy(&array) } + unsafe { (&array as *const [T; LANES] as *const Self).read_unaligned() } } /// Converts a SIMD vector to an array. pub const fn to_array(self) -> [T; LANES] { // SAFETY: Transmuting between `Simd<T, LANES>` and `[T; LANES]` - // is always valid. + // is always valid. No need to use `read_unaligned` here, since + // the vector never has a lower alignment than the array. 
+ // + // FIXME: We currently use a pointer read instead of `transmute_copy` because + // it results in better codegen with optimizations disabled, but we should + // probably just use `transmute` once that works on const generic types. // // NOTE: This deliberately doesn't just use `self.0`, see the comment // on the struct definition for details. - unsafe { core::mem::transmute_copy(&self) } + unsafe { (&self as *const Self as *const [T; LANES]).read() } } /// Converts a slice to a SIMD vector containing `slice[..LANES]`.