From a5fcfad890b8b35aa5c4efeff0e5eae5ebc504c1 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 21 Nov 2023 17:00:43 +0100 Subject: [PATCH] Fix _mm_sad_epu8 implementation Fixes rust-lang/rustc_codegen_cranelift#1426 --- src/intrinsics/llvm_x86.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 8dd2b6ed014..78a0a347cfc 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -364,9 +364,11 @@ fn select4( for out_lane_idx in 0..lane_count / 8 { let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); - for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 { let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let a_lane = fx.bcx.ins().uextend(types::I16, a_lane); let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = fx.bcx.ins().uextend(types::I16, b_lane); let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);