diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index a6a3f0f9646..d034f9b5256 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -226,7 +226,8 @@ fn store(
             //   when passed by value, making it smaller.
             // - On some ABIs, the Rust layout { u16, u16, u16 } may be padded up to 8 bytes
             //   when passed by value, making it larger.
-            let copy_bytes = cmp::min(scratch_size.bytes(), self.layout.size.bytes());
+            let copy_bytes =
+                cmp::min(cast.unaligned_size(bx).bytes(), self.layout.size.bytes());
             // Allocate some scratch space...
             let llscratch = bx.alloca(scratch_size, scratch_align);
             bx.lifetime_start(llscratch, scratch_size);
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
index 57138d3b9db..d4da13068d2 100644
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -1540,7 +1540,7 @@ fn codegen_argument(
                 //   when passed by value, making it smaller.
                 // - On some ABIs, the Rust layout { u16, u16, u16 } may be padded up to 8 bytes
                 //   when passed by value, making it larger.
-                let copy_bytes = cmp::min(scratch_size.bytes(), arg.layout.size.bytes());
+                let copy_bytes = cmp::min(cast.unaligned_size(bx).bytes(), arg.layout.size.bytes());
                 // Allocate some scratch space...
                 let llscratch = bx.alloca(scratch_size, scratch_align);
                 bx.lifetime_start(llscratch, scratch_size);
diff --git a/compiler/rustc_target/src/abi/call/mod.rs b/compiler/rustc_target/src/abi/call/mod.rs
index 5713542c17d..8058130f441 100644
--- a/compiler/rustc_target/src/abi/call/mod.rs
+++ b/compiler/rustc_target/src/abi/call/mod.rs
@@ -339,7 +339,9 @@ pub fn pair(a: Reg, b: Reg) -> CastTarget {
         }
     }

-    pub fn size(&self, _cx: &C) -> Size {
+    /// When you only access the range containing valid data, you can use this unaligned size;
+    /// otherwise, use the safer `size` method.
+    pub fn unaligned_size(&self, _cx: &C) -> Size {
         // Prefix arguments are passed in specific designated registers
         let prefix_size = self
             .prefix
@@ -353,6 +355,10 @@ pub fn size(&self, _cx: &C) -> Size {
         prefix_size + rest_size
     }

+    pub fn size(&self, cx: &C) -> Size {
+        self.unaligned_size(cx).align_to(self.align(cx))
+    }
+
     pub fn align(&self, cx: &C) -> Align {
         self.prefix
             .iter()
diff --git a/tests/codegen/cast-target-abi.rs b/tests/codegen/cast-target-abi.rs
index e5ff64699df..13b74d68f76 100644
--- a/tests/codegen/cast-target-abi.rs
+++ b/tests/codegen/cast-target-abi.rs
@@ -128,11 +128,7 @@ pub extern "C" fn returns_twou16s() -> TwoU16s {
 #[no_mangle]
 #[inline(never)]
 pub extern "C" fn receives_fiveu16s(x: FiveU16s) {
-    // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [10 x i8], align [[ABI_ALIGN:8]]
+    // CHECK: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // CHECK: [[RUST_ALLOCA:%.+]] = alloca [10 x i8], align [[RUST_ALIGN:2]]

@@ -218,11 +214,7 @@ pub extern "C" fn returns_doubledouble() -> DoubleDouble {
 #[no_mangle]
 #[inline(never)]
 pub extern "C" fn receives_three32s(x: Three32s) {
-    // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
+    // CHECK: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // CHECK: [[RUST_ALLOCA:%.+]] = alloca [12 x i8], align [[RUST_ALIGN:4]]

@@ -271,7 +263,7 @@ pub extern "C" fn returns_three32s() -> Three32s {
 #[inline(never)]
 pub extern "C" fn receives_doublefloat(x: DoubleFloat) {
     // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
+    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // x86_64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

@@ -382,11 +374,7 @@ pub fn return_twou16s() -> TwoU16s {
 // CHECK-LABEL: @call_fiveu16s
 #[no_mangle]
 pub fn call_fiveu16s() {
-    // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [10 x i8], align [[ABI_ALIGN:8]]
+    // CHECK: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // CHECK: [[RUST_ALLOCA:%.+]] = alloca [10 x i8], align 2

@@ -416,7 +404,7 @@ pub fn return_fiveu16s() -> FiveU16s {
     // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [10 x i8], align [[ABI_ALIGN:8]]
+    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // aarch64: [[ABI_VALUE:%.+]] = call [[ABI_TYPE:\[2 x i64\]]] @returns_fiveu16s()
     // loongarch64: [[ABI_VALUE:%.+]] = call [[ABI_TYPE:\[2 x i64\]]] @returns_fiveu16s()
@@ -501,7 +489,7 @@ pub fn return_doubledouble() -> DoubleDouble {
 #[no_mangle]
 pub fn call_doublefloat() {
     // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
+    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // x86_64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

@@ -544,7 +532,7 @@ pub fn return_doublefloat() -> DoubleFloat {

     // The other targets copy the cast ABI type to an alloca.
     // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
+    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // x86_64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // aarch64: [[RUST_ALLOCA:%.+]] = alloca [16 x i8], align [[RUST_ALIGN:8]]
@@ -568,12 +556,7 @@ pub fn return_doublefloat() -> DoubleFloat {
 // CHECK-LABEL: @call_three32s
 #[no_mangle]
 pub fn call_three32s() {
-    // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // powerpc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
-
+    // CHECK: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // CHECK: [[RUST_ALLOCA:%.+]] = alloca [12 x i8], align [[RUST_ALIGN:4]]

     // CHECK: call void @llvm.memcpy.{{.+}}(ptr align [[ABI_ALIGN]] [[ABI_ALLOCA]], ptr align [[RUST_ALIGN]] [[RUST_ALLOCA]], i64 12, i1 false)
@@ -600,7 +583,7 @@ pub fn return_three32s() -> Three32s {
     // aarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // loongarch64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
     // sparc64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]
-    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [12 x i8], align [[ABI_ALIGN:8]]
+    // x86_64: [[ABI_ALLOCA:%.+]] = alloca [16 x i8], align [[ABI_ALIGN:8]]

     // aarch64: [[ABI_VALUE:%.+]] = call [[ABI_TYPE:\[2 x i64\]]] @returns_three32s()
     // loongarch64: [[ABI_VALUE:%.+]] = call [[ABI_TYPE:\[2 x i64\]]] @returns_three32s()
diff --git a/tests/codegen/cffi/ffi-out-of-bounds-loads.rs b/tests/codegen/cffi/ffi-out-of-bounds-loads.rs
index 614d5d94f62..a4b7c0caa6d 100644
--- a/tests/codegen/cffi/ffi-out-of-bounds-loads.rs
+++ b/tests/codegen/cffi/ffi-out-of-bounds-loads.rs
@@ -1,5 +1,5 @@
 //@ revisions: linux apple
-//@ compile-flags: -C opt-level=0 -C no-prepopulate-passes
+//@ compile-flags: -C opt-level=0 -C no-prepopulate-passes -C passes=lint

 //@[linux] compile-flags: --target x86_64-unknown-linux-gnu
 //@[linux] needs-llvm-components: x86
@@ -36,7 +36,7 @@ struct S {
 pub fn test() {
     let s = S { f1: 1, f2: 2, f3: 3 };
     unsafe {
-        // CHECK: [[ALLOCA:%.+]] = alloca [12 x i8], align 8
+        // CHECK: [[ALLOCA:%.+]] = alloca [16 x i8], align 8
         // CHECK: [[LOAD:%.+]] = load { i64, i32 }, ptr [[ALLOCA]], align 8
         // CHECK: call void @foo({ i64, i32 } [[LOAD]])
         foo(s);
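
For intuition, here is a minimal standalone sketch of the sizing rule this patch introduces, using a toy `Size` type and the 12-byte `{ i64, i32 }` figures from the x86_64 test above; it is a simplified model, not the real `rustc_abi`/`rustc_target` API. The scratch alloca now uses the cast type's aligned size, while the memcpy between the Rust value and the scratch slot still copies only `min(unaligned_size, rust_size)` bytes.

```rust
// Standalone sketch (not rustc code): a toy Size type illustrating the rule.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Size(u64);

impl Size {
    /// Round up to the next multiple of `align` (mirrors what `size` now does
    /// with `unaligned_size` in the patch).
    fn align_to(self, align: u64) -> Size {
        Size((self.0 + align - 1) / align * align)
    }
}

fn main() {
    // Numbers from the x86_64 case in the tests: a { i64, i32 } cast type has
    // 12 bytes of valid data and 8-byte alignment; the Rust layout is also 12 bytes.
    let unaligned_size = Size(12);
    let align = 8;
    let rust_size = Size(12);

    // The scratch alloca is now sized from the aligned size, so a full-width
    // access of the cast type stays inside the allocation.
    let scratch_size = unaligned_size.align_to(align);
    assert_eq!(scratch_size, Size(16));

    // The copy between the Rust alloca and the scratch alloca still moves only
    // the bytes that hold valid data.
    let copy_bytes = unaligned_size.0.min(rust_size.0);
    assert_eq!(copy_bytes, 12);

    println!("alloca {} bytes, memcpy {} bytes", scratch_size.0, copy_bytes);
}
```

This matches the updated FileCheck expectations: the ABI alloca grows to `[16 x i8]` while the `llvm.memcpy` length stays at the Rust size (10 or 12 bytes), which is what `ffi-out-of-bounds-loads.rs`, now run with `-C passes=lint`, guards against.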