rustc_target: Add alignment to indirectly-passed by-value types, correcting the

alignment of `byval` on x86 in the process.

Commit 88e4d2c2918428d55e34cd57c11279ea839c8822 from five years ago removed
support for alignment on indirectly-passed arguments because of problems with
the `i686-pc-windows-msvc` target. Unfortunately, the `memcpy` optimizations I
recently added to LLVM 16 depend on this to forward `memcpy`s. This commit
attempts to fix the problems with `byval` parameters on that target and now
correctly adds the `align` attribute.

The problem is summarized in [this comment] by @eddyb. Briefly, 32-bit x86 has
special alignment rules for `byval` parameters: for the most part, their
alignment is forced to 4. This is not well-documented anywhere but in the Clang
source. I looked at the logic in Clang `TargetInfo.cpp` and tried to replicate
it here. The relevant methods in that file are
`X86_32ABIInfo::getIndirectResult()` and
`X86_32ABIInfo::getTypeStackAlignInBytes()`. The `align` parameter attribute
for `byval` parameters in LLVM must match the platform ABI, or miscompilations
will occur. Note that this doesn't use the approach suggested by eddyb, because
I felt it was overkill to store the alignment in `on_stack` when special
handling is really only needed for 32-bit x86.

As a side effect, this should fix #80127, because it will make the `align`
parameter attribute for `byval` parameters match the platform ABI on LLVM
x86-64.

[this comment]: https://github.com/rust-lang/rust/pull/80822#issuecomment-829985417
This commit is contained in:
Patrick Walton 2022-10-31 20:38:40 -07:00 committed by Erik Desjardins
parent 8ca44ef9ca
commit 0becc89d4a
11 changed files with 208 additions and 17 deletions

View File

@ -10,7 +10,7 @@ fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
if arg.layout.is_aggregate() {
arg.make_indirect_byval();
arg.make_indirect_byval(None);
} else {
arg.extend_integer_width_to(32);
}

View File

@ -494,9 +494,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
.set(ArgAttribute::NonNull)
.set(ArgAttribute::NoUndef);
attrs.pointee_size = layout.size;
// FIXME(eddyb) We should be doing this, but at least on
// i686-pc-windows-msvc, it results in wrong stack offsets.
// attrs.pointee_align = Some(layout.align.abi);
attrs.pointee_align = Some(layout.align.abi);
let extra_attrs = layout.is_unsized().then_some(ArgAttributes::new());
@ -513,11 +511,19 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
self.mode = Self::indirect_pass_mode(&self.layout);
}
pub fn make_indirect_byval(&mut self) {
pub fn make_indirect_byval(&mut self, byval_align: Option<Align>) {
self.make_indirect();
match self.mode {
PassMode::Indirect { attrs: _, extra_attrs: _, ref mut on_stack } => {
PassMode::Indirect { ref mut attrs, extra_attrs: _, ref mut on_stack } => {
*on_stack = true;
// Some platforms, like 32-bit x86, change the alignment of the type when passing
// `byval`. Account for that.
if let Some(byval_align) = byval_align {
// On all targets with byval align this is currently true, so let's assert it.
debug_assert!(byval_align >= Align::from_bytes(4).unwrap());
attrs.pointee_align = Some(byval_align);
}
}
_ => unreachable!(),
}
@ -644,7 +650,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
{
if abi == spec::abi::Abi::X86Interrupt {
if let Some(arg) = self.args.first_mut() {
arg.make_indirect_byval();
// FIXME(pcwalton): This probably should use the x86 `byval` ABI...
arg.make_indirect_byval(None);
}
return Ok(());
}

View File

@ -36,7 +36,7 @@ where
{
arg.extend_integer_width_to(32);
if arg.layout.is_aggregate() && !unwrap_trivial_aggregate(cx, arg) {
arg.make_indirect_byval();
arg.make_indirect_byval(None);
}
}

View File

@ -1,5 +1,5 @@
use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
use crate::abi::{HasDataLayout, TyAbiInterface};
use crate::abi::{Align, HasDataLayout, TyAbiInterface};
use crate::spec::HasTargetSpec;
#[derive(PartialEq)]
@ -53,11 +53,38 @@ where
if arg.is_ignore() {
continue;
}
if arg.layout.is_aggregate() {
arg.make_indirect_byval();
} else {
if !arg.layout.is_aggregate() {
arg.extend_integer_width_to(32);
continue;
}
// We need to compute the alignment of the `byval` argument. The rules can be found in
// `X86_32ABIInfo::getTypeStackAlignInBytes` in Clang's `TargetInfo.cpp`. Summarized here,
// they are:
//
// 1. If the natural alignment of the type is less than or equal to 4, the alignment is 4.
//
// 2. Otherwise, on Linux, the alignment of any vector type is the natural alignment.
// (This doesn't matter here because we ensure we have an aggregate with the check above.)
//
// 3. Otherwise, on Apple platforms, the alignment of anything that contains a vector type
// is 16.
//
// 4. If none of these conditions are true, the alignment is 4.
let t = cx.target_spec();
let align_4 = Align::from_bytes(4).unwrap();
let align_16 = Align::from_bytes(16).unwrap();
let byval_align = if arg.layout.align.abi < align_4 {
align_4
} else if t.is_like_osx && arg.layout.align.abi >= align_16 {
// FIXME(pcwalton): This is dubious--we should actually be looking inside the type to
// determine if it contains SIMD vector values--but I think it's fine?
align_16
} else {
align_4
};
arg.make_indirect_byval(Some(byval_align));
}
if flavor == Flavor::FastcallOrVectorcall {

View File

@ -213,7 +213,7 @@ where
match cls_or_mem {
Err(Memory) => {
if is_arg {
arg.make_indirect_byval();
arg.make_indirect_byval(None);
} else {
// `sret` parameter thus one less integer register available
arg.make_indirect();

View File

@ -0,0 +1,56 @@
// ignore-x86
// ignore-aarch64
// ignore-aarch64_be
// ignore-arm
// ignore-armeb
// ignore-avr
// ignore-bpfel
// ignore-bpfeb
// ignore-hexagon
// ignore-mips
// ignore-mips64
// ignore-msp430
// ignore-powerpc64
// ignore-powerpc64le
// ignore-powerpc
// ignore-r600
// ignore-amdgcn
// ignore-sparc
// ignore-sparcv9
// ignore-sparcel
// ignore-s390x
// ignore-tce
// ignore-thumb
// ignore-thumbeb
// ignore-xcore
// ignore-nvptx
// ignore-nvptx64
// ignore-le32
// ignore-le64
// ignore-amdil
// ignore-amdil64
// ignore-hsail
// ignore-hsail64
// ignore-spir
// ignore-spir64
// ignore-kalimba
// ignore-shave
//
// Tests that `byval` alignment is properly specified (#80127).
// The only targets that use `byval` are m68k, wasm, x86-64, and x86. Note that
// x86 has special rules (see #103830), and it's therefore ignored here.
#[repr(C)]
#[repr(align(16))]
struct Foo {
a: [i32; 16],
}
extern "C" {
// CHECK: declare void @f({{.*}}byval(%Foo) align 16{{.*}})
fn f(foo: Foo);
}
pub fn main() {
unsafe { f(Foo { a: [1; 16] }) }
}

View File

@ -42,7 +42,7 @@ pub fn borrow_call(x: &i32, f: fn(&i32) -> &i32) -> &i32 {
f(x)
}
// CHECK: void @struct_({{%S\*|ptr}} sret(%S){{( %_0)?}}, {{%S\*|ptr}} %x)
// CHECK: void @struct_({{%S\*|ptr}} sret(%S) align 4{{( %_0)?}}, {{%S\*|ptr}} align 4 %x)
#[no_mangle]
pub fn struct_(x: S) -> S {
x
@ -51,7 +51,7 @@ pub fn struct_(x: S) -> S {
// CHECK-LABEL: @struct_call
#[no_mangle]
pub fn struct_call(x: S, f: fn(S) -> S) -> S {
// CHECK: call void %f({{%S\*|ptr}} sret(%S){{( %_0)?}}, {{%S\*|ptr}} %{{.+}})
// CHECK: call void %f({{%S\*|ptr}} sret(%S) align 4{{( %_0)?}}, {{%S\*|ptr}} align 4 %{{.+}})
f(x)
}

View File

@ -142,7 +142,7 @@ pub fn mutable_notunpin_borrow(_: &mut NotUnpin) {
pub fn notunpin_borrow(_: &NotUnpin) {
}
// CHECK: @indirect_struct({{%S\*|ptr}} noalias nocapture noundef readonly dereferenceable(32) %_1)
// CHECK: @indirect_struct({{%S\*|ptr}} noalias nocapture noundef readonly align 4 dereferenceable(32) %_1)
#[no_mangle]
pub fn indirect_struct(_: S) {
}
@ -188,7 +188,7 @@ pub fn notunpin_box(x: Box<NotUnpin>) -> Box<NotUnpin> {
x
}
// CHECK: @struct_return({{%S\*|ptr}} noalias nocapture noundef sret(%S) dereferenceable(32){{( %_0)?}})
// CHECK: @struct_return({{%S\*|ptr}} noalias nocapture noundef sret(%S) align 4 dereferenceable(32){{( %_0)?}})
#[no_mangle]
pub fn struct_return() -> S {
S {

View File

@ -0,0 +1,5 @@
include ../tools.mk
all: $(call NATIVE_STATICLIB,test)
$(RUSTC) test.rs
$(call RUN,test) || exit 1

View File

@ -0,0 +1,35 @@
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
struct TwoU64s
{
uint64_t a;
uint64_t b;
} __attribute__((aligned(16)));
struct BoolAndU32
{
bool a;
uint32_t b;
};
int32_t many_args(
void *a,
void *b,
const char *c,
uint64_t d,
bool e,
struct BoolAndU32 f,
void *g,
struct TwoU64s h,
void *i,
void *j,
void *k,
void *l,
const char *m)
{
assert(strcmp(m, "Hello world") == 0);
return 0;
}

View File

@ -0,0 +1,61 @@
// Issue #80127: Passing structs via FFI should work with explicit alignment.
use std::ffi::CString;
use std::ptr::null_mut;
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(C)]
#[repr(align(16))]
pub struct TwoU64s {
pub a: u64,
pub b: u64,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct BoolAndU32 {
pub a: bool,
pub b: u32,
}
#[link(name = "test", kind = "static")]
extern "C" {
fn many_args(
a: *mut (),
b: *mut (),
c: *const i8,
d: u64,
e: bool,
f: BoolAndU32,
g: *mut (),
h: TwoU64s,
i: *mut (),
j: *mut (),
k: *mut (),
l: *mut (),
m: *const i8,
) -> i32;
}
fn main() {
let two_u64s = TwoU64s { a: 1, b: 2 };
let bool_and_u32 = BoolAndU32 { a: true, b: 3 };
let string = CString::new("Hello world").unwrap();
unsafe {
many_args(
null_mut(),
null_mut(),
null_mut(),
4,
true,
bool_and_u32,
null_mut(),
two_u64s,
null_mut(),
null_mut(),
null_mut(),
null_mut(),
string.as_ptr(),
);
}
}