// rust/tests/assembly/align_offset.rs

//@ assembly-output: emit-asm
//@ compile-flags: -Copt-level=1
//@ only-x86_64
#![crate_type = "rlib"]

// CHECK-LABEL: align_offset_byte_ptr
// CHECK: leaq 31
// CHECK: andq $-32
// CHECK: subq
/// Asks how far a raw byte pointer has to advance to reach a 32-byte boundary.
///
/// Exists only so that the instruction sequence generated for the call can be
/// matched against the FileCheck directives placed directly above this item.
#[no_mangle]
pub fn align_offset_byte_ptr(ptr: *const u8) -> usize {
    // Requested alignment, in bytes.
    const ALIGN: usize = 32;
    <*const u8>::align_offset(ptr, ALIGN)
}

// CHECK-LABEL: align_offset_byte_slice
// CHECK: leaq 31
// CHECK: andq $-32
// CHECK: subq
/// Asks how far the start of a byte slice has to advance to reach a 32-byte
/// boundary.
///
/// Exists only so that the instruction sequence generated for the call can be
/// matched against the FileCheck directives placed directly above this item.
#[no_mangle]
pub fn align_offset_byte_slice(slice: &[u8]) -> usize {
    // Requested alignment, in bytes.
    const ALIGN: usize = 32;
    let start: *const u8 = slice.as_ptr();
    <*const u8>::align_offset(start, ALIGN)
}

// CHECK-LABEL: align_offset_word_ptr
// CHECK: leaq 31
// CHECK: andq $-32
// CHECK: subq
// CHECK: shrq
// This `ptr` is not known to be aligned, so it is required to check if it is at all possible to
// align. LLVM applies a simple mask.
// CHECK: orq
/// Asks how many `u32` elements a raw pointer has to advance to reach a
/// 32-byte boundary.
///
/// A raw `*const u32` carries no alignment guarantee, so the generated code
/// must also cover the case where no whole number of elements can align it;
/// the directives directly above this item look for that extra masking step.
#[no_mangle]
pub fn align_offset_word_ptr(ptr: *const u32) -> usize {
    // Requested alignment, in bytes.
    const ALIGN: usize = 32;
    <*const u32>::align_offset(ptr, ALIGN)
}

// CHECK-LABEL: align_offset_word_slice
// CHECK: leaq 31
// CHECK: andq $-32
// CHECK: subq
// CHECK: shrq
// `slice` is known to be aligned, so `!0` is not possible as a return
// CHECK-NOT: orq
/// Asks how many `u32` elements the start of a slice has to advance to reach a
/// 32-byte boundary.
///
/// A `&[u32]` is known to be element-aligned, so the "cannot align" result
/// (`!0`) is impossible here; the directives directly above this item verify
/// that the masking step needed for raw pointers is absent.
#[no_mangle]
pub fn align_offset_word_slice(slice: &[u32]) -> usize {
    // Requested alignment, in bytes.
    const ALIGN: usize = 32;
    let start: *const u32 = slice.as_ptr();
    <*const u32>::align_offset(start, ALIGN)
}
Add a special case for align_offset w/ stride != 1. This generalizes the previous `stride == 1` special case to apply to any situation where the requested alignment is divisible by the stride. This in turn allows the test case from #98809 to produce ideal assembly, along the lines of: leaq 15(%rdi), %rax; andq $-16, %rax. This also produces pretty high quality code for situations where the alignment of the input pointer isn’t known: pub unsafe fn ptr_u32(slice: *const u32) -> *const u32 { slice.offset(slice.align_offset(16) as isize) } // => movl %edi, %eax; andl $3, %eax; leaq 15(%rdi), %rcx; andq $-16, %rcx; subq %rdi, %rcx; shrq $2, %rcx; negq %rax; sbbq %rax, %rax; orq %rcx, %rax; leaq (%rdi,%rax,4), %rax. Here LLVM is smart enough to replace the `usize::MAX` special case with a branch-less bitwise-OR approach, where the mask is constructed using the neg and sbb instructions. This appears to work across various architectures I’ve tried. This change ends up introducing more branches and code in situations where there is less knowledge of the arguments. For example when the requested alignment is entirely unknown. This use-case was never really a focus of this function, so I’m not particularly worried, especially since llvm-mca is saying that the new code is still appreciably faster, despite all the new branching. Fixes #98809. Sadly, this does not help with #72356. 2022-07-03 16:23:31 -05:00			`//@ assembly-output: emit-asm`
			`//@ compile-flags: -Copt-level=1`
			`//@ only-x86_64`
Run rustfmt on `tests/assembly/`. 2024-05-28 22:57:23 -05:00			`#![crate_type = "rlib"]`
Add a special case for align_offset w/ stride != 1. This generalizes the previous `stride == 1` special case to apply to any situation where the requested alignment is divisible by the stride. This in turn allows the test case from #98809 to produce ideal assembly, along the lines of: leaq 15(%rdi), %rax; andq $-16, %rax. This also produces pretty high quality code for situations where the alignment of the input pointer isn’t known: pub unsafe fn ptr_u32(slice: *const u32) -> *const u32 { slice.offset(slice.align_offset(16) as isize) } // => movl %edi, %eax; andl $3, %eax; leaq 15(%rdi), %rcx; andq $-16, %rcx; subq %rdi, %rcx; shrq $2, %rcx; negq %rax; sbbq %rax, %rax; orq %rcx, %rax; leaq (%rdi,%rax,4), %rax. Here LLVM is smart enough to replace the `usize::MAX` special case with a branch-less bitwise-OR approach, where the mask is constructed using the neg and sbb instructions. This appears to work across various architectures I’ve tried. This change ends up introducing more branches and code in situations where there is less knowledge of the arguments. For example when the requested alignment is entirely unknown. This use-case was never really a focus of this function, so I’m not particularly worried, especially since llvm-mca is saying that the new code is still appreciably faster, despite all the new branching. Fixes #98809. Sadly, this does not help with #72356. 2022-07-03 16:23:31 -05:00
			`// CHECK-LABEL: align_offset_byte_ptr`
			`// CHECK: leaq 31`
			`// CHECK: andq $-32`
			`// CHECK: subq`
			`#[no_mangle]`
			`pub fn align_offset_byte_ptr(ptr: *const u8) -> usize {`
			`ptr.align_offset(32)`
			`}`

			`// CHECK-LABEL: align_offset_byte_slice`
			`// CHECK: leaq 31`
			`// CHECK: andq $-32`
			`// CHECK: subq`
			`#[no_mangle]`
			`pub fn align_offset_byte_slice(slice: &[u8]) -> usize {`
			`slice.as_ptr().align_offset(32)`
			`}`

			`// CHECK-LABEL: align_offset_word_ptr`
			`// CHECK: leaq 31`
			`// CHECK: andq $-32`
			`// CHECK: subq`
			`// CHECK: shrq`
			// This `ptr` is not known to be aligned, so it is required to check if it is at all possible to
			`// align. LLVM applies a simple mask.`
			`// CHECK: orq`
			`#[no_mangle]`
			`pub fn align_offset_word_ptr(ptr: *const u32) -> usize {`
			`ptr.align_offset(32)`
			`}`

			`// CHECK-LABEL: align_offset_word_slice`
			`// CHECK: leaq 31`
			`// CHECK: andq $-32`
			`// CHECK: subq`
			`// CHECK: shrq`
			// `slice` is known to be aligned, so `!0` is not possible as a return
			`// CHECK-NOT: orq`
			`#[no_mangle]`
			`pub fn align_offset_word_slice(slice: &[u32]) -> usize {`
			`slice.as_ptr().align_offset(32)`
			`}`