Auto merge of #80200 - mahkoh:dst-offset, r=nagisa
Optimize DST field access. For `struct X<T: ?Sized>(T)` and `struct Y<T: ?Sized>(u8, T)`, the offset of the unsized field is `0` and `mem::align_of_val(&self.1)`, respectively. This patch changes the expression used to compute these offsets so that the optimizer can perform this optimization. Consider ```rust fn f(x: &X<dyn Any>) -> &dyn Any { &x.0 } ``` Before: ```asm test: movq %rsi, %rdx movq 16(%rsi), %rax leaq -1(%rax), %rcx negq %rax andq %rcx, %rax addq %rdi, %rax retq ``` After: ```asm test: movq %rsi, %rdx movq %rdi, %rax retq ```
This commit is contained in:
commit
dfdfaa1f04
@ -178,16 +178,8 @@ pub fn project_field<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
|
||||
// Get the alignment of the field
|
||||
let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
|
||||
|
||||
// Bump the unaligned offset up to the appropriate alignment using the
|
||||
// following expression:
|
||||
//
|
||||
// (unaligned offset + (align - 1)) & -align
|
||||
|
||||
// Calculate offset.
|
||||
let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
|
||||
let and_lhs = bx.add(unaligned_offset, align_sub_1);
|
||||
let and_rhs = bx.neg(unsized_align);
|
||||
let offset = bx.and(and_lhs, and_rhs);
|
||||
// Bump the unaligned offset up to the appropriate alignment
|
||||
let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
|
||||
|
||||
debug!("struct_field_ptr: DST field offset: {:?}", offset);
|
||||
|
||||
@ -518,3 +510,45 @@ pub fn monomorphized_place_ty(&self, place_ref: mir::PlaceRef<'tcx>) -> Ty<'tcx>
|
||||
self.monomorphize(place_ty.ty)
|
||||
}
|
||||
}
|
||||
|
||||
fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
||||
bx: &mut Bx,
|
||||
value: Bx::Value,
|
||||
align: Bx::Value,
|
||||
) -> Bx::Value {
|
||||
// In pseudo code:
|
||||
//
|
||||
// if value & (align - 1) == 0 {
|
||||
// value
|
||||
// } else {
|
||||
// (value & !(align - 1)) + align
|
||||
// }
|
||||
//
|
||||
// Usually this is written without branches as
|
||||
//
|
||||
// (value + align - 1) & !(align - 1)
|
||||
//
|
||||
// But this formula cannot take advantage of constant `value`. E.g. if `value` is known
|
||||
// at compile time to be `1`, this expression should be optimized to `align`. However,
|
||||
// optimization only holds if `align` is a power of two. Since the optimizer doesn't know
|
||||
// that `align` is a power of two, it cannot perform this optimization.
|
||||
//
|
||||
// Instead we use
|
||||
//
|
||||
// value + (-value & (align - 1))
|
||||
//
|
||||
// Since `align` is used only once, the expression can be optimized. For `value = 0`
|
||||
// its optimized to `0` even in debug mode.
|
||||
//
|
||||
// NB: The previous version of this code used
|
||||
//
|
||||
// (value + align - 1) & -align
|
||||
//
|
||||
// Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
|
||||
// `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
|
||||
let one = bx.const_usize(1);
|
||||
let align_minus_1 = bx.sub(align, one);
|
||||
let neg_value = bx.neg(value);
|
||||
let offset = bx.and(neg_value, align_minus_1);
|
||||
bx.add(value, offset)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user