From 5e78b9cdb39f2cd47a0518ba568e7bfd0d1f8d95 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 14 Oct 2023 12:12:54 +0000 Subject: [PATCH] Disambiguate non-deterministic constants. --- compiler/rustc_middle/src/mir/consts.rs | 34 ++++++ compiler/rustc_mir_transform/src/gvn.rs | 111 +++++++++++------- .../gvn.duplicate_slice.GVN.panic-abort.diff | 38 ++++++ .../gvn.duplicate_slice.GVN.panic-unwind.diff | 38 ++++++ tests/mir-opt/gvn.rs | 54 ++++++++- tests/mir-opt/gvn.slices.GVN.panic-abort.diff | 8 +- .../mir-opt/gvn.slices.GVN.panic-unwind.diff | 8 +- 7 files changed, 238 insertions(+), 53 deletions(-) create mode 100644 tests/mir-opt/gvn.duplicate_slice.GVN.panic-abort.diff create mode 100644 tests/mir-opt/gvn.duplicate_slice.GVN.panic-unwind.diff diff --git a/compiler/rustc_middle/src/mir/consts.rs b/compiler/rustc_middle/src/mir/consts.rs index 9e3ee3f2bd3..375870a4fb5 100644 --- a/compiler/rustc_middle/src/mir/consts.rs +++ b/compiler/rustc_middle/src/mir/consts.rs @@ -497,6 +497,40 @@ impl<'tcx> Const<'tcx> { _ => Self::Ty(c), } } + + /// Return true if any evaluation of this constant returns the same value. + pub fn is_deterministic(&self) -> bool { + // Some constants may contain pointers. We need to preserve the provenance of these + // pointers, but not all constants guarantee this: + // - valtrees purposefully do not; + // - ConstValue::Slice does not either. + match self { + Const::Ty(c) => match c.kind() { + ty::ConstKind::Value(valtree) => match valtree { + // This is just an integer, keep it. + ty::ValTree::Leaf(_) => true, + // This branch may be a reference. Valtree references correspond to a + // different allocation each time they are evaluated. + ty::ValTree::Branch(_) => false, + }, + ty::ConstKind::Param(..) => true, + ty::ConstKind::Unevaluated(..) | ty::ConstKind::Expr(..) => false, + // Should not appear in runtime MIR. + ty::ConstKind::Infer(..) + | ty::ConstKind::Bound(..) + | ty::ConstKind::Placeholder(..) + | ty::ConstKind::Error(..) => bug!(), + }, + Const::Unevaluated(..) => false, + // If the same slice appears twice in the MIR, we cannot guarantee that we will + // give the same `AllocId` to the data. + Const::Val(ConstValue::Slice { .. }, _) => false, + Const::Val( + ConstValue::ZeroSized | ConstValue::Scalar(_) | ConstValue::Indirect { .. }, + _, + ) => true, + } + } } /// An unevaluated (potentially generic) constant used in MIR. diff --git a/compiler/rustc_mir_transform/src/gvn.rs b/compiler/rustc_mir_transform/src/gvn.rs index 58844aed783..de61571fdfc 100644 --- a/compiler/rustc_mir_transform/src/gvn.rs +++ b/compiler/rustc_mir_transform/src/gvn.rs @@ -52,6 +52,35 @@ //! _a = *_b // _b is &Freeze //! _c = *_b // replaced by _c = _a //! ``` +//! +//! # Determinism of constant propagation +//! +//! When registering a new `Value`, we attempt to opportunistically evaluate it as a constant. +//! The evaluated form is inserted in `evaluated` as an `OpTy` or `None` if evaluation failed. +//! +//! The difficulty is non-deterministic evaluation of MIR constants. Some `Const` can have +//! different runtime values each time they are evaluated. This is the case with +//! `Const::Slice` which have a new pointer each time they are evaluated, and constants that +//! contain a fn pointer (`AllocId` pointing to a `GlobalAlloc::Function`) pointing to a different +//! symbol in each codegen unit. +//! +//! Meanwhile, we want to be able to read indirect constants. For instance: +//! ``` +//! static A: &'static &'static u8 = &&63; +//! fn foo() -> u8 { +//! **A // We want to replace by 63. +//! } +//! fn bar() -> u8 { +//! b"abc"[1] // We want to replace by 'b'. +//! } +//! ``` +//! +//! The `Value::Constant` variant stores a possibly unevaluated constant. Evaluating that constant +//! may be non-deterministic. When that happens, we assign a disambiguator to ensure that we do not +//! merge the constants. See `duplicate_slice` test in `gvn.rs`. +//! +//! Second, when writing constants in MIR, we do not write `Const::Slice` or `Const` +//! that contain `AllocId`s. use rustc_const_eval::interpret::{intern_const_alloc_for_constprop, MemoryKind}; use rustc_const_eval::interpret::{ImmTy, InterpCx, OpTy, Projectable, Scalar}; @@ -161,7 +190,12 @@ enum Value<'tcx> { /// The `usize` is a counter incremented by `new_opaque`. Opaque(usize), /// Evaluated or unevaluated constant value. - Constant(Const<'tcx>), + Constant { + value: Const<'tcx>, + /// Some constants do not have a deterministic value. To avoid merging two instances of the + /// same `Const`, we assign them an additional integer index. + disambiguator: usize, + }, /// An aggregate value, either tuple/closure/struct/enum. /// This does not contain unions, as we cannot reason with the value. Aggregate(AggregateTy<'tcx>, VariantIdx, Vec), @@ -288,8 +322,24 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { } } + fn insert_constant(&mut self, value: Const<'tcx>) -> Option { + let disambiguator = if value.is_deterministic() { + // The constant is deterministic, no need to disambiguate. + 0 + } else { + // Multiple mentions of this constant will yield different values, + // so assign a different `disambiguator` to ensure they do not get the same `VnIndex`. + let next_opaque = self.next_opaque.as_mut()?; + let disambiguator = *next_opaque; + *next_opaque += 1; + disambiguator + }; + Some(self.insert(Value::Constant { value, disambiguator })) + } + fn insert_scalar(&mut self, scalar: Scalar, ty: Ty<'tcx>) -> VnIndex { - self.insert(Value::Constant(Const::from_scalar(self.tcx, scalar, ty))) + self.insert_constant(Const::from_scalar(self.tcx, scalar, ty)) + .expect("scalars are deterministic") } #[instrument(level = "trace", skip(self), ret)] @@ -300,7 +350,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { // Do not bother evaluating repeat expressions. This would uselessly consume memory. Repeat(..) => return None, - Constant(ref constant) => self.ecx.eval_mir_constant(constant, None, None).ok()?, + Constant { ref value, disambiguator: _ } => { + self.ecx.eval_mir_constant(value, None, None).ok()? + } Aggregate(kind, variant, ref fields) => { let fields = fields .iter() @@ -657,7 +709,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { match *operand { Operand::Constant(ref mut constant) => { let const_ = constant.const_.normalize(self.tcx, self.param_env); - Some(self.insert(Value::Constant(const_))) + self.insert_constant(const_) } Operand::Copy(ref mut place) | Operand::Move(ref mut place) => { let value = self.simplify_place_value(place, location)?; @@ -691,7 +743,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { Value::Repeat(op, amount) } Rvalue::NullaryOp(op, ty) => Value::NullaryOp(op, ty), - Rvalue::Aggregate(..) => self.simplify_aggregate(rvalue, location)?, + Rvalue::Aggregate(..) => return self.simplify_aggregate(rvalue, location), Rvalue::Ref(_, borrow_kind, ref mut place) => { self.simplify_place_projection(place, location); return self.new_pointer(*place, AddressKind::Ref(borrow_kind)); @@ -757,7 +809,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { &mut self, rvalue: &mut Rvalue<'tcx>, location: Location, - ) -> Option> { + ) -> Option { let Rvalue::Aggregate(box ref kind, ref mut fields) = *rvalue else { bug!() }; let tcx = self.tcx; @@ -774,8 +826,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { if is_zst { let ty = rvalue.ty(self.local_decls, tcx); - let value = Value::Constant(Const::zero_sized(ty)); - return Some(value); + return self.insert_constant(Const::zero_sized(ty)); } } @@ -814,12 +865,11 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { *rvalue = Rvalue::Repeat(Operand::Copy(local.into()), len); self.reused_locals.insert(local); } - return Some(Value::Repeat(first, len)); + return Some(self.insert(Value::Repeat(first, len))); } } - let value = Value::Aggregate(ty, variant_index, fields); - Some(value) + Some(self.insert(Value::Aggregate(ty, variant_index, fields))) } } @@ -882,39 +932,12 @@ impl<'tcx> VnState<'_, 'tcx> { /// If `index` is a `Value::Constant`, return the `Constant` to be put in the MIR. fn try_as_constant(&mut self, index: VnIndex) -> Option> { // This was already constant in MIR, do not change it. - if let Value::Constant(const_) = *self.get(index) { - // Some constants may contain pointers. We need to preserve the provenance of these - // pointers, but not all constants guarantee this: - // - valtrees purposefully do not; - // - ConstValue::Slice does not either. - let const_ok = match const_ { - Const::Ty(c) => match c.kind() { - ty::ConstKind::Value(valtree) => match valtree { - // This is just an integer, keep it. - ty::ValTree::Leaf(_) => true, - ty::ValTree::Branch(_) => false, - }, - ty::ConstKind::Param(..) - | ty::ConstKind::Unevaluated(..) - | ty::ConstKind::Expr(..) => true, - // Should not appear in runtime MIR. - ty::ConstKind::Infer(..) - | ty::ConstKind::Bound(..) - | ty::ConstKind::Placeholder(..) - | ty::ConstKind::Error(..) => bug!(), - }, - Const::Unevaluated(..) => true, - // If the same slice appears twice in the MIR, we cannot guarantee that we will - // give the same `AllocId` to the data. - Const::Val(ConstValue::Slice { .. }, _) => false, - Const::Val( - ConstValue::ZeroSized | ConstValue::Scalar(_) | ConstValue::Indirect { .. }, - _, - ) => true, - }; - if const_ok { - return Some(ConstOperand { span: rustc_span::DUMMY_SP, user_ty: None, const_ }); - } + if let Value::Constant { value, disambiguator: _ } = *self.get(index) + // If the constant is not deterministic, adding an additional mention of it in MIR will + // not give the same value as the former mention. + && value.is_deterministic() + { + return Some(ConstOperand { span: rustc_span::DUMMY_SP, user_ty: None, const_: value }); } let op = self.evaluated[index].as_ref()?; diff --git a/tests/mir-opt/gvn.duplicate_slice.GVN.panic-abort.diff b/tests/mir-opt/gvn.duplicate_slice.GVN.panic-abort.diff new file mode 100644 index 00000000000..7ae1fae68e8 --- /dev/null +++ b/tests/mir-opt/gvn.duplicate_slice.GVN.panic-abort.diff @@ -0,0 +1,38 @@ +- // MIR for `duplicate_slice` before GVN ++ // MIR for `duplicate_slice` after GVN + + fn duplicate_slice() -> (bool, bool) { + let mut _0: (bool, bool); + let mut _1: u128; + let mut _2: u128; + let mut _3: u128; + let mut _4: u128; + let mut _5: &str; + let mut _6: &str; + let mut _7: (&str,); + let mut _8: &str; + let mut _9: bool; + let mut _10: bool; + + bb0: { + _7 = (const "a",); + _1 = (_7.0: &str) as u128 (Transmute); + _5 = identity::<&str>((_7.0: &str)) -> [return: bb1, unwind unreachable]; + } + + bb1: { + _3 = _5 as u128 (Transmute); + _8 = const "a"; + _2 = _8 as u128 (Transmute); + _6 = identity::<&str>(_8) -> [return: bb2, unwind unreachable]; + } + + bb2: { + _4 = _6 as u128 (Transmute); + _9 = Eq(_1, _2); + _10 = Eq(_3, _4); + _0 = (_9, _10); + return; + } + } + diff --git a/tests/mir-opt/gvn.duplicate_slice.GVN.panic-unwind.diff b/tests/mir-opt/gvn.duplicate_slice.GVN.panic-unwind.diff new file mode 100644 index 00000000000..8c96edaa280 --- /dev/null +++ b/tests/mir-opt/gvn.duplicate_slice.GVN.panic-unwind.diff @@ -0,0 +1,38 @@ +- // MIR for `duplicate_slice` before GVN ++ // MIR for `duplicate_slice` after GVN + + fn duplicate_slice() -> (bool, bool) { + let mut _0: (bool, bool); + let mut _1: u128; + let mut _2: u128; + let mut _3: u128; + let mut _4: u128; + let mut _5: &str; + let mut _6: &str; + let mut _7: (&str,); + let mut _8: &str; + let mut _9: bool; + let mut _10: bool; + + bb0: { + _7 = (const "a",); + _1 = (_7.0: &str) as u128 (Transmute); + _5 = identity::<&str>((_7.0: &str)) -> [return: bb1, unwind continue]; + } + + bb1: { + _3 = _5 as u128 (Transmute); + _8 = const "a"; + _2 = _8 as u128 (Transmute); + _6 = identity::<&str>(_8) -> [return: bb2, unwind continue]; + } + + bb2: { + _4 = _6 as u128 (Transmute); + _9 = Eq(_1, _2); + _10 = Eq(_3, _4); + _0 = (_9, _10); + return; + } + } + diff --git a/tests/mir-opt/gvn.rs b/tests/mir-opt/gvn.rs index 406fe106add..55de186edf1 100644 --- a/tests/mir-opt/gvn.rs +++ b/tests/mir-opt/gvn.rs @@ -1,11 +1,17 @@ // skip-filecheck // unit-test: GVN // EMIT_MIR_FOR_EACH_PANIC_STRATEGY +// only-64bit #![feature(raw_ref_op)] #![feature(rustc_attrs)] +#![feature(custom_mir)] +#![feature(core_intrinsics)] #![allow(unconditional_panic)] +use std::intrinsics::mir::*; +use std::mem::transmute; + struct S(T); fn subexpression_elimination(x: u64, y: u64, mut z: u64) { @@ -225,11 +231,49 @@ fn slices() { let t = s; // This should be the same pointer, so cannot be a `Const::Slice`. opaque(t); assert_eq!(s.as_ptr(), t.as_ptr()); - let u = unsafe { std::mem::transmute::<&str, &[u8]>(s) }; + let u = unsafe { transmute::<&str, &[u8]>(s) }; opaque(u); assert_eq!(s.as_ptr(), u.as_ptr()); } +#[custom_mir(dialect = "analysis")] +fn duplicate_slice() -> (bool, bool) { + mir!( + let au: u128; + let bu: u128; + let cu: u128; + let du: u128; + let c: &str; + let d: &str; + { + let a = ("a",); + Call(au = transmute::<_, u128>(a.0), bb1) + } + bb1 = { + Call(c = identity(a.0), bb2) + } + bb2 = { + Call(cu = transmute::<_, u128>(c), bb3) + } + bb3 = { + let b = "a"; // This slice is different from `a.0`. + Call(bu = transmute::<_, u128>(b), bb4) // Hence `bu` is not `au`. + } + bb4 = { + Call(d = identity(b), bb5) // This returns a copy of `b`, which is not `a`. + } + bb5 = { + Call(du = transmute::<_, u128>(d), bb6) + } + bb6 = { + let direct = au == bu; // Must not fold to `true`... + let indirect = cu == du; // ...as this will not. + RET = (direct, indirect); + Return() + } + ) +} + fn aggregates() { let a_array: S<[u8; 0]> = S([]); let b_array: S<[u16; 0]> = S([]); // This must not be merged with `a_array`. @@ -253,12 +297,19 @@ fn main() { references(5); dereferences(&mut 5, &6, &S(7)); slices(); + let (direct, indirect) = duplicate_slice(); + assert_eq!(direct, indirect); aggregates(); } #[inline(never)] fn opaque(_: impl Sized) {} +#[inline(never)] +fn identity(x: T) -> T { + x +} + // EMIT_MIR gvn.subexpression_elimination.GVN.diff // EMIT_MIR gvn.wrap_unwrap.GVN.diff // EMIT_MIR gvn.repeated_index.GVN.diff @@ -270,4 +321,5 @@ fn opaque(_: impl Sized) {} // EMIT_MIR gvn.references.GVN.diff // EMIT_MIR gvn.dereferences.GVN.diff // EMIT_MIR gvn.slices.GVN.diff +// EMIT_MIR gvn.duplicate_slice.GVN.diff // EMIT_MIR gvn.aggregates.GVN.diff diff --git a/tests/mir-opt/gvn.slices.GVN.panic-abort.diff b/tests/mir-opt/gvn.slices.GVN.panic-abort.diff index 9db6e068fa7..ec449980312 100644 --- a/tests/mir-opt/gvn.slices.GVN.panic-abort.diff +++ b/tests/mir-opt/gvn.slices.GVN.panic-abort.diff @@ -207,8 +207,8 @@ _26 = &(*_27); StorageLive(_28); _28 = Option::>::None; -- _22 = core::panicking::assert_failed::<*const u8, *const u8>(move _23, move _24, move _26, move _28) -> unwind unreachable; -+ _22 = core::panicking::assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _24, move _26, move _28) -> unwind unreachable; +- _22 = assert_failed::<*const u8, *const u8>(move _23, move _24, move _26, move _28) -> unwind unreachable; ++ _22 = assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _24, move _26, move _28) -> unwind unreachable; } bb7: { @@ -306,8 +306,8 @@ _52 = &(*_53); StorageLive(_54); _54 = Option::>::None; -- _48 = core::panicking::assert_failed::<*const u8, *const u8>(move _49, move _50, move _52, move _54) -> unwind unreachable; -+ _48 = core::panicking::assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _50, move _52, move _54) -> unwind unreachable; +- _48 = assert_failed::<*const u8, *const u8>(move _49, move _50, move _52, move _54) -> unwind unreachable; ++ _48 = assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _50, move _52, move _54) -> unwind unreachable; } } diff --git a/tests/mir-opt/gvn.slices.GVN.panic-unwind.diff b/tests/mir-opt/gvn.slices.GVN.panic-unwind.diff index ac7a3e74688..56a78ca8694 100644 --- a/tests/mir-opt/gvn.slices.GVN.panic-unwind.diff +++ b/tests/mir-opt/gvn.slices.GVN.panic-unwind.diff @@ -207,8 +207,8 @@ _26 = &(*_27); StorageLive(_28); _28 = Option::>::None; -- _22 = core::panicking::assert_failed::<*const u8, *const u8>(move _23, move _24, move _26, move _28) -> unwind continue; -+ _22 = core::panicking::assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _24, move _26, move _28) -> unwind continue; +- _22 = assert_failed::<*const u8, *const u8>(move _23, move _24, move _26, move _28) -> unwind continue; ++ _22 = assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _24, move _26, move _28) -> unwind continue; } bb7: { @@ -306,8 +306,8 @@ _52 = &(*_53); StorageLive(_54); _54 = Option::>::None; -- _48 = core::panicking::assert_failed::<*const u8, *const u8>(move _49, move _50, move _52, move _54) -> unwind continue; -+ _48 = core::panicking::assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _50, move _52, move _54) -> unwind continue; +- _48 = assert_failed::<*const u8, *const u8>(move _49, move _50, move _52, move _54) -> unwind continue; ++ _48 = assert_failed::<*const u8, *const u8>(const core::panicking::AssertKind::Eq, move _50, move _52, move _54) -> unwind continue; } }