Auto merge of #77257 - ecstatic-morse:optimize-int-range-from-pat, r=Mark-Simulacrum
Optimize `IntRange::from_pat`, then shrink `ParamEnv` Resolves #77058. r? `@Mark-Simulacrum` cc `@vandenheuvel` Looking at the output of `perf report` for #76244, the hot instructions seemed to be around the call to `pat_constructor` in `IntRange::from_pat`. I carried out an obvious optimization, but it actually made the instruction count higher (see #77075). However, it seems to have mitigated whatever was causing the pipeline stalls, so when combined with #76244, it's a net win. As you can see below, the regression in #76244 seems to have originated from something measured by `stalled-cycles-backend`. I'll try to collect some finer-grained stats to see if I can isolate it. I wish I had a better idea of what was going on here. I'd like to prevent the regression from reappearing in the future due to small changes in unrelated code. <details> <summary>Current `master`:</summary> ``` Performance counter stats for 'cargo +baseline-stage1 check': 2,275.67 msec task-clock:u # 0.998 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 49,826 page-faults:u # 0.022 M/sec 5,117,221,678 cycles:u # 2.249 GHz 299,655,943 stalled-cycles-frontend:u # 5.86% frontend cycles idle 2,284,213,395 stalled-cycles-backend:u # 44.64% backend cycles idle 8,051,871,959 instructions:u # 1.57 insn per cycle # 0.28 stalled cycles per insn 1,359,589,402 branches:u # 597.447 M/sec 7,359,347 branch-misses:u # 0.54% of all branches 2.281030026 seconds time elapsed 2.108197000 seconds user 0.164183000 seconds sys ``` </details> <details> <summary>Shrink `ParamEnv` without changing `IntRange::from_pat`:</summary> ``` Performance counter stats for 'cargo +perf-stage1 check': 2,751.79 msec task-clock:u # 0.996 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 50,103 page-faults:u # 0.018 M/sec 6,260,590,019 cycles:u # 2.275 GHz 317,355,920 stalled-cycles-frontend:u # 5.07% frontend cycles idle 3,397,743,582 stalled-cycles-backend:u # 54.27% backend cycles idle 8,276,224,367 instructions:u # 1.32 insn per cycle # 0.41 stalled cycles per insn 1,370,453,386 branches:u # 498.023 M/sec 7,281,031 branch-misses:u # 0.53% of all branches 2.763265838 seconds time elapsed 2.544578000 seconds user 0.204548000 seconds sys ``` </details> <details> <summary>Shrink `ParamEnv` and change `IntRange::from_pat`: </summary> ``` Performance counter stats for 'cargo +perf-stage1 check': 2,295.57 msec task-clock:u # 0.996 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 49,959 page-faults:u # 0.022 M/sec 5,151,407,066 cycles:u # 2.244 GHz 324,517,829 stalled-cycles-frontend:u # 6.30% frontend cycles idle 2,301,671,001 stalled-cycles-backend:u # 44.68% backend cycles idle 8,130,868,329 instructions:u # 1.58 insn per cycle # 0.28 stalled cycles per insn 1,356,618,512 branches:u # 590.972 M/sec 7,323,800 branch-misses:u # 0.54% of all branches 2.304509653 seconds time elapsed 2.128090000 seconds user 0.163909000 seconds sys ``` </details>
This commit is contained in:
commit
48cab67447
@ -57,7 +57,7 @@ pub type TraitObligation<'tcx> = Obligation<'tcx, ty::PolyTraitPredicate<'tcx>>;
|
||||
|
||||
// `PredicateObligation` is used a lot. Make sure it doesn't unintentionally get bigger.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
static_assert_size!(PredicateObligation<'_>, 40);
|
||||
static_assert_size!(PredicateObligation<'_>, 32);
|
||||
|
||||
pub type Obligations<'tcx, O> = Vec<Obligation<'tcx, O>>;
|
||||
pub type PredicateObligations<'tcx> = Vec<PredicateObligation<'tcx>>;
|
||||
|
@ -1751,9 +1751,6 @@ pub struct ParamEnv<'tcx> {
|
||||
///
|
||||
/// Note: This is packed, use the reveal() method to access it.
|
||||
packed: CopyTaggedPtr<&'tcx List<Predicate<'tcx>>, traits::Reveal, true>,
|
||||
|
||||
/// FIXME: This field is not used, but removing it causes a performance degradation. See #76913.
|
||||
unused_field: Option<DefId>,
|
||||
}
|
||||
|
||||
unsafe impl rustc_data_structures::tagged_ptr::Tag for traits::Reveal {
|
||||
@ -1834,7 +1831,7 @@ impl<'tcx> ParamEnv<'tcx> {
|
||||
/// Construct a trait environment with the given set of predicates.
|
||||
#[inline]
|
||||
pub fn new(caller_bounds: &'tcx List<Predicate<'tcx>>, reveal: Reveal) -> Self {
|
||||
ty::ParamEnv { packed: CopyTaggedPtr::new(caller_bounds, reveal), unused_field: None }
|
||||
ty::ParamEnv { packed: CopyTaggedPtr::new(caller_bounds, reveal) }
|
||||
}
|
||||
|
||||
pub fn with_user_facing(mut self) -> Self {
|
||||
|
@ -1787,9 +1787,32 @@ impl<'tcx> IntRange<'tcx> {
|
||||
param_env: ty::ParamEnv<'tcx>,
|
||||
pat: &Pat<'tcx>,
|
||||
) -> Option<IntRange<'tcx>> {
|
||||
match pat_constructor(tcx, param_env, pat)? {
|
||||
IntRange(range) => Some(range),
|
||||
_ => None,
|
||||
// This MUST be kept in sync with `pat_constructor`.
|
||||
match *pat.kind {
|
||||
PatKind::AscribeUserType { .. } => bug!(), // Handled by `expand_pattern`
|
||||
PatKind::Or { .. } => bug!("Or-pattern should have been expanded earlier on."),
|
||||
|
||||
PatKind::Binding { .. }
|
||||
| PatKind::Wild
|
||||
| PatKind::Leaf { .. }
|
||||
| PatKind::Deref { .. }
|
||||
| PatKind::Variant { .. }
|
||||
| PatKind::Array { .. }
|
||||
| PatKind::Slice { .. } => None,
|
||||
|
||||
PatKind::Constant { value } => Self::from_const(tcx, param_env, value, pat.span),
|
||||
|
||||
PatKind::Range(PatRange { lo, hi, end }) => {
|
||||
let ty = lo.ty;
|
||||
Self::from_range(
|
||||
tcx,
|
||||
lo.eval_bits(tcx, param_env, lo.ty),
|
||||
hi.eval_bits(tcx, param_env, hi.ty),
|
||||
ty,
|
||||
&end,
|
||||
pat.span,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2196,6 +2219,7 @@ fn pat_constructor<'tcx>(
|
||||
param_env: ty::ParamEnv<'tcx>,
|
||||
pat: &Pat<'tcx>,
|
||||
) -> Option<Constructor<'tcx>> {
|
||||
// This MUST be kept in sync with `IntRange::from_pat`.
|
||||
match *pat.kind {
|
||||
PatKind::AscribeUserType { .. } => bug!(), // Handled by `expand_pattern`
|
||||
PatKind::Binding { .. } | PatKind::Wild => None,
|
||||
|
@ -87,7 +87,7 @@ pub struct PendingPredicateObligation<'tcx> {
|
||||
|
||||
// `PendingPredicateObligation` is used a lot. Make sure it doesn't unintentionally get bigger.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
static_assert_size!(PendingPredicateObligation<'_>, 64);
|
||||
static_assert_size!(PendingPredicateObligation<'_>, 56);
|
||||
|
||||
impl<'a, 'tcx> FulfillmentContext<'tcx> {
|
||||
/// Creates a new fulfillment context.
|
||||
|
Loading…
x
Reference in New Issue
Block a user