From 3b7e4bc77a83c1101e8799739504b0a53f0d0619 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 18 Jul 2024 20:27:15 +1000 Subject: [PATCH 1/5] Split out `remove_never_subcandidates` --- .../rustc_mir_build/src/build/matches/mod.rs | 84 +++++++++++-------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs index b531a392efa..6bd2c308128 100644 --- a/compiler/rustc_mir_build/src/build/matches/mod.rs +++ b/compiler/rustc_mir_build/src/build/matches/mod.rs @@ -1706,6 +1706,7 @@ fn finalize_or_candidate( } self.merge_trivial_subcandidates(candidate); + self.remove_never_subcandidates(candidate); if !candidate.match_pairs.is_empty() { let or_span = candidate.or_span.unwrap_or(candidate.extra_data.span); @@ -1753,44 +1754,53 @@ fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { let can_merge = candidate.subcandidates.iter().all(|subcandidate| { subcandidate.subcandidates.is_empty() && subcandidate.extra_data.is_empty() }); - if can_merge { - let mut last_otherwise = None; - let any_matches = self.cfg.start_new_block(); - let or_span = candidate.or_span.take().unwrap(); - let source_info = self.source_info(or_span); - if candidate.false_edge_start_block.is_none() { - candidate.false_edge_start_block = - candidate.subcandidates[0].false_edge_start_block; - } - for subcandidate in mem::take(&mut candidate.subcandidates) { - let or_block = subcandidate.pre_binding_block.unwrap(); - self.cfg.goto(or_block, source_info, any_matches); - last_otherwise = subcandidate.otherwise_block; - } - candidate.pre_binding_block = Some(any_matches); - assert!(last_otherwise.is_some()); - candidate.otherwise_block = last_otherwise; - } else { - // Never subcandidates may have a set of bindings inconsistent with their siblings, - // which would break later code. So we filter them out. Note that we can't filter out - // top-level candidates this way. - candidate.subcandidates.retain_mut(|candidate| { - if candidate.extra_data.is_never { - candidate.visit_leaves(|subcandidate| { - let block = subcandidate.pre_binding_block.unwrap(); - // That block is already unreachable but needs a terminator to make the MIR well-formed. - let source_info = self.source_info(subcandidate.extra_data.span); - self.cfg.terminate(block, source_info, TerminatorKind::Unreachable); - }); - false - } else { - true - } - }); - if candidate.subcandidates.is_empty() { - // If `candidate` has become a leaf candidate, ensure it has a `pre_binding_block`. - candidate.pre_binding_block = Some(self.cfg.start_new_block()); + if !can_merge { + return; + } + + let mut last_otherwise = None; + let any_matches = self.cfg.start_new_block(); + let or_span = candidate.or_span.take().unwrap(); + let source_info = self.source_info(or_span); + + if candidate.false_edge_start_block.is_none() { + candidate.false_edge_start_block = candidate.subcandidates[0].false_edge_start_block; + } + + for subcandidate in mem::take(&mut candidate.subcandidates) { + let or_block = subcandidate.pre_binding_block.unwrap(); + self.cfg.goto(or_block, source_info, any_matches); + last_otherwise = subcandidate.otherwise_block; + } + candidate.pre_binding_block = Some(any_matches); + assert!(last_otherwise.is_some()); + candidate.otherwise_block = last_otherwise; + } + + /// Never subcandidates may have a set of bindings inconsistent with their siblings, + /// which would break later code. So we filter them out. Note that we can't filter out + /// top-level candidates this way. + fn remove_never_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { + if candidate.subcandidates.is_empty() { + return; + } + + candidate.subcandidates.retain_mut(|candidate| { + if candidate.extra_data.is_never { + candidate.visit_leaves(|subcandidate| { + let block = subcandidate.pre_binding_block.unwrap(); + // That block is already unreachable but needs a terminator to make the MIR well-formed. + let source_info = self.source_info(subcandidate.extra_data.span); + self.cfg.terminate(block, source_info, TerminatorKind::Unreachable); + }); + false + } else { + true } + }); + if candidate.subcandidates.is_empty() { + // If `candidate` has become a leaf candidate, ensure it has a `pre_binding_block`. + candidate.pre_binding_block = Some(self.cfg.start_new_block()); } } From e091c356fa0e47251f6c7d11cbc7043213df48c6 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 18 Jul 2024 20:58:52 +1000 Subject: [PATCH 2/5] Improve `merge_trivial_subcandidates` --- .../rustc_mir_build/src/build/matches/mod.rs | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs index 6bd2c308128..d18e59759e2 100644 --- a/compiler/rustc_mir_build/src/build/matches/mod.rs +++ b/compiler/rustc_mir_build/src/build/matches/mod.rs @@ -1744,8 +1744,12 @@ fn finalize_or_candidate( /// Try to merge all of the subcandidates of the given candidate into one. This avoids /// exponentially large CFGs in cases like `(1 | 2, 3 | 4, ...)`. The candidate should have been /// expanded with `create_or_subcandidates`. + /// + /// Note that this takes place _after_ the subcandidates have participated + /// in match tree lowering. fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { - if candidate.subcandidates.is_empty() || candidate.has_guard { + assert!(!candidate.subcandidates.is_empty()); + if candidate.has_guard { // FIXME(or_patterns; matthewjasper) Don't give up if we have a guard. return; } @@ -1759,7 +1763,8 @@ fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { } let mut last_otherwise = None; - let any_matches = self.cfg.start_new_block(); + let shared_pre_binding_block = self.cfg.start_new_block(); + // This candidate is about to become a leaf, so unset `or_span`. let or_span = candidate.or_span.take().unwrap(); let source_info = self.source_info(or_span); @@ -1767,12 +1772,17 @@ fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { candidate.false_edge_start_block = candidate.subcandidates[0].false_edge_start_block; } + // Remove the (known-trivial) subcandidates from the candidate tree, + // so that they aren't visible after match tree lowering, and wire them + // all to join up at a single shared pre-binding block. + // (Note that the subcandidates have already had their part of the match + // tree lowered by this point, which is why we can add a goto to them.) for subcandidate in mem::take(&mut candidate.subcandidates) { - let or_block = subcandidate.pre_binding_block.unwrap(); - self.cfg.goto(or_block, source_info, any_matches); + let subcandidate_block = subcandidate.pre_binding_block.unwrap(); + self.cfg.goto(subcandidate_block, source_info, shared_pre_binding_block); last_otherwise = subcandidate.otherwise_block; } - candidate.pre_binding_block = Some(any_matches); + candidate.pre_binding_block = Some(shared_pre_binding_block); assert!(last_otherwise.is_some()); candidate.otherwise_block = last_otherwise; } From e60c5c1a77a4bc6a2540437969ec0dc6e213d712 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 18 Jul 2024 22:11:09 +1000 Subject: [PATCH 3/5] Split out `test_remaining_match_pairs_after_or` Only the last candidate can possibly have more match pairs, so this can be separate from the main or-candidate postprocessing loop. --- .../rustc_mir_build/src/build/matches/mod.rs | 101 ++++++++++-------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs index d18e59759e2..8bba9329085 100644 --- a/compiler/rustc_mir_build/src/build/matches/mod.rs +++ b/compiler/rustc_mir_build/src/build/matches/mod.rs @@ -1598,6 +1598,9 @@ fn expand_and_match_or_candidates<'pat, 'b, 'c>( for subcandidate in candidate.subcandidates.iter_mut() { expanded_candidates.push(subcandidate); } + // Note that the subcandidates have been added to `expanded_candidates`, + // but `candidate` itself has not. If the last candidate has more match pairs, + // they are handled separately by `test_remaining_match_pairs_after_or`. } else { // A candidate that doesn't start with an or-pattern has nothing to // expand, so it is included in the post-expansion list as-is. @@ -1613,12 +1616,20 @@ fn expand_and_match_or_candidates<'pat, 'b, 'c>( expanded_candidates.as_mut_slice(), ); - // Simplify subcandidates and process any leftover match pairs. - for candidate in candidates_to_expand { + // Postprocess subcandidates, and process any leftover match pairs. + // (Only the last candidate can possibly have more match pairs.) + debug_assert!({ + let mut all_except_last = candidates_to_expand.iter().rev().skip(1); + all_except_last.all(|candidate| candidate.match_pairs.is_empty()) + }); + for candidate in candidates_to_expand.iter_mut() { if !candidate.subcandidates.is_empty() { - self.finalize_or_candidate(span, scrutinee_span, candidate); + self.finalize_or_candidate(candidate); } } + if let Some(last_candidate) = candidates_to_expand.last_mut() { + self.test_remaining_match_pairs_after_or(span, scrutinee_span, last_candidate); + } remainder_start.and(remaining_candidates) } @@ -1642,8 +1653,8 @@ fn create_or_subcandidates<'pat>( candidate.subcandidates[0].false_edge_start_block = candidate.false_edge_start_block; } - /// Simplify subcandidates and process any leftover match pairs. The candidate should have been - /// expanded with `create_or_subcandidates`. + /// Simplify subcandidates and remove `is_never` subcandidates. + /// The candidate should have been expanded with `create_or_subcandidates`. /// /// Given a pattern `(P | Q, R | S)` we (in principle) generate a CFG like /// so: @@ -1695,50 +1706,13 @@ fn create_or_subcandidates<'pat>( /// | /// ... /// ``` - fn finalize_or_candidate( - &mut self, - span: Span, - scrutinee_span: Span, - candidate: &mut Candidate<'_, 'tcx>, - ) { + fn finalize_or_candidate(&mut self, candidate: &mut Candidate<'_, 'tcx>) { if candidate.subcandidates.is_empty() { return; } self.merge_trivial_subcandidates(candidate); self.remove_never_subcandidates(candidate); - - if !candidate.match_pairs.is_empty() { - let or_span = candidate.or_span.unwrap_or(candidate.extra_data.span); - let source_info = self.source_info(or_span); - // If more match pairs remain, test them after each subcandidate. - // We could add them to the or-candidates before the call to `test_or_pattern` but this - // would make it impossible to detect simplifiable or-patterns. That would guarantee - // exponentially large CFGs for cases like `(1 | 2, 3 | 4, ...)`. - let mut last_otherwise = None; - candidate.visit_leaves(|leaf_candidate| { - last_otherwise = leaf_candidate.otherwise_block; - }); - let remaining_match_pairs = mem::take(&mut candidate.match_pairs); - candidate.visit_leaves(|leaf_candidate| { - assert!(leaf_candidate.match_pairs.is_empty()); - leaf_candidate.match_pairs.extend(remaining_match_pairs.iter().cloned()); - let or_start = leaf_candidate.pre_binding_block.unwrap(); - let otherwise = - self.match_candidates(span, scrutinee_span, or_start, &mut [leaf_candidate]); - // In a case like `(P | Q, R | S)`, if `P` succeeds and `R | S` fails, we know `(Q, - // R | S)` will fail too. If there is no guard, we skip testing of `Q` by branching - // directly to `last_otherwise`. If there is a guard, - // `leaf_candidate.otherwise_block` can be reached by guard failure as well, so we - // can't skip `Q`. - let or_otherwise = if leaf_candidate.has_guard { - leaf_candidate.otherwise_block.unwrap() - } else { - last_otherwise.unwrap() - }; - self.cfg.goto(otherwise, source_info, or_otherwise); - }); - } } /// Try to merge all of the subcandidates of the given candidate into one. This avoids @@ -1814,6 +1788,47 @@ fn remove_never_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) { } } + /// If more match pairs remain, test them after each subcandidate. + /// We could have added them to the or-candidates during or-pattern expansion, but that + /// would make it impossible to detect simplifiable or-patterns. That would guarantee + /// exponentially large CFGs for cases like `(1 | 2, 3 | 4, ...)`. + fn test_remaining_match_pairs_after_or( + &mut self, + span: Span, + scrutinee_span: Span, + candidate: &mut Candidate<'_, 'tcx>, + ) { + if candidate.match_pairs.is_empty() { + return; + } + + let or_span = candidate.or_span.unwrap_or(candidate.extra_data.span); + let source_info = self.source_info(or_span); + let mut last_otherwise = None; + candidate.visit_leaves(|leaf_candidate| { + last_otherwise = leaf_candidate.otherwise_block; + }); + let remaining_match_pairs = mem::take(&mut candidate.match_pairs); + candidate.visit_leaves(|leaf_candidate| { + assert!(leaf_candidate.match_pairs.is_empty()); + leaf_candidate.match_pairs.extend(remaining_match_pairs.iter().cloned()); + let or_start = leaf_candidate.pre_binding_block.unwrap(); + let otherwise = + self.match_candidates(span, scrutinee_span, or_start, &mut [leaf_candidate]); + // In a case like `(P | Q, R | S)`, if `P` succeeds and `R | S` fails, we know `(Q, + // R | S)` will fail too. If there is no guard, we skip testing of `Q` by branching + // directly to `last_otherwise`. If there is a guard, + // `leaf_candidate.otherwise_block` can be reached by guard failure as well, so we + // can't skip `Q`. + let or_otherwise = if leaf_candidate.has_guard { + leaf_candidate.otherwise_block.unwrap() + } else { + last_otherwise.unwrap() + }; + self.cfg.goto(otherwise, source_info, or_otherwise); + }); + } + /// Pick a test to run. Which test doesn't matter as long as it is guaranteed to fully match at /// least one match pair. We currently simply pick the test corresponding to the first match /// pair of the first candidate in the list. From 886668cc2ea22a3d52f775961bac06d9499d9e26 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 18 Jul 2024 22:34:49 +1000 Subject: [PATCH 4/5] Improve `test_remaining_match_pairs_after_or` --- compiler/rustc_mir_build/src/build/matches/mod.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs index 8bba9329085..d8db08bfb8a 100644 --- a/compiler/rustc_mir_build/src/build/matches/mod.rs +++ b/compiler/rustc_mir_build/src/build/matches/mod.rs @@ -1808,10 +1808,23 @@ fn test_remaining_match_pairs_after_or( candidate.visit_leaves(|leaf_candidate| { last_otherwise = leaf_candidate.otherwise_block; }); + let remaining_match_pairs = mem::take(&mut candidate.match_pairs); + // We're testing match pairs that remained after an `Or`, so the remaining + // pairs should all be `Or` too, due to the sorting invariant. + debug_assert!( + remaining_match_pairs + .iter() + .all(|match_pair| matches!(match_pair.test_case, TestCase::Or { .. })) + ); + candidate.visit_leaves(|leaf_candidate| { + // At this point the leaf's own match pairs have all been lowered + // and removed, so `extend` and assignment are equivalent, + // but extending can also recycle any existing vector capacity. assert!(leaf_candidate.match_pairs.is_empty()); leaf_candidate.match_pairs.extend(remaining_match_pairs.iter().cloned()); + let or_start = leaf_candidate.pre_binding_block.unwrap(); let otherwise = self.match_candidates(span, scrutinee_span, or_start, &mut [leaf_candidate]); From 239037ecde3d884ad09bfb95c950021f3bc78da1 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 18 Jul 2024 22:51:17 +1000 Subject: [PATCH 5/5] Inline `finalize_or_candidate` --- .../rustc_mir_build/src/build/matches/mod.rs | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs index d8db08bfb8a..db7aed4306b 100644 --- a/compiler/rustc_mir_build/src/build/matches/mod.rs +++ b/compiler/rustc_mir_build/src/build/matches/mod.rs @@ -1624,7 +1624,8 @@ fn expand_and_match_or_candidates<'pat, 'b, 'c>( }); for candidate in candidates_to_expand.iter_mut() { if !candidate.subcandidates.is_empty() { - self.finalize_or_candidate(candidate); + self.merge_trivial_subcandidates(candidate); + self.remove_never_subcandidates(candidate); } } if let Some(last_candidate) = candidates_to_expand.last_mut() { @@ -1635,8 +1636,8 @@ fn expand_and_match_or_candidates<'pat, 'b, 'c>( } /// Given a match-pair that corresponds to an or-pattern, expand each subpattern into a new - /// subcandidate. Any candidate that has been expanded that way should be passed to - /// `finalize_or_candidate` after its subcandidates have been processed. + /// subcandidate. Any candidate that has been expanded this way should also be postprocessed + /// at the end of [`Self::expand_and_match_or_candidates`]. fn create_or_subcandidates<'pat>( &mut self, candidate: &mut Candidate<'pat, 'tcx>, @@ -1653,8 +1654,9 @@ fn create_or_subcandidates<'pat>( candidate.subcandidates[0].false_edge_start_block = candidate.false_edge_start_block; } - /// Simplify subcandidates and remove `is_never` subcandidates. - /// The candidate should have been expanded with `create_or_subcandidates`. + /// Try to merge all of the subcandidates of the given candidate into one. This avoids + /// exponentially large CFGs in cases like `(1 | 2, 3 | 4, ...)`. The candidate should have been + /// expanded with `create_or_subcandidates`. /// /// Given a pattern `(P | Q, R | S)` we (in principle) generate a CFG like /// so: @@ -1706,18 +1708,6 @@ fn create_or_subcandidates<'pat>( /// | /// ... /// ``` - fn finalize_or_candidate(&mut self, candidate: &mut Candidate<'_, 'tcx>) { - if candidate.subcandidates.is_empty() { - return; - } - - self.merge_trivial_subcandidates(candidate); - self.remove_never_subcandidates(candidate); - } - - /// Try to merge all of the subcandidates of the given candidate into one. This avoids - /// exponentially large CFGs in cases like `(1 | 2, 3 | 4, ...)`. The candidate should have been - /// expanded with `create_or_subcandidates`. /// /// Note that this takes place _after_ the subcandidates have participated /// in match tree lowering.