Generate MIR thats easier for llvm for str matches

LLVM appears to prefer (spend less time optimizing) long if chains if
it receives them in approzimately source order.
This fixes a ~10% regression for optimized builds of the encoding benchmark
on perf.rlo due to the changes to decision tree construction.
This commit is contained in:
Matthew Jasper 2019-05-25 17:11:27 +01:00
parent ef1fc86b52
commit a1d0266878
2 changed files with 82 additions and 68 deletions

View File

@ -1198,13 +1198,18 @@ fn test_candidates<'pat, 'b, 'c>(
debug!("tested_candidates: {}", total_candidate_count - candidates.len());
debug!("untested_candidates: {}", candidates.len());
// HACK(matthewjasper) This is a closure so that we can let the test
// create its blocks before the rest of the match. This currently
// improves the speed of llvm when optimizing long string literal
// matches
let make_target_blocks = move |this: &mut Self| -> Vec<BasicBlock> {
// For each outcome of test, process the candidates that still
// apply. Collect a list of blocks where control flow will
// branch if one of the `target_candidate` sets is not
// exhaustive.
if !candidates.is_empty() {
let remainder_start = &mut None;
self.match_candidates(
this.match_candidates(
span,
remainder_start,
otherwise_block,
@ -1213,10 +1218,11 @@ fn test_candidates<'pat, 'b, 'c>(
);
otherwise_block = Some(remainder_start.unwrap());
};
let target_blocks: Vec<_> = target_candidates.into_iter().map(|mut candidates| {
target_candidates.into_iter().map(|mut candidates| {
if candidates.len() != 0 {
let candidate_start = &mut None;
self.match_candidates(
this.match_candidates(
span,
candidate_start,
otherwise_block,
@ -1226,9 +1232,9 @@ fn test_candidates<'pat, 'b, 'c>(
candidate_start.unwrap()
} else {
*otherwise_block.get_or_insert_with(|| {
let unreachable = self.cfg.start_new_block();
let source_info = self.source_info(span);
self.cfg.terminate(
let unreachable = this.cfg.start_new_block();
let source_info = this.source_info(span);
this.cfg.terminate(
unreachable,
source_info,
TerminatorKind::Unreachable,
@ -1236,13 +1242,14 @@ fn test_candidates<'pat, 'b, 'c>(
unreachable
})
}
}).collect();
}).collect()
};
self.perform_test(
block,
&match_place,
&test,
target_blocks,
make_target_blocks,
);
}

View File

@ -168,7 +168,7 @@ pub fn perform_test(
block: BasicBlock,
place: &Place<'tcx>,
test: &Test<'tcx>,
target_blocks: Vec<BasicBlock>,
make_target_blocks: impl FnOnce(&mut Self) -> Vec<BasicBlock>,
) {
debug!("perform_test({:?}, {:?}: {:?}, {:?})",
block,
@ -179,6 +179,7 @@ pub fn perform_test(
let source_info = self.source_info(test.span);
match test.kind {
TestKind::Switch { adt_def, ref variants } => {
let target_blocks = make_target_blocks(self);
// Variants is a BitVec of indexes into adt_def.variants.
let num_enum_variants = adt_def.variants.len();
let used_variants = variants.count();
@ -223,6 +224,7 @@ pub fn perform_test(
}
TestKind::SwitchInt { switch_ty, ref options, indices: _ } => {
let target_blocks = make_target_blocks(self);
let terminator = if switch_ty.sty == ty::Bool {
assert!(options.len() > 0 && options.len() <= 2);
if let [first_bb, second_bb] = *target_blocks {
@ -254,38 +256,38 @@ pub fn perform_test(
}
TestKind::Eq { value, ty } => {
if let [success, fail] = *target_blocks {
if !ty.is_scalar() {
// Use `PartialEq::eq` instead of `BinOp::Eq`
// (the binop can only handle primitives)
self.non_scalar_compare(
block,
success,
fail,
make_target_blocks,
source_info,
value,
place,
ty,
);
} else {
if let [success, fail] = *make_target_blocks(self) {
let val = Operand::Copy(place.clone());
let expect = self.literal_operand(test.span, ty, value);
self.compare(block, success, fail, source_info, BinOp::Eq, expect, val);
}
} else {
bug!("`TestKind::Eq` should have two target blocks")
};
bug!("`TestKind::Eq` should have two target blocks");
}
}
}
TestKind::Range(PatternRange { ref lo, ref hi, ty, ref end }) => {
let lower_bound_success = self.cfg.start_new_block();
let target_blocks = make_target_blocks(self);
// Test `val` by computing `lo <= val && val <= hi`, using primitive comparisons.
let lo = self.literal_operand(test.span, ty, lo);
let hi = self.literal_operand(test.span, ty, hi);
let val = Operand::Copy(place.clone());
if let [success, fail] = *target_blocks {
let lower_bound_success = self.cfg.start_new_block();
self.compare(
block,
lower_bound_success,
@ -306,6 +308,8 @@ pub fn perform_test(
}
TestKind::Len { len, op } => {
let target_blocks = make_target_blocks(self);
let usize_ty = self.hir.usize_ty();
let actual = self.temp(usize_ty, test.span);
@ -374,8 +378,7 @@ fn compare(
fn non_scalar_compare(
&mut self,
block: BasicBlock,
success_block: BasicBlock,
fail_block: BasicBlock,
make_target_blocks: impl FnOnce(&mut Self) -> Vec<BasicBlock>,
source_info: SourceInfo,
value: &'tcx ty::Const<'tcx>,
place: &Place<'tcx>,
@ -461,6 +464,7 @@ fn non_scalar_compare(
from_hir_call: false,
});
if let [success_block, fail_block] = *make_target_blocks(self) {
// check the result
self.cfg.terminate(
eq_block,
@ -472,6 +476,9 @@ fn non_scalar_compare(
fail_block,
),
);
} else {
bug!("`TestKind::Eq` should have two target blocks")
}
}
/// Given that we are performing `test` against `test_place`, this job