Auto merge of #117722 - okaneco:binarysearch, r=thomcc
Refactor `binary_search_by` to use conditional moves Refactor the if/else checking on `cmp::Ordering` variants to a "branchless" reassignment of left and right. This change results in fewer branches and instructions. https://rust.godbolt.org/z/698eYffTx --- I saw consistent benchmark improvements locally. Performance of worst case seems about the same, maybe slightly faster for the L3 test. Current ``` slice::binary_search_l1 43.00ns/iter +/- 3.00ns slice::binary_search_l1_with_dups 25.00ns/iter +/- 0.00ns slice::binary_search_l1_worst_case 10.00ns/iter +/- 0.00ns slice::binary_search_l2 64.00ns/iter +/- 1.00ns slice::binary_search_l2_with_dups 42.00ns/iter +/- 0.00ns slice::binary_search_l2_worst_case 16.00ns/iter +/- 0.00ns slice::binary_search_l3 132.00ns/iter +/- 2.00ns slice::binary_search_l3_with_dups 108.00ns/iter +/- 2.00ns slice::binary_search_l3_worst_case 33.00ns/iter +/- 3.00ns ``` This PR ``` slice::binary_search_l1 21.00ns/iter +/- 0.00ns slice::binary_search_l1_with_dups 14.00ns/iter +/- 0.00ns slice::binary_search_l1_worst_case 9.00ns/iter +/- 0.00ns slice::binary_search_l2 34.00ns/iter +/- 0.00ns slice::binary_search_l2_with_dups 23.00ns/iter +/- 0.00ns slice::binary_search_l2_worst_case 16.00ns/iter +/- 0.00ns slice::binary_search_l3 92.00ns/iter +/- 3.00ns slice::binary_search_l3_with_dups 63.00ns/iter +/- 1.00ns slice::binary_search_l3_worst_case 29.00ns/iter +/- 0.00ns ```
This commit is contained in:
commit
8abf920985
@ -6,7 +6,7 @@
|
||||
|
||||
#![stable(feature = "rust1", since = "1.0.0")]
|
||||
|
||||
use crate::cmp::Ordering::{self, Greater, Less};
|
||||
use crate::cmp::Ordering::{self, Equal, Greater, Less};
|
||||
use crate::fmt;
|
||||
use crate::intrinsics::{assert_unsafe_precondition, exact_div};
|
||||
use crate::marker::Copy;
|
||||
@ -2854,14 +2854,13 @@ pub fn binary_search_by<'a, F>(&'a self, mut f: F) -> Result<usize, usize>
|
||||
// we have `left + size/2 < self.len()`, and this is in-bounds.
|
||||
let cmp = f(unsafe { self.get_unchecked(mid) });
|
||||
|
||||
// The reason why we use if/else control flow rather than match
|
||||
// is because match reorders comparison operations, which is perf sensitive.
|
||||
// This is x86 asm for u8: https://rust.godbolt.org/z/8Y8Pra.
|
||||
if cmp == Less {
|
||||
left = mid + 1;
|
||||
} else if cmp == Greater {
|
||||
right = mid;
|
||||
} else {
|
||||
// This control flow produces conditional moves, which results in
|
||||
// fewer branches and instructions than if/else or matching on
|
||||
// cmp::Ordering.
|
||||
// This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
|
||||
left = if cmp == Less { mid + 1 } else { left };
|
||||
right = if cmp == Greater { mid } else { right };
|
||||
if cmp == Equal {
|
||||
// SAFETY: same as the `get_unchecked` above
|
||||
unsafe { crate::intrinsics::assume(mid < self.len()) };
|
||||
return Ok(mid);
|
||||
|
Loading…
Reference in New Issue
Block a user