Rewrite binary search implementation

This restores the original binary search implementation from #45333
which has the nice property of having a loop count that only depends on
the size of the slice. This, along with explicit conditional moves
from #128250, means that the entire binary search loop can be perfectly
predicted by the branch predictor.

Additionally, LLVM is able to unroll the loop when the slice length is
known at compile-time. This results in a very compact code sequence of
3-4 instructions per binary search step and zero branches.

Fixes #53823
This commit is contained in:
Amanieu d'Antras 2024-07-26 23:35:48 +01:00
parent 595316b400
commit bb58488207
2 changed files with 48 additions and 35 deletions

View File

@ -7,7 +7,7 @@
#![stable(feature = "rust1", since = "1.0.0")] #![stable(feature = "rust1", since = "1.0.0")]
use crate::cmp::Ordering::{self, Equal, Greater, Less}; use crate::cmp::Ordering::{self, Equal, Greater, Less};
use crate::intrinsics::{exact_div, unchecked_sub}; use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub};
use crate::mem::{self, SizedTypeProperties}; use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZero; use crate::num::NonZero;
use crate::ops::{Bound, OneSidedRange, Range, RangeBounds}; use crate::ops::{Bound, OneSidedRange, Range, RangeBounds};
@ -2770,41 +2770,54 @@ pub fn binary_search_by<'a, F>(&'a self, mut f: F) -> Result<usize, usize>
where where
F: FnMut(&'a T) -> Ordering, F: FnMut(&'a T) -> Ordering,
{ {
// INVARIANTS:
// - 0 <= left <= left + size = right <= self.len()
// - f returns Less for everything in self[..left]
// - f returns Greater for everything in self[right..]
let mut size = self.len(); let mut size = self.len();
let mut left = 0; if size == 0 {
let mut right = size; return Err(0);
while left < right { }
let mid = left + size / 2; let mut base = 0usize;
// SAFETY: the while condition means `size` is strictly positive, so // This loop intentionally doesn't have an early exit if the comparison
// `size/2 < size`. Thus `left + size/2 < left + size`, which // returns Equal. We want the number of loop iterations to depend *only*
// coupled with the `left + size <= self.len()` invariant means // on the size of the input slice so that the CPU can reliably predict
// we have `left + size/2 < self.len()`, and this is in-bounds. // the loop count.
while size > 1 {
let half = size / 2;
let mid = base + half;
// SAFETY: the call is made safe by the following invariants:
// - `mid >= 0`: by definition
// - `mid < self.len()`: `mid` is at most `size / 2 + size / 4 + size / 8 ...` of the initial `size`, which is `self.len()`
let cmp = f(unsafe { self.get_unchecked(mid) }); let cmp = f(unsafe { self.get_unchecked(mid) });
// This control flow produces conditional moves, which results in // Binary search interacts poorly with branch prediction, so force
// fewer branches and instructions than if/else or matching on // the compiler to use conditional moves if supported by the target
// cmp::Ordering. // architecture.
// This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx. base = select_unpredictable(cmp == Greater, base, mid);
left = if cmp == Less { mid + 1 } else { left };
right = if cmp == Greater { mid } else { right }; // This is imprecise in the case where `size` is odd and the
// comparison returns Greater: the mid element still gets included
// by `size` even though it's known to be larger than the element
// being searched for.
//
// This is fine though: we gain more performance by keeping the
// loop iteration count invariant (and thus predictable) than we
// lose from considering one additional element.
size -= half;
}
// SAFETY: base is always in [0, size) because base <= mid.
let cmp = f(unsafe { self.get_unchecked(base) });
if cmp == Equal { if cmp == Equal {
// SAFETY: same as the `get_unchecked` above // SAFETY: same as the `get_unchecked` above.
unsafe { hint::assert_unchecked(mid < self.len()) }; unsafe { hint::assert_unchecked(base < self.len()) };
return Ok(mid); Ok(base)
} } else {
let result = base + (cmp == Less) as usize;
size = right - left; // SAFETY: same as the `get_unchecked` above.
}
// SAFETY: directly true from the overall invariant.
// Note that this is `<=`, unlike the assume in the `Ok` path. // Note that this is `<=`, unlike the assume in the `Ok` path.
unsafe { hint::assert_unchecked(left <= self.len()) }; unsafe { hint::assert_unchecked(result <= self.len()) };
Err(left) Err(result)
}
} }
/// Binary searches this slice with a key extraction function. /// Binary searches this slice with a key extraction function.

View File

@ -69,13 +69,13 @@ fn test_binary_search() {
assert_eq!(b.binary_search(&8), Err(5)); assert_eq!(b.binary_search(&8), Err(5));
let b = [(); usize::MAX]; let b = [(); usize::MAX];
assert_eq!(b.binary_search(&()), Ok(usize::MAX / 2)); assert_eq!(b.binary_search(&()), Ok(usize::MAX - 1));
} }
#[test] #[test]
fn test_binary_search_by_overflow() { fn test_binary_search_by_overflow() {
let b = [(); usize::MAX]; let b = [(); usize::MAX];
assert_eq!(b.binary_search_by(|_| Ordering::Equal), Ok(usize::MAX / 2)); assert_eq!(b.binary_search_by(|_| Ordering::Equal), Ok(usize::MAX - 1));
assert_eq!(b.binary_search_by(|_| Ordering::Greater), Err(0)); assert_eq!(b.binary_search_by(|_| Ordering::Greater), Err(0));
assert_eq!(b.binary_search_by(|_| Ordering::Less), Err(usize::MAX)); assert_eq!(b.binary_search_by(|_| Ordering::Less), Err(usize::MAX));
} }
@ -87,13 +87,13 @@ fn test_binary_search_implementation_details() {
let b = [1, 1, 2, 2, 3, 3, 3]; let b = [1, 1, 2, 2, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(1)); assert_eq!(b.binary_search(&1), Ok(1));
assert_eq!(b.binary_search(&2), Ok(3)); assert_eq!(b.binary_search(&2), Ok(3));
assert_eq!(b.binary_search(&3), Ok(5)); assert_eq!(b.binary_search(&3), Ok(6));
let b = [1, 1, 1, 1, 1, 3, 3, 3, 3]; let b = [1, 1, 1, 1, 1, 3, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(4)); assert_eq!(b.binary_search(&1), Ok(4));
assert_eq!(b.binary_search(&3), Ok(7)); assert_eq!(b.binary_search(&3), Ok(8));
let b = [1, 1, 1, 1, 3, 3, 3, 3, 3]; let b = [1, 1, 1, 1, 3, 3, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(2)); assert_eq!(b.binary_search(&1), Ok(3));
assert_eq!(b.binary_search(&3), Ok(4)); assert_eq!(b.binary_search(&3), Ok(8));
} }
#[test] #[test]