Auto merge of #90596 - the8472:path-hash-opt, r=Mark-Simulacrum
Optimize Eq and Hash for Path/PathBuf ``` # new test path::tests::bench_hash_path_long ... bench: 86 ns/iter (+/- 1) test path::tests::bench_hash_path_short ... bench: 13 ns/iter (+/- 1) test path::tests::bench_path_hashset ... bench: 197 ns/iter (+/- 6) test path::tests::bench_path_hashset_miss ... bench: 94 ns/iter (+/- 4) # old test path::tests::bench_hash_path_long ... bench: 192 ns/iter (+/- 2) test path::tests::bench_hash_path_short ... bench: 33 ns/iter (+/- 1) test path::tests::bench_path_hashset ... bench: 1,121 ns/iter (+/- 24) test path::tests::bench_path_hashset_miss ... bench: 273 ns/iter (+/- 6) ```
This commit is contained in:
commit
c8e94975a6
@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {}
|
||||
impl<'a> cmp::PartialEq for Components<'a> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Components<'a>) -> bool {
|
||||
let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self;
|
||||
|
||||
// Fast path for exact matches, e.g. for hashmap lookups.
|
||||
// Don't explicitly compare the prefix or has_physical_root fields since they'll
|
||||
// either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`.
|
||||
if self.path.len() == other.path.len()
|
||||
&& self.front == other.front
|
||||
&& self.back == State::Body
|
||||
&& other.back == State::Body
|
||||
&& self.prefix_verbatim() == other.prefix_verbatim()
|
||||
{
|
||||
// possible future improvement: this could bail out earlier if there were a
|
||||
// reverse memcmp/bcmp comparing back to front
|
||||
if self.path == other.path {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// compare back to front since absolute paths often share long prefixes
|
||||
Iterator::eq(self.clone().rev(), other.clone().rev())
|
||||
}
|
||||
}
|
||||
@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
|
||||
// The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
|
||||
// the middle of one
|
||||
if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
|
||||
// this might benefit from a [u8]::first_mismatch simd implementation, if it existed
|
||||
let first_difference =
|
||||
match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
|
||||
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
|
||||
None => left.path.len().min(right.path.len()),
|
||||
Some(diff) => diff,
|
||||
};
|
||||
// possible future improvement: a [u8]::first_mismatch simd implementation
|
||||
let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) {
|
||||
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
|
||||
None => left.path.len().min(right.path.len()),
|
||||
Some(diff) => diff,
|
||||
};
|
||||
|
||||
if let Some(previous_sep) =
|
||||
left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
|
||||
@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path {
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Hash for Path {
|
||||
fn hash<H: Hasher>(&self, h: &mut H) {
|
||||
for component in self.components() {
|
||||
component.hash(h);
|
||||
let bytes = self.as_u8_slice();
|
||||
let prefix_len = match parse_prefix(&self.inner) {
|
||||
Some(prefix) => {
|
||||
prefix.hash(h);
|
||||
prefix.len()
|
||||
}
|
||||
None => 0,
|
||||
};
|
||||
let bytes = &bytes[prefix_len..];
|
||||
|
||||
let mut component_start = 0;
|
||||
let mut bytes_hashed = 0;
|
||||
|
||||
for i in 0..bytes.len() {
|
||||
if is_sep_byte(bytes[i]) {
|
||||
if i > component_start {
|
||||
let to_hash = &bytes[component_start..i];
|
||||
h.write(to_hash);
|
||||
bytes_hashed += to_hash.len();
|
||||
}
|
||||
|
||||
// skip over separator and optionally a following CurDir item
|
||||
// since components() would normalize these away
|
||||
component_start = i + match bytes[i..] {
|
||||
[_, b'.', b'/', ..] | [_, b'.'] => 2,
|
||||
_ => 1,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if component_start < bytes.len() {
|
||||
let to_hash = &bytes[component_start..];
|
||||
h.write(to_hash);
|
||||
bytes_hashed += to_hash.len();
|
||||
}
|
||||
|
||||
h.write_usize(bytes_hashed);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
use super::*;
|
||||
|
||||
use crate::collections::BTreeSet;
|
||||
use crate::collections::hash_map::DefaultHasher;
|
||||
use crate::collections::{BTreeSet, HashSet};
|
||||
use crate::hash::Hasher;
|
||||
use crate::rc::Rc;
|
||||
use crate::sync::Arc;
|
||||
use core::hint::black_box;
|
||||
@ -1632,7 +1634,25 @@ fn into_rc() {
|
||||
fn test_ord() {
|
||||
macro_rules! ord(
|
||||
($ord:ident, $left:expr, $right:expr) => ( {
|
||||
assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
|
||||
use core::cmp::Ordering;
|
||||
|
||||
let left = Path::new($left);
|
||||
let right = Path::new($right);
|
||||
assert_eq!(left.cmp(&right), Ordering::$ord);
|
||||
if (core::cmp::Ordering::$ord == Ordering::Equal) {
|
||||
assert_eq!(left, right);
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
left.hash(&mut hasher);
|
||||
let left_hash = hasher.finish();
|
||||
hasher = DefaultHasher::new();
|
||||
right.hash(&mut hasher);
|
||||
let right_hash = hasher.finish();
|
||||
|
||||
assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right);
|
||||
} else {
|
||||
assert_ne!(left, right);
|
||||
}
|
||||
});
|
||||
);
|
||||
|
||||
@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
|
||||
set.insert(paths[500].as_path());
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_path_hashset(b: &mut test::Bencher) {
|
||||
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
|
||||
let paths: Vec<_> =
|
||||
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
|
||||
|
||||
let mut set = HashSet::new();
|
||||
|
||||
paths.iter().for_each(|p| {
|
||||
set.insert(p.as_path());
|
||||
});
|
||||
|
||||
b.iter(|| {
|
||||
set.remove(paths[500].as_path());
|
||||
set.insert(black_box(paths[500].as_path()))
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_path_hashset_miss(b: &mut test::Bencher) {
|
||||
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
|
||||
let paths: Vec<_> =
|
||||
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
|
||||
|
||||
let mut set = HashSet::new();
|
||||
|
||||
paths.iter().for_each(|p| {
|
||||
set.insert(p.as_path());
|
||||
});
|
||||
|
||||
let probe = PathBuf::from(prefix).join("other");
|
||||
|
||||
b.iter(|| set.remove(black_box(probe.as_path())));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_hash_path_short(b: &mut test::Bencher) {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
let path = Path::new("explorer.exe");
|
||||
|
||||
b.iter(|| black_box(path).hash(&mut hasher));
|
||||
|
||||
black_box(hasher.finish());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_hash_path_long(b: &mut test::Bencher) {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
let path =
|
||||
Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff");
|
||||
|
||||
b.iter(|| black_box(path).hash(&mut hasher));
|
||||
|
||||
black_box(hasher.finish());
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
|
||||
b == b'/'
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
|
||||
None
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user