From 5dcfec332ce9e003f100b4cf9dec895e5634edc3 Mon Sep 17 00:00:00 2001 From: The8472 Date: Tue, 6 Jul 2021 00:46:40 +0200 Subject: [PATCH 1/3] use Eq::eq instead of Iterator::eq implementation --- library/std/src/path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index c71751efb9f..e75d1d38f8f 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2686,7 +2686,7 @@ impl fmt::Display for Display<'_> { impl cmp::PartialEq for Path { #[inline] fn eq(&self, other: &Path) -> bool { - self.components().eq(other.components()) + self.components() == other.components() } } From 5e877109b400a6f251dbefbbec94f2464c6f1b55 Mon Sep 17 00:00:00 2001 From: The8472 Date: Tue, 6 Jul 2021 01:17:20 +0200 Subject: [PATCH 2/3] optimize {Path,PathBuf,Components}::{cmp,partial_cmp} for shared prefixes --- library/std/src/path.rs | 45 ++++++++++++++++++++++++++----- library/std/src/path/tests.rs | 51 +++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 6 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index e75d1d38f8f..7875c9c9967 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -962,7 +962,7 @@ impl cmp::Eq for Components<'_> {} impl<'a> cmp::PartialOrd for Components<'a> { #[inline] fn partial_cmp(&self, other: &Components<'a>) -> Option { - Iterator::partial_cmp(self.clone(), other.clone()) + Some(compare_components(self.clone(), other.clone())) } } @@ -970,10 +970,43 @@ impl<'a> cmp::PartialOrd for Components<'a> { impl cmp::Ord for Components<'_> { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { - Iterator::cmp(self.clone(), other.clone()) + compare_components(self.clone(), other.clone()) } } +fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering { + // Fast path for long shared prefixes + // + // - compare raw bytes to find first mismatch + // - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters + // - if found update state to only do a component-wise comparison on the remainder, + // otherwise do it on the full path + // + // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into + // the middle of one + if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front { + // this might benefit from a [u8]::first_mismatch simd implementation, if it existed + let first_difference = + match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) { + None if left.path.len() == right.path.len() => return cmp::Ordering::Equal, + None => left.path.len().min(right.path.len()), + Some(diff) => diff, + }; + + if let Some(previous_sep) = + left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b)) + { + let mismatched_component_start = previous_sep + 1; + left.path = &left.path[mismatched_component_start..]; + left.front = State::Body; + right.path = &right.path[mismatched_component_start..]; + right.front = State::Body; + } + } + + Iterator::cmp(left, right) +} + /// An iterator over [`Path`] and its ancestors. /// /// This `struct` is created by the [`ancestors`] method on [`Path`]. @@ -1704,7 +1737,7 @@ impl cmp::Eq for PathBuf {} impl cmp::PartialOrd for PathBuf { #[inline] fn partial_cmp(&self, other: &PathBuf) -> Option { - self.components().partial_cmp(other.components()) + Some(compare_components(self.components(), other.components())) } } @@ -1712,7 +1745,7 @@ impl cmp::PartialOrd for PathBuf { impl cmp::Ord for PathBuf { #[inline] fn cmp(&self, other: &PathBuf) -> cmp::Ordering { - self.components().cmp(other.components()) + compare_components(self.components(), other.components()) } } @@ -2706,7 +2739,7 @@ impl cmp::Eq for Path {} impl cmp::PartialOrd for Path { #[inline] fn partial_cmp(&self, other: &Path) -> Option { - self.components().partial_cmp(other.components()) + Some(compare_components(self.components(), other.components())) } } @@ -2714,7 +2747,7 @@ impl cmp::PartialOrd for Path { impl cmp::Ord for Path { #[inline] fn cmp(&self, other: &Path) -> cmp::Ordering { - self.components().cmp(other.components()) + compare_components(self.components(), other.components()) } } diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index 896d6c2a64c..6b7df78d3d7 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -1,7 +1,9 @@ use super::*; +use crate::collections::BTreeSet; use crate::rc::Rc; use crate::sync::Arc; +use core::hint::black_box; macro_rules! t( ($path:expr, iter: $iter:expr) => ( @@ -1392,3 +1394,52 @@ fn into_rc() { assert_eq!(&*rc2, path); assert_eq!(&*arc2, path); } + +#[bench] +fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) { + let prefix = "my/home"; + let mut paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect(); + + paths.sort(); + + b.iter(|| { + black_box(paths.as_mut_slice()).sort_unstable(); + }); +} + +#[bench] +fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect(); + + let mut set = BTreeSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(paths[500].as_path()); + }); +} + +#[bench] +fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) { + let prefix = "my/home"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect(); + + let mut set = BTreeSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(paths[500].as_path()); + }); +} From dfdf3610184b949891b6f8c1b597d07c77293325 Mon Sep 17 00:00:00 2001 From: The8472 Date: Tue, 6 Jul 2021 20:18:52 +0200 Subject: [PATCH 3/3] add Ord tests for Path comparisons --- library/std/src/path/tests.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index 6b7df78d3d7..c10ded08f2c 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -1395,6 +1395,23 @@ fn into_rc() { assert_eq!(&*arc2, path); } +#[test] +fn test_ord() { + macro_rules! ord( + ($ord:ident, $left:expr, $right:expr) => ( { + assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord); + }); + ); + + ord!(Less, "1", "2"); + ord!(Less, "/foo/bar", "/foo./bar"); + ord!(Less, "foo/bar", "foo/bar."); + ord!(Equal, "foo/./bar", "foo/bar/"); + ord!(Equal, "foo/bar", "foo/bar/"); + ord!(Equal, "foo/bar", "foo/bar/."); + ord!(Equal, "foo/bar", "foo/bar//"); +} + #[bench] fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) { let prefix = "my/home";