implement version-sorting algorithm for rust identifiers
The algorithm is described in the [style guide] and was introduced in `r-l/rust 115046`. [style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
This commit is contained in:
parent
5d30ce6844
commit
1681c91a35
@ -93,6 +93,7 @@ macro_rules! static_regex {
|
||||
pub(crate) mod rustfmt_diff;
|
||||
mod shape;
|
||||
mod skip;
|
||||
mod sort;
|
||||
pub(crate) mod source_file;
|
||||
pub(crate) mod source_map;
|
||||
mod spanned;
|
||||
|
368
src/sort.rs
Normal file
368
src/sort.rs
Normal file
@ -0,0 +1,368 @@
|
||||
use itertools::EitherOrBoth;
|
||||
use itertools::Itertools;
|
||||
|
||||
/// Iterator which breaks an identifier into various [VersionChunk]s.
|
||||
struct VersionChunkIter<'a> {
|
||||
ident: &'a str,
|
||||
start: usize,
|
||||
}
|
||||
|
||||
impl<'a> VersionChunkIter<'a> {
|
||||
pub(crate) fn new(ident: &'a str) -> Self {
|
||||
Self { ident, start: 0 }
|
||||
}
|
||||
|
||||
fn parse_numeric_chunk(
|
||||
&mut self,
|
||||
mut chars: std::str::CharIndices<'a>,
|
||||
) -> Option<VersionChunk<'a>> {
|
||||
let mut end = self.start;
|
||||
let mut is_end_of_chunk = false;
|
||||
|
||||
while let Some((idx, c)) = chars.next() {
|
||||
end = self.start + idx;
|
||||
|
||||
if c.is_ascii_digit() {
|
||||
continue;
|
||||
}
|
||||
|
||||
is_end_of_chunk = true;
|
||||
break;
|
||||
}
|
||||
|
||||
let source = if is_end_of_chunk {
|
||||
let value = &self.ident[self.start..end];
|
||||
self.start = end;
|
||||
value
|
||||
} else {
|
||||
let value = &self.ident[self.start..];
|
||||
self.start = self.ident.len();
|
||||
value
|
||||
};
|
||||
|
||||
let zeros = source.chars().take_while(|c| *c == '0').count();
|
||||
let value = source.parse::<usize>().ok()?;
|
||||
|
||||
Some(VersionChunk::Number {
|
||||
value,
|
||||
zeros,
|
||||
source,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_str_chunk(
|
||||
&mut self,
|
||||
mut chars: std::str::CharIndices<'a>,
|
||||
) -> Option<VersionChunk<'a>> {
|
||||
let mut end = self.start;
|
||||
let mut is_end_of_chunk = false;
|
||||
|
||||
while let Some((idx, c)) = chars.next() {
|
||||
end = self.start + idx;
|
||||
|
||||
if c == '_' {
|
||||
is_end_of_chunk = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if !c.is_numeric() {
|
||||
continue;
|
||||
}
|
||||
|
||||
is_end_of_chunk = true;
|
||||
break;
|
||||
}
|
||||
|
||||
let source = if is_end_of_chunk {
|
||||
let value = &self.ident[self.start..end];
|
||||
self.start = end;
|
||||
value
|
||||
} else {
|
||||
let value = &self.ident[self.start..];
|
||||
self.start = self.ident.len();
|
||||
value
|
||||
};
|
||||
|
||||
Some(VersionChunk::Str(source))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for VersionChunkIter<'a> {
|
||||
type Item = VersionChunk<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut chars = self.ident[self.start..].char_indices();
|
||||
let (_, next) = chars.next()?;
|
||||
|
||||
if next == '_' {
|
||||
self.start = self.start + next.len_utf8();
|
||||
return Some(VersionChunk::Underscore);
|
||||
}
|
||||
|
||||
if next.is_ascii_digit() {
|
||||
return self.parse_numeric_chunk(chars);
|
||||
}
|
||||
|
||||
self.parse_str_chunk(chars)
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a chunk in the version-sort algorithm
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum VersionChunk<'a> {
|
||||
/// A single `_` in an identifier. Underscores are sorted before all other characters.
|
||||
Underscore,
|
||||
/// A &str chunk in the version sort.
|
||||
Str(&'a str),
|
||||
/// A numeric chunk in the version sort. Keeps track of the numeric value and leading zeros.
|
||||
Number {
|
||||
value: usize,
|
||||
zeros: usize,
|
||||
source: &'a str,
|
||||
},
|
||||
}
|
||||
|
||||
/// Determine which side of the version-sort comparison had more leading zeros.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum MoreLeadingZeros {
|
||||
Left,
|
||||
Right,
|
||||
Equal,
|
||||
}
|
||||
|
||||
/// Compare two identifiers based on the version sorting algorithm described in [the style guide]
|
||||
///
|
||||
/// [the style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
|
||||
pub(crate) fn version_sort(a: &str, b: &str) -> std::cmp::Ordering {
|
||||
let iter_a = VersionChunkIter::new(a);
|
||||
let iter_b = VersionChunkIter::new(b);
|
||||
let mut more_leading_zeros = MoreLeadingZeros::Equal;
|
||||
|
||||
for either_or_both in iter_a.zip_longest(iter_b) {
|
||||
match either_or_both {
|
||||
EitherOrBoth::Left(_) => return std::cmp::Ordering::Greater,
|
||||
EitherOrBoth::Right(_) => return std::cmp::Ordering::Less,
|
||||
EitherOrBoth::Both(a, b) => match (a, b) {
|
||||
(VersionChunk::Underscore, VersionChunk::Underscore) => {
|
||||
continue;
|
||||
}
|
||||
(VersionChunk::Underscore, _) => return std::cmp::Ordering::Less,
|
||||
(_, VersionChunk::Underscore) => return std::cmp::Ordering::Greater,
|
||||
(VersionChunk::Str(ca), VersionChunk::Str(cb))
|
||||
| (VersionChunk::Str(ca), VersionChunk::Number { source: cb, .. })
|
||||
| (VersionChunk::Number { source: ca, .. }, VersionChunk::Str(cb)) => {
|
||||
match ca.cmp(&cb) {
|
||||
std::cmp::Ordering::Equal => {
|
||||
continue;
|
||||
}
|
||||
order @ _ => return order,
|
||||
}
|
||||
}
|
||||
(
|
||||
VersionChunk::Number {
|
||||
value: va,
|
||||
zeros: lza,
|
||||
..
|
||||
},
|
||||
VersionChunk::Number {
|
||||
value: vb,
|
||||
zeros: lzb,
|
||||
..
|
||||
},
|
||||
) => match va.cmp(&vb) {
|
||||
std::cmp::Ordering::Equal => {
|
||||
if lza == lzb {
|
||||
continue;
|
||||
}
|
||||
|
||||
if more_leading_zeros == MoreLeadingZeros::Equal && lza > lzb {
|
||||
more_leading_zeros = MoreLeadingZeros::Left;
|
||||
} else if more_leading_zeros == MoreLeadingZeros::Equal && lza < lzb {
|
||||
more_leading_zeros = MoreLeadingZeros::Right;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
order @ _ => return order,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
match more_leading_zeros {
|
||||
MoreLeadingZeros::Equal => std::cmp::Ordering::Equal,
|
||||
MoreLeadingZeros::Left => std::cmp::Ordering::Less,
|
||||
MoreLeadingZeros::Right => std::cmp::Ordering::Greater,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_chunks() {
|
||||
let mut iter = VersionChunkIter::new("x86_128");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("x")));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 86,
|
||||
zeros: 0,
|
||||
source: "86"
|
||||
})
|
||||
);
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 128,
|
||||
zeros: 0,
|
||||
source: "128"
|
||||
})
|
||||
);
|
||||
assert_eq!(iter.next(), None);
|
||||
|
||||
let mut iter = VersionChunkIter::new("w005s09t");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("w")));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 5,
|
||||
zeros: 2,
|
||||
source: "005"
|
||||
})
|
||||
);
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("s")));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 9,
|
||||
zeros: 1,
|
||||
source: "09"
|
||||
})
|
||||
);
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("t")));
|
||||
assert_eq!(iter.next(), None);
|
||||
|
||||
let mut iter = VersionChunkIter::new("ZY_WX");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("ZY")));
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("WX")));
|
||||
|
||||
let mut iter = VersionChunkIter::new("_v1");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 1,
|
||||
zeros: 0,
|
||||
source: "1"
|
||||
})
|
||||
);
|
||||
|
||||
let mut iter = VersionChunkIter::new("_1v");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 1,
|
||||
zeros: 0,
|
||||
source: "1"
|
||||
})
|
||||
);
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
|
||||
|
||||
let mut iter = VersionChunkIter::new("v009");
|
||||
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
|
||||
assert_eq!(
|
||||
iter.next(),
|
||||
Some(VersionChunk::Number {
|
||||
value: 9,
|
||||
zeros: 2,
|
||||
source: "009"
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_version_sort() {
|
||||
let mut input = vec!["", "b", "a"];
|
||||
let expected = vec!["", "a", "b"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["x7x", "xxx"];
|
||||
let expected = vec!["x7x", "xxx"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["applesauce", "apple"];
|
||||
let expected = vec!["apple", "applesauce"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["aaaaa", "aaa_a"];
|
||||
let expected = vec!["aaa_a", "aaaaa"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["AAAAA", "AAA1A", "BBBBB", "BB_BB", "C3CCC"];
|
||||
let expected = vec!["AAA1A", "AAAAA", "BB_BB", "BBBBB", "C3CCC"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["1_000_000", "1_010_001"];
|
||||
let expected = vec!["1_000_000", "1_010_001"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec![
|
||||
"5", "50", "500", "5_000", "5_005", "5_050", "5_500", "50_000", "50_005", "50_050",
|
||||
"50_500",
|
||||
];
|
||||
let expected = vec![
|
||||
"5", "5_000", "5_005", "5_050", "5_500", "50", "50_000", "50_005", "50_050", "50_500",
|
||||
"500",
|
||||
];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["X86_64", "x86_64", "X86_128", "x86_128"];
|
||||
let expected = vec!["X86_64", "X86_128", "x86_64", "x86_128"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["__", "_"];
|
||||
let expected = vec!["_", "__"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["foo_", "foo"];
|
||||
let expected = vec!["foo", "foo_"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
|
||||
let expected = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected);
|
||||
|
||||
let mut input = vec![
|
||||
"x86_128", "usize", "uz", "v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01",
|
||||
"v1", "v009", "x87", "zyxw", "_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW",
|
||||
"ZYXW", "v09", "v9", "v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32",
|
||||
"ua", "x86_64", "ZYXW_", "a1", "abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128",
|
||||
"u256",
|
||||
];
|
||||
let expected = vec![
|
||||
"_ZYXW", "_abcd", "A2", "ABCD", "Z_YXW", "ZY_XW", "ZY_XW", "ZYXW", "ZYXW_", "a1",
|
||||
"abcd", "u_zzz", "u8", "u16", "u32", "u64", "u128", "u256", "ua", "usize", "uz",
|
||||
"v000", "v00", "v0", "v0s", "v00t", "v0u", "v001", "v01", "v1", "v009", "v09", "v9",
|
||||
"v010", "v10", "w005s09t", "w5s009t", "x64", "x86", "x86_32", "x86_64", "x86_128",
|
||||
"x87", "zyxw",
|
||||
];
|
||||
input.sort_by(|a, b| version_sort(a, b));
|
||||
assert_eq!(input, expected)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user