rustdoc-search: count path edits with separate edit limit
Since the two are counted separately elsewhere, they should get their own limits, too. The biggest problem with combining them is that paths are loosely checked by not requiring every component to match, which means that if they are short and matched loosely, they can easily find "drunk typist" matches that make no sense, like this old result:

    std::collections::btree_map::itermut matching slice::itermut
    maxEditDistance = ("slice::itermut".length) / 3 = 14 / 3 = 4
    editDistance("std", "slice") = 4
    editDistance("itermut", "itermut") = 0
      4 + 0 <= 4 PASS

Of course, `slice::itermut` should not match stuff from btreemap. `slice` should not match `std`.

The new result counts them separately:

    maxPathEditDistance = "slice".length / 3 = 5 / 3 = 1
    maxEditDistance = "itermut".length / 3 = 7 / 3 = 2
    editDistance("std", "slice") = 4
      4 <= 1 FAIL

Effectively, this makes path queries less "typo-resistant". It's not zero, but it means `vec` won't match the `v1` prelude.

Queries without parent paths are unchanged.
This commit is contained in:
parent
a75fed74b6
commit
0ea58e2346
@ -1805,11 +1805,20 @@ function initSearch(rawSearchIndex) {
|
||||
return unifyFunctionTypes([row], [elem], whereClause, mgens);
|
||||
}
|
||||
|
||||
function checkPath(contains, ty, maxEditDistance) {
|
||||
/**
|
||||
* Compute an "edit distance" that ignores missing path elements.
|
||||
* @param {string[]} contains search query path
|
||||
* @param {Row} ty indexed item
|
||||
* @returns {null|number} edit distance
|
||||
*/
|
||||
function checkPath(contains, ty) {
|
||||
if (contains.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
let ret_dist = maxEditDistance + 1;
|
||||
const maxPathEditDistance = Math.floor(
|
||||
contains.reduce((acc, next) => acc + next.length, 0) / 3
|
||||
);
|
||||
let ret_dist = maxPathEditDistance + 1;
|
||||
const path = ty.path.split("::");
|
||||
|
||||
if (ty.parent && ty.parent.name) {
|
||||
@ -1821,15 +1830,23 @@ function initSearch(rawSearchIndex) {
|
||||
pathiter: for (let i = length - clength; i >= 0; i -= 1) {
|
||||
let dist_total = 0;
|
||||
for (let x = 0; x < clength; ++x) {
|
||||
const dist = editDistance(path[i + x], contains[x], maxEditDistance);
|
||||
if (dist > maxEditDistance) {
|
||||
continue pathiter;
|
||||
const [p, c] = [path[i + x], contains[x]];
|
||||
if (Math.floor((p.length - c.length) / 3) <= maxPathEditDistance &&
|
||||
p.indexOf(c) !== -1
|
||||
) {
|
||||
// discount distance on substring match
|
||||
dist_total += Math.floor((p.length - c.length) / 3);
|
||||
} else {
|
||||
const dist = editDistance(p, c, maxPathEditDistance);
|
||||
if (dist > maxPathEditDistance) {
|
||||
continue pathiter;
|
||||
}
|
||||
dist_total += dist;
|
||||
}
|
||||
dist_total += dist;
|
||||
}
|
||||
ret_dist = Math.min(ret_dist, Math.round(dist_total / clength));
|
||||
}
|
||||
return ret_dist;
|
||||
return ret_dist > maxPathEditDistance ? null : ret_dist;
|
||||
}
|
||||
|
||||
function typePassesFilter(filter, type) {
|
||||
@ -2030,8 +2047,8 @@ function initSearch(rawSearchIndex) {
|
||||
}
|
||||
|
||||
if (elem.fullPath.length > 1) {
|
||||
path_dist = checkPath(elem.pathWithoutLast, row, maxEditDistance);
|
||||
if (path_dist > maxEditDistance) {
|
||||
path_dist = checkPath(elem.pathWithoutLast, row);
|
||||
if (path_dist === null) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2045,7 +2062,7 @@ function initSearch(rawSearchIndex) {
|
||||
|
||||
const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance);
|
||||
|
||||
if (index === -1 && dist + path_dist > maxEditDistance) {
|
||||
if (index === -1 && dist > maxEditDistance) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2100,13 +2117,9 @@ function initSearch(rawSearchIndex) {
|
||||
}
|
||||
|
||||
function innerRunQuery() {
|
||||
let queryLen = 0;
|
||||
for (const elem of parsedQuery.elems) {
|
||||
queryLen += elem.name.length;
|
||||
}
|
||||
for (const elem of parsedQuery.returned) {
|
||||
queryLen += elem.name.length;
|
||||
}
|
||||
const queryLen =
|
||||
parsedQuery.elems.reduce((acc, next) => acc + next.pathLast.length, 0) +
|
||||
parsedQuery.returned.reduce((acc, next) => acc + next.pathLast.length, 0);
|
||||
const maxEditDistance = Math.floor(queryLen / 3);
|
||||
|
||||
/**
|
||||
|
@ -7,7 +7,6 @@ const EXPECTED = {
|
||||
// Validate that type alias methods get the correct path.
|
||||
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
|
||||
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
|
||||
{ 'path': 'std::os::linux::process::PidFd', 'name': 'as_raw_fd' },
|
||||
{ 'path': 'std::os::fd::RawFd', 'name': 'as_raw_fd' },
|
||||
],
|
||||
};
|
||||
|
42
tests/rustdoc-js-std/path-maxeditdistance.js
Normal file
42
tests/rustdoc-js-std/path-maxeditdistance.js
Normal file
@ -0,0 +1,42 @@
|
||||
// exact-check
|
||||
const FILTER_CRATE = "std";
|
||||
const EXPECTED = [
|
||||
{
|
||||
query: 'vec::intoiterator',
|
||||
others: [
|
||||
// trait std::iter::IntoIterator is not the first result
|
||||
{ 'path': 'std::vec', 'name': 'IntoIter' },
|
||||
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
|
||||
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
|
||||
],
|
||||
},
|
||||
{
|
||||
query: 'vec::iter',
|
||||
others: [
|
||||
// std::net::ToSocketAttrs::iter should not show up here
|
||||
{ 'path': 'std::vec', 'name': 'IntoIter' },
|
||||
{ 'path': 'std::vec::Vec', 'name': 'from_iter' },
|
||||
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
|
||||
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
|
||||
{ 'path': 'std::collections::VecDeque', 'name': 'iter' },
|
||||
{ 'path': 'std::collections::VecDeque', 'name': 'iter_mut' },
|
||||
{ 'path': 'std::collections::VecDeque', 'name': 'from_iter' },
|
||||
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
|
||||
],
|
||||
},
|
||||
{
|
||||
query: 'slice::itermut',
|
||||
others: [
|
||||
// std::collections::btree_map::itermut should not show up here
|
||||
{ 'path': 'std::slice', 'name': 'IterMut' },
|
||||
{ 'path': 'std::slice', 'name': 'iter_mut' },
|
||||
],
|
||||
},
|
||||
];
|
@ -1,11 +1,20 @@
|
||||
const EXPECTED = {
|
||||
query: 'hashset::insert',
|
||||
others: [
|
||||
// ensure hashset::insert comes first
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
|
||||
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
|
||||
],
|
||||
};
|
||||
const EXPECTED = [
|
||||
{
|
||||
query: 'hashset::insert',
|
||||
others: [
|
||||
// ensure hashset::insert comes first
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
|
||||
],
|
||||
},
|
||||
{
|
||||
query: 'hash::insert',
|
||||
others: [
|
||||
// ensure hashset/hashmap::insert come first
|
||||
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
|
||||
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
@ -3,6 +3,5 @@ const EXPECTED = {
|
||||
'others': [
|
||||
{ 'path': 'exact_match::Si', 'name': 'pc' },
|
||||
{ 'path': 'exact_match::Psi', 'name': 'pc' },
|
||||
{ 'path': 'exact_match::Si', 'name': 'pa' },
|
||||
],
|
||||
};
|
||||
|
@ -1,7 +1,15 @@
|
||||
const EXPECTED = {
|
||||
'query': 'ig::pc',
|
||||
'others': [
|
||||
{ 'path': 'module_substring::Sig', 'name': 'pc' },
|
||||
{ 'path': 'module_substring::Si', 'name': 'pc' },
|
||||
],
|
||||
};
|
||||
const EXPECTED = [
|
||||
{
|
||||
'query': 'ig::pc',
|
||||
'others': [
|
||||
{ 'path': 'module_substring::Sig', 'name': 'pc' },
|
||||
],
|
||||
},
|
||||
{
|
||||
'query': 'si::pc',
|
||||
'others': [
|
||||
{ 'path': 'module_substring::Si', 'name': 'pc' },
|
||||
{ 'path': 'module_substring::Sig', 'name': 'pc' },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
35
tests/rustdoc-js/path-maxeditdistance.js
Normal file
35
tests/rustdoc-js/path-maxeditdistance.js
Normal file
@ -0,0 +1,35 @@
|
||||
// exact-check
|
||||
|
||||
const EXPECTED = [
|
||||
{
|
||||
'query': 'xxxxxxxxxxx::hocuspocusprestidigitation',
|
||||
// do not match abracadabra::hocuspocusprestidigitation
|
||||
'others': [],
|
||||
},
|
||||
{
|
||||
// exact match
|
||||
'query': 'abracadabra::hocuspocusprestidigitation',
|
||||
'others': [
|
||||
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
|
||||
],
|
||||
},
|
||||
{
|
||||
// swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3)
|
||||
'query': 'arbacadarba::hocuspocusprestidigitation',
|
||||
'others': [
|
||||
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
|
||||
],
|
||||
},
|
||||
{
|
||||
// truncate 5 chars, where maxEditDistance = 7 (21 / 3)
|
||||
'query': 'abracadarba::hocusprestidigitation',
|
||||
'others': [
|
||||
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
|
||||
],
|
||||
},
|
||||
{
|
||||
// truncate 9 chars, where maxEditDistance = 5 (17 / 3)
|
||||
'query': 'abracadarba::hprestidigitation',
|
||||
'others': [],
|
||||
},
|
||||
];
|
3
tests/rustdoc-js/path-maxeditdistance.rs
Normal file
3
tests/rustdoc-js/path-maxeditdistance.rs
Normal file
@ -0,0 +1,3 @@
|
||||
#![crate_name="abracadabra"]
|
||||
|
||||
pub struct HocusPocusPrestidigitation;
|
@ -1,13 +1,13 @@
|
||||
// exact-check
|
||||
|
||||
const EXPECTED = {
|
||||
'query': 'b::ccccccc',
|
||||
'query': 'bbbbbb::ccccccc',
|
||||
'others': [
|
||||
// `ccccccc` is an exact match for all three of these.
|
||||
// However the query path `bbbbbb` is an exact match for the `bbbbbb`
|
||||
// module and one edit away from the others, so it ought to go first.
|
||||
{ 'path': 'path_ordering::bb', 'name': 'Ccccccc' },
|
||||
{ 'path': 'path_ordering::aa', 'name': 'Ccccccc' },
|
||||
{ 'path': 'path_ordering::dd', 'name': 'Ccccccc' },
|
||||
{ 'path': 'path_ordering::bbbbbb', 'name': 'Ccccccc' },
|
||||
{ 'path': 'path_ordering::abbbbb', 'name': 'Ccccccc' },
|
||||
{ 'path': 'path_ordering::dbbbbb', 'name': 'Ccccccc' },
|
||||
],
|
||||
};
|
||||
|
@ -1,9 +1,9 @@
|
||||
pub mod dd {
|
||||
pub mod dbbbbb {
|
||||
pub struct Ccccccc;
|
||||
}
|
||||
pub mod aa {
|
||||
pub mod abbbbb {
|
||||
pub struct Ccccccc;
|
||||
}
|
||||
pub mod bb {
|
||||
pub mod bbbbbb {
|
||||
pub struct Ccccccc;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user