Auto merge of #118024 - notriddle:notriddle/search-speed, r=GuillaumeGomez

rustdoc-search: optimize unifyFunctionTypes

Final profile output:
https://notriddle.com/rustdoc-html-demo-5/profile-4/index.html

This PR contains three commits that improve the performance of this hot inner loop: one reduces the number of allocations, one adds a fast path for the 1-element basic query case, and one restructures the multi-element query case to use recursion instead of an explicit `backtracking` array. It also adds new test cases that I found while working on this.

r? `@GuillaumeGomez`
This commit is contained in:
bors 2023-11-19 14:47:08 +00:00
commit 27794f95fd
3 changed files with 187 additions and 149 deletions

View File

@ -1318,7 +1318,7 @@ function initSearch(rawSearchIndex) {
* then this function will try with a different solution, or bail with false if it * then this function will try with a different solution, or bail with false if it
* runs out of candidates. * runs out of candidates.
* *
* @param {Array<FunctionType>} fnTypes - The objects to check. * @param {Array<FunctionType>} fnTypesIn - The objects to check.
* @param {Array<QueryElement>} queryElems - The elements from the parsed query. * @param {Array<QueryElement>} queryElems - The elements from the parsed query.
* @param {[FunctionType]} whereClause - Trait bounds for generic items. * @param {[FunctionType]} whereClause - Trait bounds for generic items.
* @param {Map<number,number>|null} mgensIn * @param {Map<number,number>|null} mgensIn
@ -1329,9 +1329,9 @@ function initSearch(rawSearchIndex) {
*/ */
function unifyFunctionTypes(fnTypesIn, queryElems, whereClause, mgensIn, solutionCb) { function unifyFunctionTypes(fnTypesIn, queryElems, whereClause, mgensIn, solutionCb) {
/** /**
* @type Map<integer, integer> * @type Map<integer, integer>|null
*/ */
let mgens = new Map(mgensIn); const mgens = mgensIn === null ? null : new Map(mgensIn);
if (queryElems.length === 0) { if (queryElems.length === 0) {
return !solutionCb || solutionCb(mgens); return !solutionCb || solutionCb(mgens);
} }
@ -1339,169 +1339,170 @@ function initSearch(rawSearchIndex) {
return false; return false;
} }
const ql = queryElems.length; const ql = queryElems.length;
let fl = fnTypesIn.length; const fl = fnTypesIn.length;
// One element fast path / base case
if (ql === 1 && queryElems[0].generics.length === 0) {
const queryElem = queryElems[0];
for (const fnType of fnTypesIn) {
if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) {
continue;
}
if (fnType.id < 0 && queryElem.id < 0) {
if (mgens && mgens.has(fnType.id) &&
mgens.get(fnType.id) !== queryElem.id) {
continue;
}
const mgensScratch = new Map(mgens);
mgensScratch.set(fnType.id, queryElem.id);
if (!solutionCb || solutionCb(mgensScratch)) {
return true;
}
} else if (!solutionCb || solutionCb(mgens ? new Map(mgens) : null)) {
// unifyFunctionTypeIsMatchCandidate already checks that ids match
return true;
}
}
for (const fnType of fnTypesIn) {
if (!unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) {
continue;
}
if (fnType.id < 0) {
if (mgens && mgens.has(fnType.id) &&
mgens.get(fnType.id) !== 0) {
continue;
}
const mgensScratch = new Map(mgens);
mgensScratch.set(fnType.id, 0);
if (unifyFunctionTypes(
whereClause[(-fnType.id) - 1],
queryElems,
whereClause,
mgensScratch,
solutionCb
)) {
return true;
}
} else if (unifyFunctionTypes(
fnType.generics,
queryElems,
whereClause,
mgens ? new Map(mgens) : null,
solutionCb
)) {
return true;
}
}
return false;
}
// Multiple element recursive case
/** /**
* @type Array<FunctionType> * @type Array<FunctionType>
*/ */
let fnTypes = fnTypesIn.slice(); const fnTypes = fnTypesIn.slice();
/** /**
* loop works by building up a solution set in the working arrays * Algorithm works by building up a solution set in the working arrays
* fnTypes gets mutated in place to make this work, while queryElems * fnTypes gets mutated in place to make this work, while queryElems
* is left alone * is left alone.
* *
* vvvvvvv `i` points here * It works backwards, because arrays can be cheaply truncated that way.
* queryElems = [ good, good, good, unknown, unknown ], *
* fnTypes = [ good, good, good, unknown, unknown ], * vvvvvvv `queryElem`
* ---------------- ^^^^^^^^^^^^^^^^ `j` iterates after `i`, * queryElems = [ unknown, unknown, good, good, good ]
* | looking for candidates * fnTypes = [ unknown, unknown, good, good, good ]
* everything before `i` is the * ^^^^^^^^^^^^^^^^ loop over these elements to find candidates
* current working solution
* *
* Everything in the current working solution is known to be a good * Everything in the current working solution is known to be a good
* match, but it might not be the match we wind up going with, because * match, but it might not be the match we wind up going with, because
* there might be more than one candidate match, and we need to try them all * there might be more than one candidate match, and we need to try them all
* before giving up. So, to handle this, it backtracks on failure. * before giving up. So, to handle this, it backtracks on failure.
*
* @type Array<{
* "fnTypesScratch": Array<FunctionType>,
* "queryElemsOffset": integer,
* "fnTypesOffset": integer
* }>
*/ */
const backtracking = []; const flast = fl - 1;
let i = 0; const qlast = ql - 1;
let j = 0; const queryElem = queryElems[qlast];
const backtrack = () => { let queryElemsTmp = null;
while (backtracking.length !== 0) { for (let i = flast; i >= 0; i -= 1) {
// this session failed, but there are other possible solutions const fnType = fnTypes[i];
// to backtrack, reset to (a copy of) the old array, do the swap or unboxing if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) {
const { continue;
fnTypesScratch,
mgensScratch,
queryElemsOffset,
fnTypesOffset,
unbox,
} = backtracking.pop();
mgens = new Map(mgensScratch);
const fnType = fnTypesScratch[fnTypesOffset];
const queryElem = queryElems[queryElemsOffset];
if (unbox) {
if (fnType.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) {
continue;
}
mgens.set(fnType.id, 0);
}
const generics = fnType.id < 0 ?
whereClause[(-fnType.id) - 1] :
fnType.generics;
fnTypes = fnTypesScratch.toSpliced(fnTypesOffset, 1, ...generics);
fl = fnTypes.length;
// re-run the matching algorithm on this item
i = queryElemsOffset - 1;
} else {
if (fnType.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) {
continue;
}
mgens.set(fnType.id, queryElem.id);
}
fnTypes = fnTypesScratch.slice();
fl = fnTypes.length;
const tmp = fnTypes[queryElemsOffset];
fnTypes[queryElemsOffset] = fnTypes[fnTypesOffset];
fnTypes[fnTypesOffset] = tmp;
// this is known as a good match; go to the next one
i = queryElemsOffset;
}
return true;
} }
return false; let mgensScratch;
}; if (fnType.id < 0) {
for (i = 0; i !== ql; ++i) { mgensScratch = new Map(mgens);
const queryElem = queryElems[i]; if (mgensScratch.has(fnType.id)
/** && mgensScratch.get(fnType.id) !== queryElem.id) {
* list of potential function types that go with the current query element. continue;
* @type Array<integer> }
*/ mgensScratch.set(fnType.id, queryElem.id);
const matchCandidates = []; } else {
let fnTypesScratch = null; mgensScratch = mgens;
let mgensScratch = null; }
// don't try anything before `i`, because they've already been // fnTypes[i] is a potential match
// paired off with the other query elements // fnTypes[flast] is the last item in the list
for (j = i; j !== fl; ++j) { // swap them, and drop the potential match from the list
const fnType = fnTypes[j]; // check if the remaining function types also match
if (unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { fnTypes[i] = fnTypes[flast];
if (!fnTypesScratch) { fnTypes.length = flast;
fnTypesScratch = fnTypes.slice(); if (!queryElemsTmp) {
queryElemsTmp = queryElems.slice(0, qlast);
}
const passesUnification = unifyFunctionTypes(
fnTypes,
queryElemsTmp,
whereClause,
mgensScratch,
mgensScratch => {
if (fnType.generics.length === 0 && queryElem.generics.length === 0) {
return !solutionCb || solutionCb(mgensScratch);
} }
unifyFunctionTypes( return unifyFunctionTypes(
fnType.generics, fnType.generics,
queryElem.generics, queryElem.generics,
whereClause, whereClause,
mgens, mgensScratch,
mgensScratch => { solutionCb
matchCandidates.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: false,
});
return false; // "reject" all candidates to gather all of them
}
); );
} }
if (unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) { );
if (!fnTypesScratch) { if (passesUnification) {
fnTypesScratch = fnTypes.slice(); return true;
}
if (!mgensScratch) {
mgensScratch = new Map(mgens);
}
backtracking.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: true,
});
}
} }
if (matchCandidates.length === 0) { // backtrack
if (backtrack()) { fnTypes[flast] = fnTypes[i];
fnTypes[i] = fnType;
fnTypes.length = fl;
}
for (let i = flast; i >= 0; i -= 1) {
const fnType = fnTypes[i];
if (!unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) {
continue;
}
let mgensScratch;
if (fnType.id < 0) {
mgensScratch = new Map(mgens);
if (mgensScratch.has(fnType.id) && mgensScratch.get(fnType.id) !== 0) {
continue; continue;
} else {
return false;
} }
mgensScratch.set(fnType.id, 0);
} else {
mgensScratch = mgens;
} }
// use the current candidate const generics = fnType.id < 0 ?
const {fnTypesOffset: candidate, mgensScratch: mgensNew} = matchCandidates.pop(); whereClause[(-fnType.id) - 1] :
if (fnTypes[candidate].id < 0 && queryElems[i].id < 0) { fnType.generics;
mgens.set(fnTypes[candidate].id, queryElems[i].id); const passesUnification = unifyFunctionTypes(
} fnTypes.toSpliced(i, 1, ...generics),
for (const [fid, qid] of mgensNew) { queryElems,
mgens.set(fid, qid); whereClause,
} mgensScratch,
// `i` and `j` are paired off solutionCb
// `queryElems[i]` is left in place );
// `fnTypes[j]` is swapped with `fnTypes[i]` to pair them off if (passesUnification) {
const tmp = fnTypes[candidate]; return true;
fnTypes[candidate] = fnTypes[i];
fnTypes[i] = tmp;
// write other candidates to backtracking queue
for (const otherCandidate of matchCandidates) {
backtracking.push(otherCandidate);
}
// If we're on the last item, check the solution with the callback
// backtrack if the callback says its unsuitable
while (i === (ql - 1) && solutionCb && !solutionCb(mgens)) {
if (!backtrack()) {
return false;
}
} }
} }
return true; return false;
} }
function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) { function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) {
// type filters look like `trait:Read` or `enum:Result` // type filters look like `trait:Read` or `enum:Result`
@ -1514,15 +1515,17 @@ function initSearch(rawSearchIndex) {
// or, if mgens[fnType.id] = 0, then we've matched this generic with a bare trait // or, if mgens[fnType.id] = 0, then we've matched this generic with a bare trait
// and should make that same decision everywhere it appears // and should make that same decision everywhere it appears
if (fnType.id < 0 && queryElem.id < 0) { if (fnType.id < 0 && queryElem.id < 0) {
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { if (mgens !== null) {
return false; if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) {
}
for (const [fid, qid] of mgens.entries()) {
if (fnType.id !== fid && queryElem.id === qid) {
return false; return false;
} }
if (fnType.id === fid && queryElem.id !== qid) { for (const [fid, qid] of mgens.entries()) {
return false; if (fnType.id !== fid && queryElem.id === qid) {
return false;
}
if (fnType.id === fid && queryElem.id !== qid) {
return false;
}
} }
} }
} else { } else {
@ -1575,7 +1578,7 @@ function initSearch(rawSearchIndex) {
} }
// mgens[fnType.id] === 0 indicates that we committed to unboxing this generic // mgens[fnType.id] === 0 indicates that we committed to unboxing this generic
// mgens[fnType.id] === null indicates that we haven't decided yet // mgens[fnType.id] === null indicates that we haven't decided yet
if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { if (mgens !== null && mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) {
return false; return false;
} }
// This is only a potential unbox if the search query appears in the where clause // This is only a potential unbox if the search query appears in the where clause

View File

@ -0,0 +1,22 @@
// exact-check

// Expected search results for the `generics2` fixture crate.
// Each entry pairs a type-signature query with the full list of items
// that should be reported under `others`.
const EXPECTED = [
    // Return type uses a third generic (`W`): nothing should be found.
    {
        query: 'outside<U>, outside<V> -> outside<W>',
        others: [],
    },
    // Same as above with the two inputs swapped: still nothing.
    {
        query: 'outside<V>, outside<U> -> outside<W>',
        others: [],
    },
    // Both inputs share `U`, but the output is a different generic: nothing.
    {
        query: 'outside<U>, outside<U> -> outside<W>',
        others: [],
    },
    // The same generic everywhere: only `should_match_3` is expected.
    {
        query: 'outside<U>, outside<U> -> outside<U>',
        others: [
            { path: 'generics2', name: 'should_match_3' },
        ],
    },
];

View File

@ -0,0 +1,13 @@
// Generic single-field tuple struct wrapping a value of type `T`.
// Used as the parameter and return type of the functions in this file.
pub struct Outside<T>(T);
// Takes `Outside<U>` and `Outside<V>` (two distinct type parameters) and
// returns the tuple `(Outside<U>, Outside<V>)`.
// NOTE(review): judging by the name, this signature is presumably meant to be
// one that the companion search queries must NOT return — confirm against the test.
pub fn no_match<U, V>(a: Outside<U>, b: Outside<V>) -> (Outside<U>, Outside<V>) {
// Placeholder body: `unimplemented!()` panics if this is ever called.
unimplemented!();
}
// Like `no_match`, but with the argument types swapped: takes `Outside<V>`
// then `Outside<U>`, and still returns `(Outside<U>, Outside<V>)`.
// NOTE(review): judging by the name, this signature is presumably meant to be
// one that the companion search queries must NOT return — confirm against the test.
pub fn no_match_2<U, V>(a: Outside<V>, b: Outside<U>) -> (Outside<U>, Outside<V>) {
// Placeholder body: `unimplemented!()` panics if this is ever called.
unimplemented!();
}
// Uses a single type parameter `U` everywhere: takes two `Outside<U>` values
// and returns `(Outside<U>, Outside<U>)`.
// NOTE(review): judging by the name, this is presumably the one signature the
// companion search test expects to find — confirm against the test.
pub fn should_match_3<U>(a: Outside<U>, b: Outside<U>) -> (Outside<U>, Outside<U>) {
// Placeholder body: `unimplemented!()` panics if this is ever called.
unimplemented!();
}