From 6d5945284147be6dd22486fb1c189c57a98632cd Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 17 Nov 2023 13:05:18 -0700 Subject: [PATCH 1/4] rustdoc-search: less new Maps in unifyFunctionType This is a major source of expense on generic queries, and this commit reduces them. Profile output: https://notriddle.com/rustdoc-html-demo-5/profile-2/index.html --- src/librustdoc/html/static/js/search.js | 47 ++++++++++++++++--------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index a3606f4302b..0c337f47dc9 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1329,9 +1329,9 @@ function initSearch(rawSearchIndex) { */ function unifyFunctionTypes(fnTypesIn, queryElems, whereClause, mgensIn, solutionCb) { /** - * @type Map + * @type Map|null */ - let mgens = new Map(mgensIn); + let mgens = mgensIn === null ? null : new Map(mgensIn); if (queryElems.length === 0) { return !solutionCb || solutionCb(mgens); } @@ -1382,12 +1382,14 @@ function initSearch(rawSearchIndex) { fnTypesOffset, unbox, } = backtracking.pop(); - mgens = new Map(mgensScratch); + mgens = mgensScratch !== null ? new Map(mgensScratch) : null; const fnType = fnTypesScratch[fnTypesOffset]; const queryElem = queryElems[queryElemsOffset]; if (unbox) { if (fnType.id < 0) { - if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { + if (mgens === null) { + mgens = new Map(); + } else if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { continue; } mgens.set(fnType.id, 0); @@ -1401,7 +1403,9 @@ function initSearch(rawSearchIndex) { i = queryElemsOffset - 1; } else { if (fnType.id < 0) { - if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { + if (mgens === null) { + mgens = new Map(); + } else if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { continue; } mgens.set(fnType.id, queryElem.id); @@ -1456,7 +1460,7 @@ function initSearch(rawSearchIndex) { if (!fnTypesScratch) { fnTypesScratch = fnTypes.slice(); } - if (!mgensScratch) { + if (!mgensScratch && mgens !== null) { mgensScratch = new Map(mgens); } backtracking.push({ @@ -1478,10 +1482,19 @@ function initSearch(rawSearchIndex) { // use the current candidate const {fnTypesOffset: candidate, mgensScratch: mgensNew} = matchCandidates.pop(); if (fnTypes[candidate].id < 0 && queryElems[i].id < 0) { + if (mgens === null) { + mgens = new Map(); + } mgens.set(fnTypes[candidate].id, queryElems[i].id); } - for (const [fid, qid] of mgensNew) { - mgens.set(fid, qid); + if (mgensNew !== null) { + if (mgens === null) { + mgens = mgensNew; + } else { + for (const [fid, qid] of mgensNew) { + mgens.set(fid, qid); + } + } } // `i` and `j` are paired off // `queryElems[i]` is left in place @@ -1514,15 +1527,17 @@ function initSearch(rawSearchIndex) { // or, if mgens[fnType.id] = 0, then we've matched this generic with a bare trait // and should make that same decision everywhere it appears if (fnType.id < 0 && queryElem.id < 0) { - if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { - return false; - } - for (const [fid, qid] of mgens.entries()) { - if (fnType.id !== fid && queryElem.id === qid) { + if (mgens !== null) { + if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { return false; } - if (fnType.id === fid && queryElem.id !== qid) { - return false; + for (const [fid, qid] of mgens.entries()) { + if (fnType.id !== fid && queryElem.id === qid) { + return false; + } + if (fnType.id === fid && queryElem.id !== qid) { + return false; + } } } } else { @@ -1575,7 +1590,7 @@ function initSearch(rawSearchIndex) { } // mgens[fnType.id] === 0 indicates that we committed to unboxing this generic // mgens[fnType.id] === null indicates that we haven't decided yet - if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { + if (mgens !== null && mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { return false; } // This is only a potential unbox if the search query appears in the where clause From c76c2e71f01a61a9b11001b0dc3fe837975f41c0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 17 Nov 2023 14:44:53 -0700 Subject: [PATCH 2/4] rustdoc-search: fast path for 1-query unification Short queries, in addition to being common, are also the base case for a lot of more complicated queries. We can avoid most of the backtracking data structures, and use simple recursive matching instead, by special casing them. Profile output: https://notriddle.com/rustdoc-html-demo-5/profile-3/index.html --- src/librustdoc/html/static/js/search.js | 78 ++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 0c337f47dc9..38391436aea 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1318,7 +1318,7 @@ function initSearch(rawSearchIndex) { * then this function will try with a different solution, or bail with false if it * runs out of candidates. * - * @param {Array} fnTypes - The objects to check. + * @param {Array} fnTypesIn - The objects to check. * @param {Array} queryElems - The elements from the parsed query. * @param {[FunctionType]} whereClause - Trait bounds for generic items. * @param {Map|null} mgensIn @@ -1340,6 +1340,79 @@ function initSearch(rawSearchIndex) { } const ql = queryElems.length; let fl = fnTypesIn.length; + + // Fast path + if (queryElems.length === 1 && queryElems[0].generics.length === 0) { + const queryElem = queryElems[0]; + for (const fnType of fnTypesIn) { + if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { + continue; + } + if (fnType.id < 0 && queryElem.id < 0) { + if (mgens === null) { + mgens = new Map(); + } + const alreadyAssigned = mgens.has(fnType.id); + if (alreadyAssigned) { + if (mgens.get(fnType.id) !== queryElem.id) { + continue; + } + } else { + mgens.set(fnType.id, queryElem.id); + } + if (!solutionCb || solutionCb(mgens)) { + return true; + } + if (!alreadyAssigned) { + mgens.delete(fnType.id); + } + } else if (!solutionCb || solutionCb(mgens)) { + // unifyFunctionTypeIsMatchCandidate already checks that ids match + return true; + } + } + for (const fnType of fnTypesIn) { + if (!unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) { + continue; + } + if (fnType.id < 0) { + if (mgens === null) { + mgens = new Map(); + } + const alreadyAssigned = mgens.has(fnType.id); + if (alreadyAssigned) { + if (mgens.get(fnType.id) !== 0) { + continue; + } + } else { + mgens.set(fnType.id, 0); + } + if (unifyFunctionTypes( + whereClause[(-fnType.id) - 1], + queryElems, + whereClause, + mgens, + solutionCb + )) { + return true; + } + if (!alreadyAssigned) { + mgens.delete(fnType.id); + } + } else if (unifyFunctionTypes( + fnType.generics, + queryElems, + whereClause, + mgens, + solutionCb + )) { + return true; + } + } + return false; + } + + // Slow path /** * @type Array */ @@ -1405,7 +1478,8 @@ function initSearch(rawSearchIndex) { if (fnType.id < 0) { if (mgens === null) { mgens = new Map(); - } else if (mgens.has(fnType.id) && mgens.get(fnType.id) !== queryElem.id) { + } else if (mgens.has(fnType.id) && + mgens.get(fnType.id) !== queryElem.id) { continue; } mgens.set(fnType.id, queryElem.id); From a66972d5516c9d2e6e1fb99c263ca2aa2fc67b9e Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 17 Nov 2023 15:44:29 -0700 Subject: [PATCH 3/4] rustdoc-search: fix accidental shared, mutable map --- src/librustdoc/html/static/js/search.js | 44 ++++++++----------------- tests/rustdoc-js/generics2.js | 22 +++++++++++++ tests/rustdoc-js/generics2.rs | 13 ++++++++ 3 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 tests/rustdoc-js/generics2.js create mode 100644 tests/rustdoc-js/generics2.rs diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 38391436aea..a2aa50194e4 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1349,24 +1349,16 @@ function initSearch(rawSearchIndex) { continue; } if (fnType.id < 0 && queryElem.id < 0) { - if (mgens === null) { - mgens = new Map(); + if (mgens && mgens.has(fnType.id) && + mgens.get(fnType.id) !== queryElem.id) { + continue; } - const alreadyAssigned = mgens.has(fnType.id); - if (alreadyAssigned) { - if (mgens.get(fnType.id) !== queryElem.id) { - continue; - } - } else { - mgens.set(fnType.id, queryElem.id); - } - if (!solutionCb || solutionCb(mgens)) { + const mgensScratch = new Map(mgens); + mgensScratch.set(fnType.id, queryElem.id); + if (!solutionCb || solutionCb(mgensScratch)) { return true; } - if (!alreadyAssigned) { - mgens.delete(fnType.id); - } - } else if (!solutionCb || solutionCb(mgens)) { + } else if (!solutionCb || solutionCb(mgens ? new Map(mgens) : null)) { // unifyFunctionTypeIsMatchCandidate already checks that ids match return true; } @@ -1376,34 +1368,26 @@ function initSearch(rawSearchIndex) { continue; } if (fnType.id < 0) { - if (mgens === null) { - mgens = new Map(); - } - const alreadyAssigned = mgens.has(fnType.id); - if (alreadyAssigned) { - if (mgens.get(fnType.id) !== 0) { - continue; - } - } else { - mgens.set(fnType.id, 0); + if (mgens && mgens.has(fnType.id) && + mgens.get(fnType.id) !== 0) { + continue; } + const mgensScratch = new Map(mgens); + mgensScratch.set(fnType.id, 0); if (unifyFunctionTypes( whereClause[(-fnType.id) - 1], queryElems, whereClause, - mgens, + mgensScratch, solutionCb )) { return true; } - if (!alreadyAssigned) { - mgens.delete(fnType.id); - } } else if (unifyFunctionTypes( fnType.generics, queryElems, whereClause, - mgens, + mgens ? new Map(mgens) : null, solutionCb )) { return true; diff --git a/tests/rustdoc-js/generics2.js b/tests/rustdoc-js/generics2.js new file mode 100644 index 00000000000..f08704349a4 --- /dev/null +++ b/tests/rustdoc-js/generics2.js @@ -0,0 +1,22 @@ +// exact-check + +const EXPECTED = [ + { + 'query': 'outside, outside -> outside', + 'others': [], + }, + { + 'query': 'outside, outside -> outside', + 'others': [], + }, + { + 'query': 'outside, outside -> outside', + 'others': [], + }, + { + 'query': 'outside, outside -> outside', + 'others': [ + {"path": "generics2", "name": "should_match_3"} + ], + }, +]; diff --git a/tests/rustdoc-js/generics2.rs b/tests/rustdoc-js/generics2.rs new file mode 100644 index 00000000000..1177ade6831 --- /dev/null +++ b/tests/rustdoc-js/generics2.rs @@ -0,0 +1,13 @@ +pub struct Outside(T); + +pub fn no_match(a: Outside, b: Outside) -> (Outside, Outside) { + unimplemented!(); +} + +pub fn no_match_2(a: Outside, b: Outside) -> (Outside, Outside) { + unimplemented!(); +} + +pub fn should_match_3(a: Outside, b: Outside) -> (Outside, Outside) { + unimplemented!(); +} From d82b3748d548c3e9cde680446424ca45a2d0ae36 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 18 Nov 2023 12:31:46 -0700 Subject: [PATCH 4/4] rustdoc-search: switch to recursive backtracking This is significantly faster, because - It allows the one-element fast path to kick in on multi- element queries. - It constructs intermediate data structures more lazily than the old system did. It's measurably faster than the old algo even without the fast path, but that fast path still helps significantly. --- src/librustdoc/html/static/js/search.js | 242 +++++++++--------------- 1 file changed, 86 insertions(+), 156 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index a2aa50194e4..e9dd3c439b3 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1331,7 +1331,7 @@ function initSearch(rawSearchIndex) { /** * @type Map|null */ - let mgens = mgensIn === null ? null : new Map(mgensIn); + const mgens = mgensIn === null ? null : new Map(mgensIn); if (queryElems.length === 0) { return !solutionCb || solutionCb(mgens); } @@ -1339,10 +1339,10 @@ function initSearch(rawSearchIndex) { return false; } const ql = queryElems.length; - let fl = fnTypesIn.length; + const fl = fnTypesIn.length; - // Fast path - if (queryElems.length === 1 && queryElems[0].generics.length === 0) { + // One element fast path / base case + if (ql === 1 && queryElems[0].generics.length === 0) { const queryElem = queryElems[0]; for (const fnType of fnTypesIn) { if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { @@ -1396,183 +1396,113 @@ function initSearch(rawSearchIndex) { return false; } - // Slow path + // Multiple element recursive case /** * @type Array */ - let fnTypes = fnTypesIn.slice(); + const fnTypes = fnTypesIn.slice(); /** - * loop works by building up a solution set in the working arrays + * Algorithm works by building up a solution set in the working arrays * fnTypes gets mutated in place to make this work, while queryElems - * is left alone + * is left alone. * - * vvvvvvv `i` points here - * queryElems = [ good, good, good, unknown, unknown ], - * fnTypes = [ good, good, good, unknown, unknown ], - * ---------------- ^^^^^^^^^^^^^^^^ `j` iterates after `i`, - * | looking for candidates - * everything before `i` is the - * current working solution + * It works backwards, because arrays can be cheaply truncated that way. + * + * vvvvvvv `queryElem` + * queryElems = [ unknown, unknown, good, good, good ] + * fnTypes = [ unknown, unknown, good, good, good ] + * ^^^^^^^^^^^^^^^^ loop over these elements to find candidates * * Everything in the current working solution is known to be a good * match, but it might not be the match we wind up going with, because * there might be more than one candidate match, and we need to try them all * before giving up. So, to handle this, it backtracks on failure. - * - * @type Array<{ - * "fnTypesScratch": Array, - * "queryElemsOffset": integer, - * "fnTypesOffset": integer - * }> */ - const backtracking = []; - let i = 0; - let j = 0; - const backtrack = () => { - while (backtracking.length !== 0) { - // this session failed, but there are other possible solutions - // to backtrack, reset to (a copy of) the old array, do the swap or unboxing - const { - fnTypesScratch, - mgensScratch, - queryElemsOffset, - fnTypesOffset, - unbox, - } = backtracking.pop(); - mgens = mgensScratch !== null ? new Map(mgensScratch) : null; - const fnType = fnTypesScratch[fnTypesOffset]; - const queryElem = queryElems[queryElemsOffset]; - if (unbox) { - if (fnType.id < 0) { - if (mgens === null) { - mgens = new Map(); - } else if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) { - continue; - } - mgens.set(fnType.id, 0); - } - const generics = fnType.id < 0 ? - whereClause[(-fnType.id) - 1] : - fnType.generics; - fnTypes = fnTypesScratch.toSpliced(fnTypesOffset, 1, ...generics); - fl = fnTypes.length; - // re-run the matching algorithm on this item - i = queryElemsOffset - 1; - } else { - if (fnType.id < 0) { - if (mgens === null) { - mgens = new Map(); - } else if (mgens.has(fnType.id) && - mgens.get(fnType.id) !== queryElem.id) { - continue; - } - mgens.set(fnType.id, queryElem.id); - } - fnTypes = fnTypesScratch.slice(); - fl = fnTypes.length; - const tmp = fnTypes[queryElemsOffset]; - fnTypes[queryElemsOffset] = fnTypes[fnTypesOffset]; - fnTypes[fnTypesOffset] = tmp; - // this is known as a good match; go to the next one - i = queryElemsOffset; - } - return true; + const flast = fl - 1; + const qlast = ql - 1; + const queryElem = queryElems[qlast]; + let queryElemsTmp = null; + for (let i = flast; i >= 0; i -= 1) { + const fnType = fnTypes[i]; + if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { + continue; } - return false; - }; - for (i = 0; i !== ql; ++i) { - const queryElem = queryElems[i]; - /** - * list of potential function types that go with the current query element. - * @type Array - */ - const matchCandidates = []; - let fnTypesScratch = null; - let mgensScratch = null; - // don't try anything before `i`, because they've already been - // paired off with the other query elements - for (j = i; j !== fl; ++j) { - const fnType = fnTypes[j]; - if (unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { - if (!fnTypesScratch) { - fnTypesScratch = fnTypes.slice(); + let mgensScratch; + if (fnType.id < 0) { + mgensScratch = new Map(mgens); + if (mgensScratch.has(fnType.id) + && mgensScratch.get(fnType.id) !== queryElem.id) { + continue; + } + mgensScratch.set(fnType.id, queryElem.id); + } else { + mgensScratch = mgens; + } + // fnTypes[i] is a potential match + // fnTypes[flast] is the last item in the list + // swap them, and drop the potential match from the list + // check if the remaining function types also match + fnTypes[i] = fnTypes[flast]; + fnTypes.length = flast; + if (!queryElemsTmp) { + queryElemsTmp = queryElems.slice(0, qlast); + } + const passesUnification = unifyFunctionTypes( + fnTypes, + queryElemsTmp, + whereClause, + mgensScratch, + mgensScratch => { + if (fnType.generics.length === 0 && queryElem.generics.length === 0) { + return !solutionCb || solutionCb(mgensScratch); } - unifyFunctionTypes( + return unifyFunctionTypes( fnType.generics, queryElem.generics, whereClause, - mgens, - mgensScratch => { - matchCandidates.push({ - fnTypesScratch, - mgensScratch, - queryElemsOffset: i, - fnTypesOffset: j, - unbox: false, - }); - return false; // "reject" all candidates to gather all of them - } + mgensScratch, + solutionCb ); } - if (unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) { - if (!fnTypesScratch) { - fnTypesScratch = fnTypes.slice(); - } - if (!mgensScratch && mgens !== null) { - mgensScratch = new Map(mgens); - } - backtracking.push({ - fnTypesScratch, - mgensScratch, - queryElemsOffset: i, - fnTypesOffset: j, - unbox: true, - }); - } + ); + if (passesUnification) { + return true; } - if (matchCandidates.length === 0) { - if (backtrack()) { + // backtrack + fnTypes[flast] = fnTypes[i]; + fnTypes[i] = fnType; + fnTypes.length = fl; + } + for (let i = flast; i >= 0; i -= 1) { + const fnType = fnTypes[i]; + if (!unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) { + continue; + } + let mgensScratch; + if (fnType.id < 0) { + mgensScratch = new Map(mgens); + if (mgensScratch.has(fnType.id) && mgensScratch.get(fnType.id) !== 0) { continue; - } else { - return false; } + mgensScratch.set(fnType.id, 0); + } else { + mgensScratch = mgens; } - // use the current candidate - const {fnTypesOffset: candidate, mgensScratch: mgensNew} = matchCandidates.pop(); - if (fnTypes[candidate].id < 0 && queryElems[i].id < 0) { - if (mgens === null) { - mgens = new Map(); - } - mgens.set(fnTypes[candidate].id, queryElems[i].id); - } - if (mgensNew !== null) { - if (mgens === null) { - mgens = mgensNew; - } else { - for (const [fid, qid] of mgensNew) { - mgens.set(fid, qid); - } - } - } - // `i` and `j` are paired off - // `queryElems[i]` is left in place - // `fnTypes[j]` is swapped with `fnTypes[i]` to pair them off - const tmp = fnTypes[candidate]; - fnTypes[candidate] = fnTypes[i]; - fnTypes[i] = tmp; - // write other candidates to backtracking queue - for (const otherCandidate of matchCandidates) { - backtracking.push(otherCandidate); - } - // If we're on the last item, check the solution with the callback - // backtrack if the callback says its unsuitable - while (i === (ql - 1) && solutionCb && !solutionCb(mgens)) { - if (!backtrack()) { - return false; - } + const generics = fnType.id < 0 ? + whereClause[(-fnType.id) - 1] : + fnType.generics; + const passesUnification = unifyFunctionTypes( + fnTypes.toSpliced(i, 1, ...generics), + queryElems, + whereClause, + mgensScratch, + solutionCb + ); + if (passesUnification) { + return true; } } - return true; + return false; } function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) { // type filters look like `trait:Read` or `enum:Result`