Auto merge of #92570 - jsha:rustdoc-search-refactor, r=GuillaumeGomez

Simplify rustdoc search test

Previously, the rustdoc search tester attempted to parse search.js and extract only certain methods and variables.

This change makes search.js and search-index.js loadable as [CommonJS modules](https://nodejs.org/api/modules.html#modules-commonjs-modules), so they can be loaded directly.

As part of that change, I had to separate execSearch from interacting with the DOM. This wound up being a nice cleanup that made more explicit what inputs it was taking.

I removed search.js' dependency on storage.js by moving hasOwnPropertyRustdoc directly into search.js, and replacing onEach with forEach in a path that is called by the tester.

r? `@GuillaumeGomez`

Demo: https://rustdoc.crud.net/jsha/rustdoc-search-refactor/std/?search=foo
This commit is contained in:
bors 2022-05-17 19:50:44 +00:00
commit 4c5f6e6277
4 changed files with 117 additions and 313 deletions

View File

@ -438,7 +438,13 @@ pub(super) fn write_shared(
write_crate("search-index.js", &|| {
let mut v = String::from("var searchIndex = JSON.parse('{\\\n");
v.push_str(&all_indexes.join(",\\\n"));
v.push_str("\\\n}');\nif (window.initSearch) {window.initSearch(searchIndex)};");
v.push_str(
r#"\
}');
if (typeof window !== 'undefined' && window.initSearch) {window.initSearch(searchIndex)};
if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex};
"#,
);
Ok(v.into_bytes())
})?;

View File

@ -1,5 +1,5 @@
/* global addClass, getNakedUrl, getSettingValue, hasOwnPropertyRustdoc, initSearch, onEach */
/* global onEachLazy, removeClass, searchState, browserSupportsHistoryApi */
/* global addClass, getNakedUrl, getSettingValue */
/* global onEachLazy, removeClass, searchState, browserSupportsHistoryApi, exports */
"use strict";
@ -38,6 +38,11 @@ const itemTypes = [
// used for special search precedence
const TY_PRIMITIVE = itemTypes.indexOf("primitive");
const TY_KEYWORD = itemTypes.indexOf("keyword");
const ROOT_PATH = typeof window !== "undefined" ? window.rootPath : "../";
/**
 * Own-property check that does not rely on `obj` having its own
 * `hasOwnProperty` method: the implementation from `Object.prototype`
 * is invoked with `obj` as the receiver.
 *
 * @param {Object} obj - Object to inspect
 * @param {string} property - Property name to look for
 *
 * @return {boolean} - `true` if `property` is an own property of `obj`
 */
function hasOwnPropertyRustdoc(obj, property) {
    const { hasOwnProperty } = Object.prototype;
    return hasOwnProperty.call(obj, property);
}
// In the search display, allows to switch between tabs.
function printTab(nb) {
@ -106,7 +111,7 @@ function levenshtein(s1, s2) {
return s1_len + s2_len;
}
window.initSearch = rawSearchIndex => {
function initSearch(rawSearchIndex) {
const MAX_LEV_DISTANCE = 3;
const MAX_RESULTS = 200;
const GENERICS_DATA = 2;
@ -120,15 +125,6 @@ window.initSearch = rawSearchIndex => {
let searchIndex;
let currentResults;
const ALIASES = Object.create(null);
const params = searchState.getQueryStringParams();
// Populate search bar with query string search term when provided,
// but only if the input bar is empty. This avoids the obnoxious issue
// where you start trying to do a search, and the index loads, and
// suddenly your search is gone!
if (searchState.input.value === "") {
searchState.input.value = params.search || "";
}
function isWhitespace(c) {
return " \t\n\r".indexOf(c) !== -1;
@ -726,10 +722,11 @@ window.initSearch = rawSearchIndex => {
* @param {ParsedQuery} parsedQuery - The parsed user query
* @param {Object} searchWords - The list of search words to query against
* @param {Object} [filterCrates] - Crate to search in if defined
* @param {Object} [currentCrate] - Current crate, to rank results from this crate higher
*
* @return {ResultsTable}
*/
function execQuery(parsedQuery, searchWords, filterCrates) {
function execQuery(parsedQuery, searchWords, filterCrates, currentCrate) {
const results_others = {}, results_in_args = {}, results_returned = {};
function transformResults(results) {
@ -761,7 +758,7 @@ window.initSearch = rawSearchIndex => {
return out;
}
function sortResults(results, isType) {
function sortResults(results, isType, preferredCrate) {
const userQuery = parsedQuery.userQuery;
const ar = [];
for (const entry in results) {
@ -796,9 +793,9 @@ window.initSearch = rawSearchIndex => {
return a - b;
}
// sort by crate (non-current crate goes later)
a = (aaa.item.crate !== window.currentCrate);
b = (bbb.item.crate !== window.currentCrate);
// sort by crate (current crate comes first)
a = (aaa.item.crate !== preferredCrate);
b = (bbb.item.crate !== preferredCrate);
if (a !== b) {
return a - b;
}
@ -1178,7 +1175,7 @@ window.initSearch = rawSearchIndex => {
};
}
function handleAliases(ret, query, filterCrates) {
function handleAliases(ret, query, filterCrates, currentCrate) {
const lowerQuery = query.toLowerCase();
// We separate aliases and crate aliases because we want to have current crate
// aliases to be before the others in the displayed results.
@ -1194,7 +1191,7 @@ window.initSearch = rawSearchIndex => {
} else {
Object.keys(ALIASES).forEach(crate => {
if (ALIASES[crate][lowerQuery]) {
const pushTo = crate === window.currentCrate ? crateAliases : aliases;
const pushTo = crate === currentCrate ? crateAliases : aliases;
const query_aliases = ALIASES[crate][lowerQuery];
for (const alias of query_aliases) {
pushTo.push(createAliasFromItem(searchIndex[alias]));
@ -1226,8 +1223,9 @@ window.initSearch = rawSearchIndex => {
ret.others.pop();
}
};
onEach(aliases, pushFunc);
onEach(crateAliases, pushFunc);
aliases.forEach(pushFunc);
crateAliases.forEach(pushFunc);
}
/**
@ -1444,11 +1442,11 @@ window.initSearch = rawSearchIndex => {
}
const ret = createQueryResults(
sortResults(results_in_args, true),
sortResults(results_returned, true),
sortResults(results_others, false),
sortResults(results_in_args, true, currentCrate),
sortResults(results_returned, true, currentCrate),
sortResults(results_others, false, currentCrate),
parsedQuery);
handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates);
handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates, currentCrate);
if (parsedQuery.error !== null && ret.others.length !== 0) {
// It means some doc aliases were found so let's "remove" the error!
ret.query.error = null;
@ -1521,18 +1519,18 @@ window.initSearch = rawSearchIndex => {
if (type === "mod") {
displayPath = path + "::";
href = window.rootPath + path.replace(/::/g, "/") + "/" +
name + "/index.html";
href = ROOT_PATH + path.replace(/::/g, "/") + "/" +
name + "/index.html";
} else if (type === "import") {
displayPath = item.path + "::";
href = window.rootPath + item.path.replace(/::/g, "/") + "/index.html#reexport." + name;
href = ROOT_PATH + item.path.replace(/::/g, "/") + "/index.html#reexport." + name;
} else if (type === "primitive" || type === "keyword") {
displayPath = "";
href = window.rootPath + path.replace(/::/g, "/") +
"/" + type + "." + name + ".html";
href = ROOT_PATH + path.replace(/::/g, "/") +
"/" + type + "." + name + ".html";
} else if (type === "externcrate") {
displayPath = "";
href = window.rootPath + name + "/index.html";
href = ROOT_PATH + name + "/index.html";
} else if (item.parent !== undefined) {
const myparent = item.parent;
let anchor = "#" + type + "." + name;
@ -1555,14 +1553,14 @@ window.initSearch = rawSearchIndex => {
} else {
displayPath = path + "::" + myparent.name + "::";
}
href = window.rootPath + path.replace(/::/g, "/") +
"/" + pageType +
"." + pageName +
".html" + anchor;
href = ROOT_PATH + path.replace(/::/g, "/") +
"/" + pageType +
"." + pageName +
".html" + anchor;
} else {
displayPath = item.path + "::";
href = window.rootPath + item.path.replace(/::/g, "/") +
"/" + type + "." + name + ".html";
href = ROOT_PATH + item.path.replace(/::/g, "/") +
"/" + type + "." + name + ".html";
}
return [displayPath, href];
}
@ -1835,7 +1833,7 @@ window.initSearch = rawSearchIndex => {
}
showResults(
execQuery(query, searchWords, filterCrates),
execQuery(query, searchWords, filterCrates, window.currentCrate),
params.go_to_first,
filterCrates);
}
@ -2015,6 +2013,16 @@ window.initSearch = rawSearchIndex => {
}
function registerSearchEvents() {
const params = searchState.getQueryStringParams();
// Populate search bar with query string search term when provided,
// but only if the input bar is empty. This avoids the obnoxious issue
// where you start trying to do a search, and the index loads, and
// suddenly your search is gone!
if (searchState.input.value === "") {
searchState.input.value = params.search || "";
}
const searchAfter500ms = () => {
searchState.clearInputTimeout();
if (searchState.input.value.length === 0) {
@ -2167,20 +2175,32 @@ window.initSearch = rawSearchIndex => {
* @type {Array<string>}
*/
const searchWords = buildIndex(rawSearchIndex);
registerSearchEvents();
function runSearchIfNeeded() {
if (typeof window !== "undefined") {
registerSearchEvents();
// If there's a search term in the URL, execute the search now.
if (searchState.getQueryStringParams().search) {
if (window.searchState.getQueryStringParams().search) {
search();
}
}
runSearchIfNeeded();
};
if (window.searchIndex !== undefined) {
initSearch(window.searchIndex);
if (typeof exports !== "undefined") {
exports.initSearch = initSearch;
exports.execQuery = execQuery;
exports.parseQuery = parseQuery;
}
return searchWords;
}
if (typeof window !== "undefined") {
window.initSearch = initSearch;
if (window.searchIndex !== undefined) {
initSearch(window.searchIndex);
}
} else {
// Running in Node, not a browser. Run initSearch just to produce the
// exports.
initSearch({});
}
})();

View File

@ -100,11 +100,6 @@ function onEachLazy(lazyArray, func, reversed) {
reversed);
}
// Reports whether `property` is an own (non-inherited) property of `obj`.
// The check goes through `Object.prototype` so it also works for objects
// that do not provide a `hasOwnProperty` method themselves.
// eslint-disable-next-line no-unused-vars
function hasOwnPropertyRustdoc(obj, property) {
    const ownCheck = Object.prototype.hasOwnProperty;
    return ownCheck.call(obj, property);
}
function updateLocalStorage(name, value) {
try {
window.localStorage.setItem("rustdoc-" + name, value);

View File

@ -1,182 +1,6 @@
const fs = require('fs');
const path = require('path');
/**
 * Starting at `pos`, skips over spaces, tabs and newlines and then reports
 * what was found.
 *
 * @param {string} content - Text being scanned
 * @param {number} pos - Index at which scanning starts
 * @param {string} stop - Character expected after the skipped whitespace
 *
 * @return {number|null} - The index of `stop` when it is the first character
 * that is not skipped; the negated index of that character when it is
 * something else; `null` when the end of `content` is reached first.
 */
function getNextStep(content, pos, stop) {
    const skippable = [' ', '\t', '\n'];
    let cursor = pos;
    for (; cursor < content.length; cursor += 1) {
        const current = content[cursor];
        if (current === stop || !skippable.includes(current)) {
            break;
        }
    }
    if (cursor >= content.length) {
        return null;
    }
    return content[cursor] === stop ? cursor : cursor * -1;
}
// Simple function extractor based on brace counting. Line comments, block
// comments and quoted strings (including template strings, treated as plain
// quoted text) are skipped so that braces and quotes inside them are not
// counted. Regex literals are not understood: a brace inside a regex is
// counted like any other brace.
//
// Returns the source text of the first `function <functionName>(...) {...}`
// whose braces balance, from the `function` keyword through the closing
// brace, or `null` if no such function is found.
function extractFunction(content, functionName) {
    const splitter = "function " + functionName + "(";
    let rest = content;
    while (true) {
        const start = rest.indexOf(splitter);
        if (start === -1) {
            break;
        }
        // Move to the first ')' after the function name (end of a simple
        // parameter list).
        let pos = start;
        while (pos < rest.length && rest[pos] !== ')') {
            pos += 1;
        }
        if (pos >= rest.length) {
            break;
        }
        pos = getNextStep(rest, pos + 1, '{');
        if (pos === null) {
            break;
        } else if (pos < 0) {
            // Something other than '{' follows the parameter list: this is
            // not a definition, so resume the search after it.
            rest = rest.slice(-pos);
            continue;
        }
        // Each candidate starts with a fresh nesting level so that a failed
        // attempt cannot skew the brace count of the next one.
        let level = 0;
        while (pos < rest.length) {
            // Eat single-line comments
            if (rest[pos] === '/' && pos > 0 && rest[pos - 1] === '/') {
                do {
                    pos += 1;
                } while (pos < rest.length && rest[pos] !== '\n');
            // Eat multiline comment: jump to the '/' of the closing "*/"
            // (or to the end of the text if the comment is never closed).
            } else if (rest[pos] === '*' && pos > 0 && rest[pos - 1] === '/') {
                const close = rest.indexOf('*/', pos + 1);
                pos = close === -1 ? rest.length : close + 1;
            // Eat quoted strings
            } else if ((rest[pos] === '"' || rest[pos] === "'" || rest[pos] === "`") &&
                       (pos === 0 || rest[pos - 1] !== '/')) {
                const stop = rest[pos];
                do {
                    if (rest[pos] === '\\') {
                        pos += 1;
                    }
                    pos += 1;
                } while (pos < rest.length && rest[pos] !== stop);
            // Otherwise, check for block level.
            } else if (rest[pos] === '{') {
                level += 1;
            } else if (rest[pos] === '}') {
                level -= 1;
                if (level === 0) {
                    return rest.slice(start, pos + 1);
                }
            }
            pos += 1;
        }
        // Braces never balanced for this candidate; look for a later one.
        rest = rest.slice(start + 1);
    }
    return null;
}
// Simple extractor for an array variable declaration.
//
// Looks for `<kind><arrayName> = [ ... ];` in `content` and returns that text,
// from the declaration keyword through the closing `];`. Quoted strings inside
// the array are skipped so that a `];` inside a string does not end the match.
// `kind` is the declaration keyword including its trailing space; when it is
// omitted, "let " is tried first and then "const ". Returns `null` when no
// matching declaration is found.
function extractArrayVariable(content, arrayName, kind) {
    const declKind = typeof kind === "undefined" ? "let " : kind;
    const splitter = declKind + arrayName;
    let rest = content;
    while (true) {
        const start = rest.indexOf(splitter);
        if (start === -1) {
            break;
        }
        // Scan from just past the name, so a longer identifier sharing the
        // same prefix (or a declaration without `= [`) is rejected.
        let pos = getNextStep(rest, start + splitter.length, '=');
        if (pos === null) {
            break;
        } else if (pos < 0) {
            rest = rest.slice(-pos);
            continue;
        }
        pos = getNextStep(rest, pos + 1, '[');
        if (pos === null) {
            break;
        } else if (pos < 0) {
            rest = rest.slice(-pos);
            continue;
        }
        while (pos < rest.length) {
            if (rest[pos] === '"' || rest[pos] === "'") {
                // Skip the quoted string, honoring backslash escapes.
                const stop = rest[pos];
                do {
                    if (rest[pos] === '\\') {
                        pos += 2;
                    } else {
                        pos += 1;
                    }
                } while (pos < rest.length &&
                         (rest[pos] !== stop || rest[pos - 1] === '\\'));
            } else if (rest[pos] === ']' &&
                       pos + 1 < rest.length &&
                       rest[pos + 1] === ';') {
                return rest.slice(start, pos + 2);
            }
            pos += 1;
        }
        rest = rest.slice(start + 1);
    }
    if (declKind === "let ") {
        // Retry on the untouched input: `rest` may already have been
        // truncated past a matching `const` declaration.
        return extractArrayVariable(content, arrayName, "const ");
    }
    return null;
}
// Simple extractor for a variable declaration.
//
// Looks for `<kind><varName> = ...` in `content` and returns the text from the
// declaration keyword through the first `;` or `,` found outside a quoted
// string. `kind` is the declaration keyword including its trailing space; when
// it is omitted, "let " is tried first and then "const ". Returns `null` when
// no matching declaration is found.
function extractVariable(content, varName, kind) {
    const declKind = typeof kind === "undefined" ? "let " : kind;
    const splitter = declKind + varName;
    let rest = content;
    while (true) {
        const start = rest.indexOf(splitter);
        if (start === -1) {
            break;
        }
        // Scan from just past the name, so a longer identifier sharing the
        // same prefix is rejected instead of being returned by mistake.
        let pos = getNextStep(rest, start + splitter.length, '=');
        if (pos === null) {
            break;
        } else if (pos < 0) {
            rest = rest.slice(-pos);
            continue;
        }
        while (pos < rest.length) {
            if (rest[pos] === '"' || rest[pos] === "'") {
                // Skip the quoted string, honoring backslash escapes.
                const stop = rest[pos];
                do {
                    if (rest[pos] === '\\') {
                        pos += 2;
                    } else {
                        pos += 1;
                    }
                } while (pos < rest.length &&
                         (rest[pos] !== stop || rest[pos - 1] === '\\'));
            } else if (rest[pos] === ';' || rest[pos] === ',') {
                return rest.slice(start, pos + 1);
            }
            pos += 1;
        }
        rest = rest.slice(start + 1);
    }
    if (declKind === "let ") {
        // Retry on the untouched input: `rest` may already have been
        // truncated past a matching `const` declaration.
        return extractVariable(content, varName, "const ");
    }
    return null;
}
function loadContent(content) {
var Module = module.constructor;
var m = new Module();
@ -194,20 +18,6 @@ function readFile(filePath) {
return fs.readFileSync(filePath, 'utf8');
}
/**
 * Builds a chunk of JS source by extracting each named item from
 * `fileContent` and appending an `exports.<name> = <name>;` statement
 * after it.
 *
 * @param {Array<string>} thingsToLoad - Names of the items to extract
 * @param {string} kindOfLoad - Kind of item, used only in the error message
 * @param {Function} funcToCall - Extractor called as `(fileContent, name)`;
 * a `null` result means the item was not found
 * @param {string} fileContent - Source text handed to the extractor
 *
 * @return {string} - The concatenated source. If an item cannot be found,
 * a message is logged and the process exits with status 1.
 */
function loadThings(thingsToLoad, kindOfLoad, funcToCall, fileContent) {
    let assembled = '';
    for (let idx = 0; idx < thingsToLoad.length; idx += 1) {
        const name = thingsToLoad[idx];
        const extracted = funcToCall(fileContent, name);
        if (extracted === null) {
            console.log('unable to find ' + kindOfLoad + ' "' + name + '"');
            process.exit(1);
        }
        assembled = assembled + extracted + ('exports.' + name + ' = ' + name + ';');
    }
    return assembled;
}
/**
 * Formats one key/value pair as a `"key": "value",` line.
 *
 * @param {*} key - Value written between the first pair of quotes
 * @param {*} value - Value written between the second pair of quotes
 *
 * @return {string} - The formatted line
 */
function contentToDiffLine(key, value) {
    // String.prototype.concat stringifies its arguments the same way
    // template-literal interpolation does.
    return ''.concat('"', key, '": "', value, '",');
}
@ -264,46 +74,6 @@ function lookForEntry(entry, data) {
return null;
}
// Builds a loadable module out of pieces extracted from search.js and
// storage.js, loads the search index, and returns `[loaded, index]`, where
// `loaded` is whatever `loadContent` returns for the assembled source and
// `index` is the result of calling its `buildIndex` on the raw search index.
//
// `searchJs` and `storageJs` are handed to the extractor functions as source
// text; `searchIndex` is indexed, popped and joined with "\n" below, so it is
// an array of lines; `crate` is written into the fake `window.currentCrate`.
function loadSearchJsAndIndex(searchJs, searchIndex, storageJs, crate) {
// Drop a trailing empty line, if any, and then drop the last remaining line.
// NOTE(review): presumably that last line is the `window.initSearch(...)` call
// at the end of the generated search-index.js - confirm against the generator.
if (searchIndex[searchIndex.length - 1].length === 0) {
searchIndex.pop();
}
searchIndex.pop();
// Expose the index variable as `rawSearchIndex` on the loaded module.
var fullSearchIndex = searchIndex.join("\n") + '\nexports.rawSearchIndex = searchIndex;';
searchIndex = loadContent(fullSearchIndex);
var finalJS = "";
// Names of the arrays, variables and functions that are extracted below.
var arraysToLoad = ["itemTypes"];
var variablesToLoad = ["MAX_LEV_DISTANCE", "MAX_RESULTS", "NO_TYPE_FILTER",
"GENERICS_DATA", "NAME", "INPUTS_DATA", "OUTPUT_DATA",
"TY_PRIMITIVE", "TY_KEYWORD",
"levenshtein_row2"];
// execQuery first parameter is built in getQuery (which takes in the search input).
// execQuery last parameter is built in buildIndex.
// buildIndex requires the hashmap from search-index.
var functionsToLoad = ["buildHrefAndPath", "pathSplitter", "levenshtein", "validateResult",
"buildIndex", "execQuery", "parseQuery", "createQueryResults",
"isWhitespace", "isSpecialStartCharacter", "isStopCharacter",
"parseInput", "getItemsBefore", "getNextElem", "createQueryElement",
"isReturnArrow", "isPathStart", "getStringElem", "newParsedQuery",
"itemTypeFromName", "isEndCharacter", "isErrorCharacter",
"isIdentCharacter", "isSeparatorCharacter", "getIdentEndPosition",
"checkExtraTypeFilterCharacters", "isWhitespaceCharacter"];
// These two are extracted from storage.js rather than search.js.
const functions = ["hasOwnPropertyRustdoc", "onEach"];
// NOTE(review): `ALIASES` is not declared in this function, so this assigns
// an implicit global (and would throw in strict mode) - presumably the
// extracted search.js code reads it as a free variable; confirm.
ALIASES = {};
// Minimal stand-in for the browser `window` object used by the extracted code.
finalJS += 'window = { "currentCrate": "' + crate + '", rootPath: "../" };\n';
finalJS += loadThings(functions, 'function', extractFunction, storageJs);
finalJS += loadThings(arraysToLoad, 'array', extractArrayVariable, searchJs);
finalJS += loadThings(variablesToLoad, 'variable', extractVariable, searchJs);
finalJS += loadThings(functionsToLoad, 'function', extractFunction, searchJs);
var loaded = loadContent(finalJS);
var index = loaded.buildIndex(searchIndex.rawSearchIndex);
return [loaded, index];
}
// This function checks if `expected` has all the required fields needed for the checks.
function checkNeededFields(fullPath, expected, error_text, queryName, position) {
let fieldsToCheck;
@ -359,8 +129,7 @@ function valueCheck(fullPath, expected, result, error_text, queryName) {
'compared to EXPECTED');
}
} else if (expected !== null && typeof expected !== "undefined" &&
expected.constructor == Object)
{
expected.constructor == Object) {
for (const key in expected) {
if (!expected.hasOwnProperty(key)) {
continue;
@ -382,21 +151,20 @@ function valueCheck(fullPath, expected, result, error_text, queryName) {
}
}
function runParser(query, expected, loaded, loadedFile, queryName) {
function runParser(query, expected, parseQuery, queryName) {
var error_text = [];
checkNeededFields("", expected, error_text, queryName, null);
if (error_text.length === 0) {
valueCheck('', expected, loaded.parseQuery(query), error_text, queryName);
valueCheck('', expected, parseQuery(query), error_text, queryName);
}
return error_text;
}
function runSearch(query, expected, index, loaded, loadedFile, queryName) {
const filter_crate = loadedFile.FILTER_CRATE;
function runSearch(query, expected, doSearch, loadedFile, queryName) {
const ignore_order = loadedFile.ignore_order;
const exact_check = loadedFile.exact_check;
var results = loaded.execQuery(loaded.parseQuery(query), index, filter_crate);
var results = doSearch(query, loadedFile.FILTER_CRATE);
var error_text = [];
for (var key in expected) {
@ -488,7 +256,7 @@ function runCheck(loadedFile, key, callback) {
return 0;
}
function runChecks(testFile, loaded, index) {
function runChecks(testFile, doSearch, parseQuery) {
var checkExpected = false;
var checkParsed = false;
var testFileContent = readFile(testFile) + 'exports.QUERY = QUERY;';
@ -518,24 +286,40 @@ function runChecks(testFile, loaded, index) {
if (checkExpected) {
res += runCheck(loadedFile, "EXPECTED", (query, expected, text) => {
return runSearch(query, expected, index, loaded, loadedFile, text);
return runSearch(query, expected, doSearch, loadedFile, text);
});
}
if (checkParsed) {
res += runCheck(loadedFile, "PARSED", (query, expected, text) => {
return runParser(query, expected, loaded, loadedFile, text);
return runParser(query, expected, parseQuery, text);
});
}
return res;
}
function load_files(doc_folder, resource_suffix, crate) {
var searchJs = readFile(path.join(doc_folder, "search" + resource_suffix + ".js"));
var storageJs = readFile(path.join(doc_folder, "storage" + resource_suffix + ".js"));
var searchIndex = readFile(
path.join(doc_folder, "search-index" + resource_suffix + ".js")).split("\n");
/**
* Load searchNNN.js and search-indexNNN.js.
*
* @param {string} doc_folder - Path to a folder generated by running rustdoc
* @param {string} resource_suffix - Version number between filename and .js, e.g. "1.59.0"
* @returns {Object} - Object containing two keys: `doSearch`, which runs a search
* with the loaded index and returns a table of results; and `parseQuery`, which is the
* `parseQuery` function exported from the search module.
*/
function loadSearchJS(doc_folder, resource_suffix) {
const searchJs = path.join(doc_folder, "search" + resource_suffix + ".js");
const searchIndexJs = path.join(doc_folder, "search-index" + resource_suffix + ".js");
const searchIndex = require(searchIndexJs);
const searchModule = require(searchJs);
const searchWords = searchModule.initSearch(searchIndex.searchIndex);
return loadSearchJsAndIndex(searchJs, searchIndex, storageJs, crate);
return {
doSearch: function (queryStr, filterCrate, currentCrate) {
return searchModule.execQuery(searchModule.parseQuery(queryStr), searchWords,
filterCrate, currentCrate);
},
parseQuery: searchModule.parseQuery,
}
}
function showHelp() {
@ -598,35 +382,34 @@ function parseOptions(args) {
return null;
}
function checkFile(test_file, opts, loaded, index) {
const test_name = path.basename(test_file, ".js");
process.stdout.write('Checking "' + test_name + '" ... ');
return runChecks(test_file, loaded, index);
}
function main(argv) {
var opts = parseOptions(argv.slice(2));
if (opts === null) {
return 1;
}
var [loaded, index] = load_files(
let parseAndSearch = loadSearchJS(
opts["doc_folder"],
opts["resource_suffix"],
opts["crate_name"]);
opts["resource_suffix"]);
var errors = 0;
let doSearch = function (queryStr, filterCrate) {
return parseAndSearch.doSearch(queryStr, filterCrate, opts["crate_name"]);
};
if (opts["test_file"].length !== 0) {
opts["test_file"].forEach(function(file) {
errors += checkFile(file, opts, loaded, index);
opts["test_file"].forEach(function (file) {
process.stdout.write(`Testing ${file} ... `);
errors += runChecks(file, doSearch, parseAndSearch.parseQuery);
});
} else if (opts["test_folder"].length !== 0) {
fs.readdirSync(opts["test_folder"]).forEach(function(file) {
fs.readdirSync(opts["test_folder"]).forEach(function (file) {
if (!file.endsWith(".js")) {
return;
}
errors += checkFile(path.join(opts["test_folder"], file), opts, loaded, index);
process.stdout.write(`Testing ${file} ... `);
errors += runChecks(path.join(opts["test_folder"], file), doSearch,
parseAndSearch.parseQuery);
});
}
return errors > 0 ? 1 : 0;