Auto merge of - notriddle:notriddle/type-search-slice-array, r=GuillaumeGomez

rustdoc: search for slices and arrays by type with `[]`

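This feature extends rustdoc to support the syntax that most users will naturally attempt to use to search for slices and arrays. It is part of a larger effort to support more of Rust's type expression syntax in search (see Motivation below).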

Function signature searches already support arrays and slices. The explicit names `primitive:slice<u8>` and `primitive:array<u8>` can be used to match a slice or array of bytes, while square brackets `[u8]` will match either one. Empty square brackets, `[]`, will match any slice or array regardless of what it contains.

Preview:

* [`option -> []`](https://notriddle.com/rustdoc-demo-html-3/search-slice-array/std/index.html?search=option%20-%3E%20%5B%5D)
* [`[u8] -> str`](https://notriddle.com/rustdoc-demo-html-3/search-slice-array/std/index.html?search=%5Bu8%5D%20-%3E%20str)
* [`Box<[u8]> -> str`](https://notriddle.com/rustdoc-demo-html-3/search-slice-array/std/index.html?search=Box%3C%5Bu8%5D%3E%20-%3E%20str)

Motivation:

When type-based search was first landed, it was directly described as "incomplete". Here's [a comment] from the discussion thread:

[a comment]: https://github.com/rust-lang/rust/pull/23289#issuecomment-79437386

> This is looking really great, nice work! I can think of a number of cases that aren't quite covered by this, but I feel like this is a great improvement regardless and it can always be iterated on so I'm fine landing with a few known cases where it may not work :)

Filling out the missing functionality is going to mean adding support for more of Rust's [type expression] syntax, such as slices (in this PR), tuples, references, raw pointers, function pointers, and generics.

[type expression]: https://doc.rust-lang.org/reference/types.html#type-expressions

There does seem to be demand for this sort of thing, such as [this Discord message](https://discord.com/channels/442252698964721669/443150878111694848/1042145740065099796) expressing regret at rustdoc not supporting tuples in search queries.
This commit is contained in:
bors 2023-06-11 14:48:58 +00:00
commit 81c02da94e
8 changed files with 503 additions and 83 deletions

@ -105,6 +105,11 @@ will match these queries:
But it *does not* match `Result<Vec, u8>` or `Result<u8<Vec>>`.
Function signature searches also support arrays and slices. The explicit names
`primitive:slice<u8>` and `primitive:array<u8>` can be used to match a slice
or array of bytes, while square brackets `[u8]` will match either one. Empty
square brackets, `[]`, will match any slice or array regardless of what it contains.
### Shortcuts
Pressing `S` while focused elsewhere on the page will move focus to the

@ -1077,6 +1077,10 @@ function preLoadCss(cssUrl) {
<code>-&gt; vec</code> or <code>String, enum:Cow -&gt; bool</code>)",
"You can look for items with an exact name by putting double quotes around \
your request: <code>\"string\"</code>",
"Look for functions that accept or return \
<a href=\"https://doc.rust-lang.org/std/primitive.slice.html\">slices</a> and \
<a href=\"https://doc.rust-lang.org/std/primitive.array.html\">arrays</a> by writing \
square brackets (e.g., <code>-&gt; [u8]</code> or <code>[] -&gt; Option</code>)",
"Look for items inside another one by searching for a path: <code>vec::Vec</code>",
].map(x => "<p>" + x + "</p>").join("");
const div_infos = document.createElement("div");

@ -208,6 +208,46 @@ function initSearch(rawSearchIndex) {
let typeNameIdMap;
const ALIASES = new Map();
/**
 * Interned type name ID of the `array` primitive.
 *
 * Assigned from `buildTypeMapIndex("array")` before the raw search index is
 * walked, so it stays `undefined` until then.
 */
let typeNameIdOfArray;
/**
 * Interned type name ID of the `slice` primitive.
 *
 * Assigned from `buildTypeMapIndex("slice")` alongside `typeNameIdOfArray`.
 */
let typeNameIdOfSlice;
/**
 * Interned type name ID reserved for the `[]` query syntax.
 *
 * Assigned from `buildTypeMapIndex("[]")`. A query element carrying this ID
 * is treated as matching a row whose ID is either `typeNameIdOfArray` or
 * `typeNameIdOfSlice`.
 */
let typeNameIdOfArrayOrSlice;
/**
 * Intern a type name: look it up in the Name->ID map and, when it has not
 * been seen before, register it under the next free ID.
 *
 * The lookup is keyed on the name alone, so two types that share a name but
 * differ in item kind resolve to the same ID. Interning lets function
 * matching compare integers instead of strings.
 *
 * @param {string} name
 *
 * @returns {integer} the ID for `name`, or -1 (pure generic) when `name` is
 *     "" or null
 */
function buildTypeMapIndex(name) {
    if (name === null || name === "") {
        return -1;
    }
    if (!typeNameIdMap.has(name)) {
        // IDs are handed out densely: the current size is the next unused ID.
        typeNameIdMap.set(name, typeNameIdMap.size);
    }
    return typeNameIdMap.get(name);
}
/**
 * Tell whether `c` occurs in the whitespace set: space, tab, line feed or
 * carriage return.
 *
 * @param {string} c
 *
 * @returns {boolean}
 */
function isWhitespace(c) {
    const whitespaceChars = " \t\n\r";
    return whitespaceChars.includes(c);
}
@ -217,7 +257,7 @@ function initSearch(rawSearchIndex) {
}
function isEndCharacter(c) {
return ",>-".indexOf(c) !== -1;
return ",>-]".indexOf(c) !== -1;
}
function isStopCharacter(c) {
@ -466,35 +506,64 @@ function initSearch(rawSearchIndex) {
let start = parserState.pos;
let end;
// We handle the strings on their own mostly to make code easier to follow.
if (parserState.userQuery[parserState.pos] === "\"") {
start += 1;
getStringElem(query, parserState, isInGenerics);
end = parserState.pos - 1;
} else {
end = getIdentEndPosition(parserState);
}
if (parserState.pos < parserState.length &&
parserState.userQuery[parserState.pos] === "<"
) {
if (start >= end) {
throw ["Found generics without a path"];
}
if (parserState.userQuery[parserState.pos] === "[") {
parserState.pos += 1;
getItemsBefore(query, parserState, generics, ">");
}
if (start >= end && generics.length === 0) {
return;
}
elems.push(
createQueryElement(
query,
parserState,
parserState.userQuery.slice(start, end),
getItemsBefore(query, parserState, generics, "]");
const typeFilter = parserState.typeFilter;
if (typeFilter !== null && typeFilter !== "primitive") {
throw [
"Invalid search type: primitive ",
"[]",
" and ",
typeFilter,
" both specified",
];
}
parserState.typeFilter = null;
parserState.totalElems += 1;
if (isInGenerics) {
parserState.genericsElems += 1;
}
elems.push({
name: "[]",
id: -1,
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics,
isInGenerics
)
);
typeFilter: "primitive",
});
} else {
// We handle the strings on their own mostly to make code easier to follow.
if (parserState.userQuery[parserState.pos] === "\"") {
start += 1;
getStringElem(query, parserState, isInGenerics);
end = parserState.pos - 1;
} else {
end = getIdentEndPosition(parserState);
}
if (parserState.pos < parserState.length &&
parserState.userQuery[parserState.pos] === "<"
) {
if (start >= end) {
throw ["Found generics without a path"];
}
parserState.pos += 1;
getItemsBefore(query, parserState, generics, ">");
}
if (start >= end && generics.length === 0) {
return;
}
elems.push(
createQueryElement(
query,
parserState,
parserState.userQuery.slice(start, end),
generics,
isInGenerics
)
);
}
}
/**
@ -518,6 +587,17 @@ function initSearch(rawSearchIndex) {
const oldTypeFilter = parserState.typeFilter;
parserState.typeFilter = null;
let extra = "";
if (endChar === ">") {
extra = "<";
} else if (endChar === "]") {
extra = "[";
} else if (endChar === "") {
extra = "->";
} else {
extra = endChar;
}
while (parserState.pos < parserState.length) {
const c = parserState.userQuery[parserState.pos];
if (c === endChar) {
@ -547,14 +627,6 @@ function initSearch(rawSearchIndex) {
foundStopChar = true;
continue;
} else if (isEndCharacter(c)) {
let extra = "";
if (endChar === ">") {
extra = "<";
} else if (endChar === "") {
extra = "->";
} else {
extra = endChar;
}
throw ["Unexpected ", c, " after ", extra];
}
if (!foundStopChar) {
@ -581,9 +653,9 @@ function initSearch(rawSearchIndex) {
}
const posBefore = parserState.pos;
start = parserState.pos;
getNextElem(query, parserState, elems, endChar === ">");
getNextElem(query, parserState, elems, endChar !== "");
if (endChar !== "" && parserState.pos >= parserState.length) {
throw ["Unclosed ", "<"];
throw ["Unclosed ", extra];
}
// This case can be encountered if `getNextElem` encountered a "stop character" right
// from the start. For example if you have `,,` or `<>`. In this case, we simply move up
@ -594,7 +666,7 @@ function initSearch(rawSearchIndex) {
foundStopChar = false;
}
if (parserState.pos >= parserState.length && endChar !== "") {
throw ["Unclosed ", "<"];
throw ["Unclosed ", extra];
}
// We are either at the end of the string or on the `endChar` character, let's move forward
// in any case.
@ -779,7 +851,8 @@ function initSearch(rawSearchIndex) {
*
* ident = *(ALPHA / DIGIT / "_")
* path = ident *(DOUBLE-COLON ident) [!]
* arg = [type-filter *WS COLON *WS] path [generics]
* slice = OPEN-SQUARE-BRACKET [ nonempty-arg-list ] CLOSE-SQUARE-BRACKET
* arg = [type-filter *WS COLON *WS] (path [generics] / slice)
* type-sep = COMMA/WS *(COMMA/WS)
* nonempty-arg-list = *(type-sep) arg *(type-sep arg) *(type-sep)
* generics = OPEN-ANGLE-BRACKET [ nonempty-arg-list ] *(type-sep)
@ -821,6 +894,8 @@ function initSearch(rawSearchIndex) {
*
* OPEN-ANGLE-BRACKET = "<"
* CLOSE-ANGLE-BRACKET = ">"
* OPEN-SQUARE-BRACKET = "["
* CLOSE-SQUARE-BRACKET = "]"
* COLON = ":"
* DOUBLE-COLON = "::"
* QUOTE = %x22
@ -1170,7 +1245,22 @@ function initSearch(rawSearchIndex) {
// ones with no type filter, which can match any entry regardless of its
// own type.
for (const generic of elem.generics) {
if (generic.typeFilter !== -1 && !handleGeneric(generic)) {
if (generic.typeFilter === TY_PRIMITIVE &&
generic.id === typeNameIdOfArrayOrSlice) {
const genericArray = {
id: typeNameIdOfArray,
typeFilter: TY_PRIMITIVE,
generics: generic.generics,
};
const genericSlice = {
id: typeNameIdOfSlice,
typeFilter: TY_PRIMITIVE,
generics: generic.generics,
};
if (!handleGeneric(genericArray) && !handleGeneric(genericSlice)) {
return false;
}
} else if (generic.typeFilter !== -1 && !handleGeneric(generic)) {
return false;
}
}
@ -1217,7 +1307,12 @@ function initSearch(rawSearchIndex) {
return row.generics.length > 0 ? checkIfInGenerics(row, elem) : false;
}
if (row.id === elem.id && typePassesFilter(elem.typeFilter, row.ty)) {
const matchesExact = row.id === elem.id;
const matchesArrayOrSlice = elem.id === typeNameIdOfArrayOrSlice &&
(row.id === typeNameIdOfSlice || row.id === typeNameIdOfArray);
if ((matchesExact || matchesArrayOrSlice) &&
typePassesFilter(elem.typeFilter, row.ty)) {
if (elem.generics.length > 0) {
return checkGenerics(row, elem);
}
@ -2082,34 +2177,6 @@ function initSearch(rawSearchIndex) {
filterCrates);
}
/**
* Add an item to the type Name->ID map, or, if one already exists, use it.
* Returns the number. If name is "" or null, return -1 (pure generic).
*
* This is effectively string interning, so that function matching can be
* done more quickly. Two types with the same name but different item kinds
* get the same ID.
*
* @param {Map<string, integer>} typeNameIdMap
* @param {string} name
*
* @returns {integer}
*/
function buildTypeMapIndex(typeNameIdMap, name) {
if (name === "" || name === null) {
return -1;
}
if (typeNameIdMap.has(name)) {
return typeNameIdMap.get(name);
} else {
const id = typeNameIdMap.size;
typeNameIdMap.set(name, id);
return id;
}
}
/**
* Convert a list of RawFunctionType / ID to object-based FunctionType.
*
@ -2128,7 +2195,7 @@ function initSearch(rawSearchIndex) {
*
* @return {Array<FunctionSearchType>}
*/
function buildItemSearchTypeAll(types, lowercasePaths, typeNameIdMap) {
function buildItemSearchTypeAll(types, lowercasePaths) {
const PATH_INDEX_DATA = 0;
const GENERICS_DATA = 1;
return types.map(type => {
@ -2140,15 +2207,14 @@ function initSearch(rawSearchIndex) {
pathIndex = type[PATH_INDEX_DATA];
generics = buildItemSearchTypeAll(
type[GENERICS_DATA],
lowercasePaths,
typeNameIdMap
lowercasePaths
);
}
return {
// `0` is used as a sentinel because it's fewer bytes than `null`
id: pathIndex === 0
? -1
: buildTypeMapIndex(typeNameIdMap, lowercasePaths[pathIndex - 1].name),
: buildTypeMapIndex(lowercasePaths[pathIndex - 1].name),
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: generics,
};
@ -2171,7 +2237,7 @@ function initSearch(rawSearchIndex) {
*
* @return {null|FunctionSearchType}
*/
function buildFunctionSearchType(functionSearchType, lowercasePaths, typeNameIdMap) {
function buildFunctionSearchType(functionSearchType, lowercasePaths) {
const INPUTS_DATA = 0;
const OUTPUT_DATA = 1;
// `0` is used as a sentinel because it's fewer bytes than `null`
@ -2184,15 +2250,14 @@ function initSearch(rawSearchIndex) {
inputs = [{
id: pathIndex === 0
? -1
: buildTypeMapIndex(typeNameIdMap, lowercasePaths[pathIndex - 1].name),
: buildTypeMapIndex(lowercasePaths[pathIndex - 1].name),
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: [],
}];
} else {
inputs = buildItemSearchTypeAll(
functionSearchType[INPUTS_DATA],
lowercasePaths,
typeNameIdMap
lowercasePaths
);
}
if (functionSearchType.length > 1) {
@ -2201,15 +2266,14 @@ function initSearch(rawSearchIndex) {
output = [{
id: pathIndex === 0
? -1
: buildTypeMapIndex(typeNameIdMap, lowercasePaths[pathIndex - 1].name),
: buildTypeMapIndex(lowercasePaths[pathIndex - 1].name),
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: [],
}];
} else {
output = buildItemSearchTypeAll(
functionSearchType[OUTPUT_DATA],
lowercasePaths,
typeNameIdMap
lowercasePaths
);
}
} else {
@ -2233,6 +2297,12 @@ function initSearch(rawSearchIndex) {
let currentIndex = 0;
let id = 0;
// Initialize type map indexes for primitive list types
// that can be searched using `[]` syntax.
typeNameIdOfArray = buildTypeMapIndex("array");
typeNameIdOfSlice = buildTypeMapIndex("slice");
typeNameIdOfArrayOrSlice = buildTypeMapIndex("[]");
for (const crate in rawSearchIndex) {
if (!hasOwnPropertyRustdoc(rawSearchIndex, crate)) {
continue;
@ -2363,8 +2433,7 @@ function initSearch(rawSearchIndex) {
parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
type: buildFunctionSearchType(
itemFunctionSearchTypes[i],
lowercasePaths,
typeNameIdMap
lowercasePaths
),
id: id,
normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""),

@ -12,4 +12,11 @@ const EXPECTED = [
{ 'path': 'std::option::Option', 'name': 'get_or_insert_default' },
],
},
{
'query': 'option -> []',
'others': [
{ 'path': 'std::option::Option', 'name': 'as_slice' },
{ 'path': 'std::option::Option', 'name': 'as_mut_slice' },
],
},
];

@ -0,0 +1,9 @@
// exact-match
// https://github.com/rust-lang/rust/issues/60485#issuecomment-663900624
// The signature query `OsString -> String` is expected to list
// `std::ffi::OsString::into_string` under `others`.
const EXPECTED = {
'query': 'OsString -> String',
'others': [
{ 'path': 'std::ffi::OsString', 'name': 'into_string' },
]
};

@ -0,0 +1,305 @@
const PARSED = [
{
query: '[[[D, []]]',
elems: [],
foundElems: 0,
original: '[[[D, []]]',
returned: [],
userQuery: '[[[d, []]]',
error: 'Unclosed `[`',
},
{
query: '[[[D, []]]]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "d",
fullPath: ["d"],
pathWithoutLast: [],
pathLast: "d",
generics: [],
typeFilter: -1,
},
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [],
typeFilter: 15,
},
],
typeFilter: 15,
},
],
typeFilter: 15,
},
],
typeFilter: 15,
},
],
foundElems: 1,
original: '[[[D, []]]]',
returned: [],
userQuery: '[[[d, []]]]',
error: null,
},
{
query: '[] u8',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [],
typeFilter: 15,
},
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
],
foundElems: 2,
original: "[] u8",
returned: [],
userQuery: "[] u8",
error: null,
},
{
query: '[u8]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
],
typeFilter: 15,
},
],
foundElems: 1,
original: "[u8]",
returned: [],
userQuery: "[u8]",
error: null,
},
{
query: '[u8,u8]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
],
typeFilter: 15,
},
],
foundElems: 1,
original: "[u8,u8]",
returned: [],
userQuery: "[u8,u8]",
error: null,
},
{
query: '[u8<u8>]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
],
typeFilter: -1,
},
],
typeFilter: 15,
},
],
foundElems: 1,
original: "[u8<u8>]",
returned: [],
userQuery: "[u8<u8>]",
error: null,
},
{
query: '[]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [],
typeFilter: 15,
},
],
foundElems: 1,
original: "[]",
returned: [],
userQuery: "[]",
error: null,
},
{
query: '[>',
elems: [],
foundElems: 0,
original: "[>",
returned: [],
userQuery: "[>",
error: "Unexpected `>` after `[`",
},
{
query: '[<',
elems: [],
foundElems: 0,
original: "[<",
returned: [],
userQuery: "[<",
error: "Found generics without a path",
},
{
query: '[a>',
elems: [],
foundElems: 0,
original: "[a>",
returned: [],
userQuery: "[a>",
error: "Unexpected `>` after `[`",
},
{
query: '[a<',
elems: [],
foundElems: 0,
original: "[a<",
returned: [],
userQuery: "[a<",
error: "Unclosed `<`",
},
{
query: '[a',
elems: [],
foundElems: 0,
original: "[a",
returned: [],
userQuery: "[a",
error: "Unclosed `[`",
},
{
query: '[',
elems: [],
foundElems: 0,
original: "[",
returned: [],
userQuery: "[",
error: "Unclosed `[`",
},
{
query: ']',
elems: [],
foundElems: 0,
original: "]",
returned: [],
userQuery: "]",
error: "Unexpected `]`",
},
{
query: 'primitive:[u8]',
elems: [
{
name: "[]",
fullPath: ["[]"],
pathWithoutLast: [],
pathLast: "[]",
generics: [
{
name: "u8",
fullPath: ["u8"],
pathWithoutLast: [],
pathLast: "u8",
generics: [],
typeFilter: -1,
},
],
typeFilter: 15,
},
],
foundElems: 1,
original: "primitive:[u8]",
returned: [],
userQuery: "primitive:[u8]",
error: null,
},
{
query: 'macro:[u8]',
elems: [],
foundElems: 0,
original: "macro:[u8]",
returned: [],
userQuery: "macro:[u8]",
error: "Invalid search type: primitive `[]` and `macro` both specified",
},
];

@ -51,4 +51,23 @@ const EXPECTED = [
{ 'path': 'slice_array', 'name': 'gamma' },
],
},
{
'query': '[TraitCat]',
'in_args': [
{ 'path': 'slice_array', 'name': 'gamma' },
{ 'path': 'slice_array', 'name': 'epsilon' },
],
},
{
'query': 'R<[Q]>',
'returned': [
{ 'path': 'slice_array', 'name': 'bet' },
],
},
{
'query': 'R<[P]>',
'in_args': [
{ 'path': 'slice_array', 'name': 'alpha' },
],
},
];

@ -14,3 +14,5 @@ pub trait TraitCat {}
pub trait TraitDog {}
pub fn gamma<T: TraitCat + TraitDog>(t: [T; 32]) {}
pub fn epsilon<T: TraitCat + TraitDog>(t: &[T]) {}