Rollup merge of #126057 - Sunshine40:rustdoc-search-non-english, r=notriddle

Make HTML rendered by rustdoc allow searching for non-English identifiers and aliases

Fix alias search result showing `undefined` description.

Inspired by https://github.com/rust-lang/mdBook/issues/2393 .

Not sure whether it's worth adding full-text search functionality to rustdoc-rendered HTML.
This commit is contained in:
León Orell Valerian Liehr 2024-06-08 04:25:45 +02:00 committed by GitHub
commit 1f715eb641
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 362 additions and 85 deletions

View File

@ -61,8 +61,8 @@ ENV SCRIPT python3 ../x.py check --stage 0 --set build.optimized-compiler-builti
/scripts/validate-toolstate.sh && \
/scripts/validate-error-codes.sh && \
reuse --include-submodules lint && \
# Runs checks to ensure that there are no ES5 issues in our JS code.
es-check es8 ../src/librustdoc/html/static/js/*.js && \
# Runs checks to ensure that there are no issues in our JS code.
es-check es2019 ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js

View File

@ -5,7 +5,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 8,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {

View File

@ -41,8 +41,9 @@ let ParserState;
* foundElems: number,
* totalElems: number,
* literalSearch: boolean,
* corrections: Array<{from: string, to: integer}>,
* corrections: Array<{from: string, to: integer}> | null,
* typeFingerprint: Uint32Array,
* error: Array<string> | null,
* }}
*/
let ParsedQuery;

View File

@ -89,6 +89,10 @@ const ROOT_PATH = typeof window !== "undefined" ? window.rootPath : "../";
// of permutations we need to check.
const UNBOXING_LIMIT = 5;
// used for search query verification
const REGEX_IDENT = /\p{ID_Start}\p{ID_Continue}*|_\p{ID_Continue}+/uy;
const REGEX_INVALID_TYPE_FILTER = /[^a-z]/ui;
// In the search display, allows switching between tabs.
function printTab(nb) {
let iter = 0;
@ -410,18 +414,21 @@ function initSearch(rawSearchIndex) {
}
/**
* Returns `true` if the given `c` character is valid for an ident.
* If the current parser position is at the beginning of an identifier,
* move the position to the end of it and return `true`. Otherwise, return `false`.
*
* @param {string} c
* @param {ParserState} parserState
*
* @return {boolean}
*/
function isIdentCharacter(c) {
return (
c === "_" ||
(c >= "0" && c <= "9") ||
(c >= "a" && c <= "z") ||
(c >= "A" && c <= "Z"));
function consumeIdent(parserState) {
    // REGEX_IDENT is sticky: it only matches starting exactly at `lastIndex`,
    // so anchor it at the parser's current position before every use.
    REGEX_IDENT.lastIndex = parserState.pos;
    const found = REGEX_IDENT.exec(parserState.userQuery);
    if (found === null) {
        // No identifier starts here; the position is left untouched.
        return false;
    }
    // Step over the whole identifier that was just recognized.
    parserState.pos += found[0].length;
    return true;
}
/**
@ -618,70 +625,62 @@ function initSearch(rawSearchIndex) {
* @return {integer}
*/
function getIdentEndPosition(parserState) {
const start = parserState.pos;
let afterIdent = consumeIdent(parserState);
let end = parserState.pos;
let foundExclamation = -1;
let macroExclamation = -1;
while (parserState.pos < parserState.length) {
const c = parserState.userQuery[parserState.pos];
if (!isIdentCharacter(c)) {
if (c === "!") {
if (foundExclamation !== -1) {
throw ["Cannot have more than one ", "!", " in an ident"];
} else if (parserState.pos + 1 < parserState.length &&
isIdentCharacter(parserState.userQuery[parserState.pos + 1])
) {
if (c === "!") {
if (macroExclamation !== -1) {
throw ["Cannot have more than one ", "!", " in an ident"];
} else if (parserState.pos + 1 < parserState.length) {
const pos = parserState.pos;
parserState.pos++;
const beforeIdent = consumeIdent(parserState);
parserState.pos = pos;
if (beforeIdent) {
throw ["Unexpected ", "!", ": it can only be at the end of an ident"];
}
foundExclamation = parserState.pos;
} else if (isPathSeparator(c)) {
if (c === ":") {
if (!isPathStart(parserState)) {
}
if (afterIdent) macroExclamation = parserState.pos;
} else if (isPathSeparator(c)) {
if (c === ":") {
if (!isPathStart(parserState)) {
break;
}
// Skip current ":".
parserState.pos += 1;
} else {
while (parserState.pos + 1 < parserState.length) {
const next_c = parserState.userQuery[parserState.pos + 1];
if (next_c !== " ") {
break;
}
// Skip current ":".
parserState.pos += 1;
} else {
while (parserState.pos + 1 < parserState.length) {
const next_c = parserState.userQuery[parserState.pos + 1];
if (next_c !== " ") {
break;
}
parserState.pos += 1;
}
}
if (foundExclamation !== -1) {
if (foundExclamation !== start &&
isIdentCharacter(parserState.userQuery[foundExclamation - 1])
) {
throw ["Cannot have associated items in macros"];
} else {
// while the never type has no associated macros, we still
// can parse a path like that
foundExclamation = -1;
}
}
} else if (
c === "[" ||
c === "(" ||
isEndCharacter(c) ||
isSpecialStartCharacter(c) ||
isSeparatorCharacter(c)
) {
break;
} else if (parserState.pos > 0) {
throw ["Unexpected ", c, " after ", parserState.userQuery[parserState.pos - 1]];
} else {
throw ["Unexpected ", c];
}
if (macroExclamation !== -1) {
throw ["Cannot have associated items in macros"];
}
} else if (
c === "[" ||
c === "(" ||
isEndCharacter(c) ||
isSpecialStartCharacter(c) ||
isSeparatorCharacter(c)
) {
break;
} else if (parserState.pos > 0) {
throw ["Unexpected ", c, " after ", parserState.userQuery[parserState.pos - 1],
" (not a valid identifier)"];
} else {
throw ["Unexpected ", c, " (not a valid identifier)"];
}
parserState.pos += 1;
afterIdent = consumeIdent(parserState);
end = parserState.pos;
}
// if start == end - 1, we got the never type
if (foundExclamation !== -1 &&
foundExclamation !== start &&
isIdentCharacter(parserState.userQuery[foundExclamation - 1])
) {
if (macroExclamation !== -1) {
if (parserState.typeFilter === null) {
parserState.typeFilter = "macro";
} else if (parserState.typeFilter !== "macro") {
@ -693,7 +692,7 @@ function initSearch(rawSearchIndex) {
" both specified",
];
}
end = foundExclamation;
end = macroExclamation;
}
return end;
}
@ -1071,16 +1070,15 @@ function initSearch(rawSearchIndex) {
function checkExtraTypeFilterCharacters(start, parserState) {
const query = parserState.userQuery.slice(start, parserState.pos).trim();
for (const c in query) {
if (!isIdentCharacter(query[c])) {
throw [
"Unexpected ",
query[c],
" in type filter (before ",
":",
")",
];
}
const match = query.match(REGEX_INVALID_TYPE_FILTER);
if (match) {
throw [
"Unexpected ",
match[0],
" in type filter (before ",
":",
")",
];
}
}
@ -2127,7 +2125,7 @@ function initSearch(rawSearchIndex) {
};
}
function handleAliases(ret, query, filterCrates, currentCrate) {
async function handleAliases(ret, query, filterCrates, currentCrate) {
const lowerQuery = query.toLowerCase();
// We separate aliases and crate aliases because we want to have current crate
// aliases to be before the others in the displayed results.
@ -2163,6 +2161,15 @@ function initSearch(rawSearchIndex) {
crateAliases.sort(sortFunc);
aliases.sort(sortFunc);
const fetchDesc = alias => {
return searchIndexEmptyDesc.get(alias.crate).contains(alias.bitIndex) ?
"" : searchState.loadDesc(alias);
};
const [crateDescs, descs] = await Promise.all([
Promise.all(crateAliases.map(fetchDesc)),
Promise.all(aliases.map(fetchDesc)),
]);
const pushFunc = alias => {
alias.alias = query;
const res = buildHrefAndPath(alias);
@ -2176,7 +2183,13 @@ function initSearch(rawSearchIndex) {
}
};
aliases.forEach((alias, i) => {
alias.desc = descs[i];
});
aliases.forEach(pushFunc);
crateAliases.forEach((alias, i) => {
alias.desc = crateDescs[i];
});
crateAliases.forEach(pushFunc);
}
@ -2538,7 +2551,8 @@ function initSearch(rawSearchIndex) {
sorted_returned,
sorted_others,
parsedQuery);
handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates, currentCrate);
await handleAliases(ret, parsedQuery.original.replace(/"/g, ""),
filterCrates, currentCrate);
await Promise.all([ret.others, ret.returned, ret.in_args].map(async list => {
const descs = await Promise.all(list.map(result => {
return searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ?

View File

@ -6,7 +6,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 2018,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {

View File

@ -6,7 +6,7 @@ module.exports = {
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 8,
"ecmaVersion": 2019,
"sourceType": "module"
},
"rules": {

View File

@ -24,7 +24,7 @@ const PARSED = [
original: "-> *",
returned: [],
userQuery: "-> *",
error: "Unexpected `*` after ` `",
error: "Unexpected `*` after ` ` (not a valid identifier)",
},
{
query: 'a<"P">',
@ -204,16 +204,25 @@ const PARSED = [
original: "_:",
returned: [],
userQuery: "_:",
error: "Unexpected `:` (expected path after type filter `_:`)",
error: "Unexpected `_` (not a valid identifier)",
},
{
query: "_:a",
query: "ab:",
elems: [],
foundElems: 0,
original: "_:a",
original: "ab:",
returned: [],
userQuery: "_:a",
error: "Unknown type filter `_`",
userQuery: "ab:",
error: "Unexpected `:` (expected path after type filter `ab:`)",
},
{
query: "a:b",
elems: [],
foundElems: 0,
original: "a:b",
returned: [],
userQuery: "a:b",
error: "Unknown type filter `a`",
},
{
query: "a-bb",
@ -240,7 +249,7 @@ const PARSED = [
original: "ab'",
returned: [],
userQuery: "ab'",
error: "Unexpected `'` after `b`",
error: "Unexpected `'` after `b` (not a valid identifier)",
},
{
query: "a->",

View File

@ -1,6 +1,6 @@
const EXPECTED = {
'query': 'Fo',
'others': [
{ 'path': 'basic', 'name': 'Foo' },
{ 'path': 'basic', 'name': 'Foo', 'desc': 'Docs for Foo' },
],
};

View File

@ -5,6 +5,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Struct',
'desc': 'Doc for <code>Struct</code>',
'alias': 'StructItem',
'href': '../doc_alias/struct.Struct.html',
'is_alias': true
@ -17,6 +18,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Struct',
'name': 'field',
'desc': 'Doc for <code>Struct</code>s <code>field</code>',
'alias': 'StructFieldItem',
'href': '../doc_alias/struct.Struct.html#structfield.field',
'is_alias': true
@ -29,6 +31,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Struct',
'name': 'method',
'desc': 'Doc for <code>Struct::method</code>',
'alias': 'StructMethodItem',
'href': '../doc_alias/struct.Struct.html#method.method',
'is_alias': true
@ -45,6 +48,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Struct',
'name': 'ImplConstItem',
'desc': 'Doc for <code>Struct::ImplConstItem</code>',
'alias': 'StructImplConstItem',
'href': '../doc_alias/struct.Struct.html#associatedconstant.ImplConstItem',
'is_alias': true
@ -57,6 +61,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Struct',
'name': 'function',
'desc': 'Doc for <code>Trait::function</code> implemented for Struct',
'alias': 'ImplTraitFunction',
'href': '../doc_alias/struct.Struct.html#method.function',
'is_alias': true
@ -69,6 +74,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Enum',
'desc': 'Doc for <code>Enum</code>',
'alias': 'EnumItem',
'href': '../doc_alias/enum.Enum.html',
'is_alias': true
@ -81,6 +87,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Enum',
'name': 'Variant',
'desc': 'Doc for <code>Enum::Variant</code>',
'alias': 'VariantItem',
'href': '../doc_alias/enum.Enum.html#variant.Variant',
'is_alias': true
@ -93,6 +100,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Enum',
'name': 'method',
'desc': 'Doc for <code>Enum::method</code>',
'alias': 'EnumMethodItem',
'href': '../doc_alias/enum.Enum.html#method.method',
'is_alias': true
@ -105,6 +113,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Typedef',
'desc': 'Doc for type alias <code>Typedef</code>',
'alias': 'TypedefItem',
'href': '../doc_alias/type.Typedef.html',
'is_alias': true
@ -117,6 +126,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Trait',
'desc': 'Doc for <code>Trait</code>',
'alias': 'TraitItem',
'href': '../doc_alias/trait.Trait.html',
'is_alias': true
@ -129,6 +139,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Trait',
'name': 'Target',
'desc': 'Doc for <code>Trait::Target</code>',
'alias': 'TraitTypeItem',
'href': '../doc_alias/trait.Trait.html#associatedtype.Target',
'is_alias': true
@ -141,6 +152,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Trait',
'name': 'AssociatedConst',
'desc': 'Doc for <code>Trait::AssociatedConst</code>',
'alias': 'AssociatedConstItem',
'href': '../doc_alias/trait.Trait.html#associatedconstant.AssociatedConst',
'is_alias': true
@ -153,6 +165,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Trait',
'name': 'function',
'desc': 'Doc for <code>Trait::function</code>',
'alias': 'TraitFunctionItem',
'href': '../doc_alias/trait.Trait.html#tymethod.function',
'is_alias': true
@ -165,6 +178,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'function',
'desc': 'Doc for <code>function</code>',
'alias': 'FunctionItem',
'href': '../doc_alias/fn.function.html',
'is_alias': true
@ -177,6 +191,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Module',
'desc': 'Doc for <code>Module</code>',
'alias': 'ModuleItem',
'href': '../doc_alias/Module/index.html',
'is_alias': true
@ -189,6 +204,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Const',
'desc': 'Doc for <code>Const</code>',
'alias': 'ConstItem',
'href': '../doc_alias/constant.Const.html',
'is_alias': true
@ -205,6 +221,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Static',
'desc': 'Doc for <code>Static</code>',
'alias': 'StaticItem',
'href': '../doc_alias/static.Static.html',
'is_alias': true
@ -217,6 +234,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Union',
'desc': 'Doc for <code>Union</code>',
'alias': 'UnionItem',
'href': '../doc_alias/union.Union.html',
'is_alias': true
@ -225,6 +243,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Union',
'name': 'union_item',
'desc': 'Doc for <code>Union::union_item</code>',
'href': '../doc_alias/union.Union.html#structfield.union_item'
},
],
@ -235,6 +254,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Union',
'name': 'union_item',
'desc': 'Doc for <code>Union::union_item</code>',
'alias': 'UnionFieldItem',
'href': '../doc_alias/union.Union.html#structfield.union_item',
'is_alias': true
@ -247,6 +267,7 @@ const EXPECTED = [
{
'path': 'doc_alias::Union',
'name': 'method',
'desc': 'Doc for <code>Union::method</code>',
'alias': 'UnionMethodItem',
'href': '../doc_alias/union.Union.html#method.method',
'is_alias': true
@ -259,6 +280,7 @@ const EXPECTED = [
{
'path': 'doc_alias',
'name': 'Macro',
'desc': 'Doc for <code>Macro</code>',
'alias': 'MacroItem',
'href': '../doc_alias/macro.Macro.html',
'is_alias': true

View File

@ -1,12 +1,16 @@
/// Doc for `Struct`
#[doc(alias = "StructItem")]
pub struct Struct {
/// Doc for `Struct`'s `field`
#[doc(alias = "StructFieldItem")]
pub field: u32,
}
impl Struct {
/// Doc for `Struct::ImplConstItem`
#[doc(alias = "StructImplConstItem")]
pub const ImplConstItem: i32 = 0;
/// Doc for `Struct::method`
#[doc(alias = "StructMethodItem")]
pub fn method(&self) {}
}
@ -15,61 +19,78 @@ impl Trait for Struct {
type Target = u32;
const AssociatedConst: i32 = 12;
/// Doc for `Trait::function` implemented for Struct
#[doc(alias = "ImplTraitFunction")]
fn function() -> Self::Target {
0
}
}
/// Doc for `Enum`
#[doc(alias = "EnumItem")]
pub enum Enum {
/// Doc for `Enum::Variant`
#[doc(alias = "VariantItem")]
Variant,
}
impl Enum {
/// Doc for `Enum::method`
#[doc(alias = "EnumMethodItem")]
pub fn method(&self) {}
}
/// Doc for type alias `Typedef`
#[doc(alias = "TypedefItem")]
pub type Typedef = i32;
/// Doc for `Trait`
#[doc(alias = "TraitItem")]
pub trait Trait {
/// Doc for `Trait::Target`
#[doc(alias = "TraitTypeItem")]
type Target;
/// Doc for `Trait::AssociatedConst`
#[doc(alias = "AssociatedConstItem")]
const AssociatedConst: i32;
/// Doc for `Trait::function`
#[doc(alias = "TraitFunctionItem")]
fn function() -> Self::Target;
}
/// Doc for `function`
#[doc(alias = "FunctionItem")]
pub fn function() {}
/// Doc for `Module`
#[doc(alias = "ModuleItem")]
pub mod Module {}
/// Doc for `Const`
#[doc(alias = "ConstItem")]
pub const Const: u32 = 0;
/// Doc for `Static`
#[doc(alias = "StaticItem")]
pub static Static: u32 = 0;
/// Doc for `Union`
#[doc(alias = "UnionItem")]
pub union Union {
/// Doc for `Union::union_item`
#[doc(alias = "UnionFieldItem")]
pub union_item: u32,
pub y: f32,
}
impl Union {
/// Doc for `Union::method`
#[doc(alias = "UnionMethodItem")]
pub fn method(&self) {}
}
/// Doc for `Macro`
#[doc(alias = "MacroItem")]
#[macro_export]
macro_rules! Macro {

View File

@ -0,0 +1,163 @@
// Expected parser output for queries that contain non-ASCII identifiers.
const PARSED = [
    // A query made up entirely of non-ASCII identifier characters.
    {
        query: "中文",
        elems: [
            {
                name: "中文",
                fullPath: ["中文"],
                pathWithoutLast: [],
                pathLast: "中文",
                generics: [],
                typeFilter: -1,
            },
        ],
        returned: [],
        foundElems: 1,
        original: "中文",
        userQuery: "中文",
        error: null,
    },
    // Leading underscore, a digit and mixed scripts; the expected name and
    // `userQuery` are lowercased while `original` keeps the typed casing.
    {
        query: "_0Mixed中英文",
        elems: [
            {
                name: "_0mixed中英文",
                fullPath: ["_0mixed中英文"],
                pathWithoutLast: [],
                pathLast: "_0mixed中英文",
                generics: [],
                typeFilter: -1,
            },
        ],
        foundElems: 1,
        original: "_0Mixed中英文",
        returned: [],
        userQuery: "_0mixed中英文",
        error: null,
    },
    // A `::`-separated path whose last segment is non-ASCII.
    {
        query: "my_crate::中文API",
        elems: [
            {
                name: "my_crate::中文api",
                fullPath: ["my_crate", "中文api"],
                pathWithoutLast: ["my_crate"],
                pathLast: "中文api",
                generics: [],
                typeFilter: -1,
            },
        ],
        foundElems: 1,
        original: "my_crate::中文API",
        returned: [],
        userQuery: "my_crate::中文api",
        error: null,
    },
    // A type-based query: two inputs (one with a generic argument) and a
    // return type written with `name=path` syntax inside its angle brackets.
    {
        query: "类型A,类型B<约束C>->返回类型<关联类型=路径::约束D>",
        elems: [
            {
                name: "类型a",
                fullPath: ["类型a"],
                pathWithoutLast: [],
                pathLast: "类型a",
                generics: [],
            },
            {
                name: "类型b",
                fullPath: ["类型b"],
                pathWithoutLast: [],
                pathLast: "类型b",
                generics: [
                    {
                        name: "约束c",
                        fullPath: ["约束c"],
                        pathWithoutLast: [],
                        pathLast: "约束c",
                        generics: [],
                    },
                ],
            },
        ],
        foundElems: 3,
        totalElems: 5,
        literalSearch: true,
        original: "类型A,类型B<约束C>->返回类型<关联类型=路径::约束D>",
        returned: [
            {
                name: "返回类型",
                fullPath: ["返回类型"],
                pathWithoutLast: [],
                pathLast: "返回类型",
                generics: [],
            },
        ],
        userQuery: "类型a,类型b<约束c>->返回类型<关联类型=路径::约束d>",
        error: null,
    },
    // A space-separated path ending in `!`; the expected element carries
    // typeFilter 16 (presumably the macro item type; confirm against the
    // item type table).
    {
        query: "my_crate 中文宏!",
        elems: [
            {
                name: "my_crate 中文宏",
                fullPath: ["my_crate", "中文宏"],
                pathWithoutLast: ["my_crate"],
                pathLast: "中文宏",
                generics: [],
                typeFilter: 16,
            },
        ],
        foundElems: 1,
        original: "my_crate 中文宏!",
        returned: [],
        userQuery: "my_crate 中文宏!",
        error: null,
    },
    // An em dash after an otherwise valid identifier must be reported as an error.
    {
        query: "非法符号——",
        elems: [],
        foundElems: 0,
        original: "非法符号——",
        returned: [],
        userQuery: "非法符号——",
        error: "Unexpected `—` after `号` (not a valid identifier)",
    },
];
// Expected search results for queries that contain non-ASCII identifiers.
const EXPECTED = [
    // A name query that is also declared as a doc alias: the first two
    // entries are alias hits, the remaining ones are items whose name
    // contains the query.
    {
        query: "加法",
        others: [
            {
                name: "add",
                path: "non_english_identifier",
                is_alias: true,
                alias: "加法",
                href: "../non_english_identifier/macro.add.html",
            },
            {
                name: "add",
                path: "non_english_identifier",
                is_alias: true,
                alias: "加法",
                href: "../non_english_identifier/fn.add.html",
            },
            {
                name: "加法",
                path: "non_english_identifier",
                href: "../non_english_identifier/trait.加法.html",
                desc: "Add",
            },
            {
                name: "中文名称的加法宏",
                path: "non_english_identifier",
                href: "../non_english_identifier/macro.中文名称的加法宏.html",
            },
            {
                name: "中文名称的加法API",
                path: "non_english_identifier",
                href: "../non_english_identifier/fn.中文名称的加法API.html",
            },
        ],
        in_args: [
            {
                name: "加上",
                path: "non_english_identifier::加法",
                href: "../non_english_identifier/trait.加法.html#tymethod.加上",
            },
        ],
        returned: [],
    },
    // Extensive type-based search is still buggy, experimental & work-in-progress.
    {
        query: "可迭代->可选",
        others: [
            {
                name: "总计",
                path: "non_english_identifier",
                href: "../non_english_identifier/fn.总计.html",
                desc: "“sum”",
            },
        ],
        in_args: [],
        returned: [],
    },
];

View File

@ -0,0 +1,47 @@
// ASCII-named function that is reachable through a non-ASCII doc alias.
#[doc(alias = "加法")]
pub fn add(left: usize, right: usize) -> usize {
    left + right
}

// Function whose name mixes non-ASCII and ASCII characters.
pub fn 中文名称的加法API(left: usize, right: usize) -> usize {
    left + right
}

// Macro with a non-ASCII name.
#[macro_export]
macro_rules! 中文名称的加法宏 {
    ($left:expr, $right:expr) => {
        ($left) + ($right)
    };
}

// ASCII-named macro that is reachable through a non-ASCII doc alias.
#[doc(alias = "加法")]
#[macro_export]
macro_rules! add {
    ($left:expr, $right:expr) => {
        ($left) + ($right)
    };
}

/// Add
pub trait 加法<类型> {
    type 结果;
    fn 加上(self, 被加数: 类型) -> Self::结果;
}

/// IntoIterator
pub trait 可迭代 {
    type 项;
    type 转为迭代器: Iterator<Item = Self::项>;
    fn 迭代(self) -> Self::转为迭代器;
}

pub type 可选<类型> = Option<类型>;

/// "sum"
pub fn 总计<集合, 个体>(容器: 集合) -> 可选<集合::项>
where
    集合: 可迭代<项 = 个体>,
    个体: 加法<个体, 结果 = 个体>,
{
    // Folds the items pairwise; an empty input yields `None`.
    容器.迭代().reduce(|累计值, 当前值| 累计值.加上(当前值))
}